Simone authored on 10/12/2017 16:29:05
Showing60 changed files

... ...
@@ -95,6 +95,7 @@ Collate:
95 95
     'gmql_difference.R'
96 96
     'gmql_extend.R'
97 97
     'gmql_group.R'
98
+    'gmql_init.R'
98 99
     'gmql_join.R'
99 100
     'gmql_map.R'
100 101
     'gmql_materialize.R'
... ...
@@ -57,6 +57,7 @@ export(show_jobs_list)
57 57
 export(show_queries_list)
58 58
 export(show_samples_list)
59 59
 export(show_schema)
60
+export(stop_gmql)
60 61
 export(stop_job)
61 62
 export(trace_job)
62 63
 export(upload_dataset)
... ...
@@ -80,6 +81,7 @@ importClassesFrom(GenomicRanges,GRangesList)
80 81
 importClassesFrom(S4Vectors,DataTable)
81 82
 importFrom(BiocGenerics,setdiff)
82 83
 importFrom(BiocGenerics,union)
84
+importFrom(GenomicRanges,GRangesList)
83 85
 importFrom(GenomicRanges,makeGRangesFromDataFrame)
84 86
 importFrom(S4Vectors,aggregate)
85 87
 importFrom(S4Vectors,merge)
... ...
@@ -6,7 +6,7 @@
6 6
 #' @slot value value associated to GMQL dataset
7 7
 #' @name GMQLDataset-class
8 8
 #' @rdname GMQLDataset-class
9
-#' 
9
+#' @noRd
10 10
 #' @return instance of GMQL dataset
11 11
 #' 
12 12
 setClass("GMQLDataset",
... ...
@@ -22,7 +22,7 @@ setClass("GMQLDataset",
22 22
 #' 
23 23
 #' @param value value associated to GMQL dataset
24 24
 #' @rdname GMQLDataset-class
25
-#' 
25
+#' @noRd
26 26
 GMQLDataset <- function(value) {
27 27
     dataset <- new("GMQLDataset",value = value)
28 28
     return(dataset)
... ...
@@ -10,7 +10,7 @@ setGeneric("cover", function(data, ...) standardGeneric("cover"))
10 10
 
11 11
 #' Method map
12 12
 #' 
13
-#' Wrapper to GMQL map function
13
+#' Wrapper to GMQL MAP operator
14 14
 #' 
15 15
 #' @name map
16 16
 #' @rdname map
... ...
@@ -21,7 +21,7 @@ setGeneric("map", function(x, y, ...) standardGeneric("map"))
21 21
 
22 22
 #' Method take
23 23
 #' 
24
-#' Wrapper to take function
24
+#' Wrapper to TAKE operation
25 25
 #' 
26 26
 #' @name take
27 27
 #' @rdname take
... ...
@@ -32,7 +32,7 @@ setGeneric("take", function(data, ...) standardGeneric("take"))
32 32
 
33 33
 #' Method extend
34 34
 #' 
35
-#' Wrapper to GMQL extend function
35
+#' Wrapper to GMQL EXTEND operator 
36 36
 #' 
37 37
 #' @name extend
38 38
 #' @rdname extend
... ...
@@ -1,7 +1,7 @@
1
-#' Create GRangesList from GMQL Dataset
1
+#' Create GRangesList from GMQL dataset
2 2
 #'
3
-#' It create a GrangesList from GMQL samples in dataset
4
-#' It reads sample files in GTF or GDM/tabulated format
3
+#' It creates a GRangesList from GMQL samples in dataset. 
4
+#' It reads sample files in GTF or GDM/tab-delimited format.
5 5
 #'
6 6
 #' @importFrom rtracklayer import
7 7
 #' @importClassesFrom GenomicRanges GRangesList
... ...
@@ -9,16 +9,19 @@
9 9
 #' @importFrom utils read.delim
10 10
 #' @import xml2
11 11
 #'
12
-#' @param dataset_path string GMQL dataset folder path
13
-#' @param is_gtf logical value indicating if samples inside are in GTF format
14
-#' if TRUE and dataset does not contain gtf sample an error occures 
12
+#' @param dataset_path string with GMQL dataset folder path
13
+#' @param is_gtf logical value indicating if dataset samples are in GTF format;
14
+#' if TRUE and dataset does not contain GTF samples an error occurs 
15 15
 #' 
16
-#' @return GrangesList containing all GMQL samples in dataset
16
+#' @return GRangesList containing all GMQL samples in dataset
17 17
 #'
18 18
 #' @seealso \code{\link{export_gmql}}
19 19
 #'
20 20
 #' @examples
21
-#'
21
+#' 
22
+#' ## This statement defines the path to the subdirectory "example" of the 
23
+#' ## package "RGMQL" and import as GRangesList the GMQL dataset
24
+#' 
22 25
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
23 26
 #' grl = import_gmql(test_path, TRUE)
24 27
 #'
... ...
@@ -1,6 +1,6 @@
1
-#' Create GMQL dataset from GrangesList
1
+#' Create GMQL dataset from GRangesList
2 2
 #'
3
-#' It create GMQL dataset from GRangesList.
3
+#' It creates GMQL dataset from GRangesList.
4 4
 #' All samples are in GDM (tab-separated values) or GTF file format
5 5
 #'
6 6
 #' @import xml2
... ...
@@ -11,8 +11,9 @@
11 11
 #' @importFrom S4Vectors metadata
12 12
 #' @import GenomicRanges
13 13
 #'
14
-#' @param samples GrangesList
15
-#' @param dir_out folder path where create a folder and write the sample files
14
+#' @param samples GRangesList
15
+#' @param dir_out folder path where to create a folder and write the sample 
16
+#' files
16 17
 #' @param is_gtf logical value indicating if samples have to be exported
17 18
 #' with GTF or GDM format
18 19
 #'
... ...
@@ -21,28 +22,43 @@
21 22
 #' @seealso \code{\link{import_gmql}} 
22 23
 #'
23 24
 #' @details
24
-#' The GMQL dataset is made up by two differet file type
25
+#' The GMQL dataset is made up by two different file types:
25 26
 #'
26 27
 #' \itemize{
27
-#' \item{metadata files: contain metadata associated to corrisponding sample}
28
-#' \item{region files: contain many genomic regions }
29
-#' \item{region schema file: XML file contains region attributes 
28
+#' \item{metadata files: they contain metadata associated to corrisponding 
29
+#' sample.}
30
+#' \item{region files: they contain many genomic regions data.}
31
+#' \item{region schema file: XML file that contains region attribute name 
30 32
 #' (e.g. chr, start, end, pvalue)}
31 33
 #' }
32
-#' region sample files and metadata files are associated through file name:
34
+#' Region sample files and metadata files are associated through file name:
33 35
 #' for example S_0001.gdm for region file and S_0001.gdm.meta for 
34 36
 #' its metadata file
35 37
 #'
36 38
 #'
37 39
 #' @examples
38 40
 #' 
41
+#' ## load and attach add-on GenomicRanges package
39 42
 #' library(GenomicRanges)
43
+#' 
44
+#' ## These statemens create two GRanges with the region attribute: seqnames, 
45
+#' ## ranges (region coordinates) and strand, plus two column element:  
46
+#' ## score and GC
47
+#' 
40 48
 #' gr1 <- GRanges(seqnames = "chr2", ranges = IRanges(3, 6), strand = "+", 
41 49
 #' score = 5L, GC = 0.45)
42 50
 #' gr2 <- GRanges(seqnames = c("chr1", "chr1"),
43 51
 #' ranges = IRanges(c(7,13), width = 3), strand = c("+", "-"), 
44 52
 #' score = 3:4, GC = c(0.3, 0.5))
53
+#' 
54
+#' ## This statement creates a GRangesList using the previous GRanges 
55
+#' 
45 56
 #' grl = GRangesList(gr1, gr2)
57
+#' 
58
+#' ## This statement defines the path to the subdirectory "example" of the 
59
+#' ## package "RGMQL" and export the GRangesList as GMQL dataset using the 
60
+#' ## last name of 'dir_out' path as dataset name
61
+#' 
46 62
 #' test_out_path <- system.file("example", package = "RGMQL")
47 63
 #' export_gmql(grl, test_out_path,TRUE)
48 64
 #' 
... ...
@@ -72,22 +72,22 @@ take_value.META_AGGREGATES <- function(obj){
72 72
 #' function sum, performing all the type conversions needed  }
73 73
 #' \item{COUNT: It prepares input parameter to be passed to the library 
74 74
 #' function count, performing all the type conversions needed }
75
-#' \item{MIN:It prepares input parameter to be passed to the library 
75
+#' \item{MIN: It prepares input parameter to be passed to the library 
76 76
 #' function minimum, performing all the type conversions needed  }
77 77
 #' \item{MAX: It prepares input parameter to be passed to the library 
78 78
 #' function maximum, performing all the type conversions needed }
79
-#' \item{BAG: It prepares input parameter to be passed to the library 
80
-#' function bag, this function creates comma-separated strings of 
81
-#' attribute values, performing all the types conversions needed}
82
-#' \item{BAGD: It prepares input parameter to be passed to the library 
83
-#' function bag, this function creates comma-separated strings of distinct 
84
-#' attribute values, performing all the types conversions needed}
85 79
 #' \item{AVG: It prepares input parameter to be passed to the library 
86 80
 #' function mean, performing all the type conversions needed }
87 81
 #' \item{MEDIAN: It prepares input parameter to be passed to the library 
88 82
 #' function median, performing all the type conversions needed }
89 83
 #' \item{STD: It prepares input parameter to be passed to the library 
90 84
 #' function standard deviation, performing all the type conversions needed}
85
+#' \item{BAG: It prepares input parameter to be passed to the library 
86
+#' function bag; this function creates comma-separated strings of 
87
+#' attribute values, performing all the type conversions needed}
88
+#' \item{BAGD: It prepares input parameter to be passed to the library 
89
+#' function bag; this function creates comma-separated strings of distinct 
90
+#' attribute values, performing all the type conversions needed}
91 91
 #' \item{Q1: It prepares input parameter to be passed to the library 
92 92
 #' function fist quartile, performing all the type conversions needed}
93 93
 #' \item{Q2: It prepares input parameter to be passed to the library 
... ...
@@ -98,61 +98,66 @@ take_value.META_AGGREGATES <- function(obj){
98 98
 #' 
99 99
 #' @param value string identifying name of metadata or region attribute
100 100
 #'
101
-#' @return aggregate object
101
+#' @return Aggregate object
102 102
 #' 
103 103
 #' @examples
104 104
 #' 
105
-#' ## local with CustomParser
105
+#' ## This statement initializes and runs the GMQL server for local execution 
106
+#' ## and creation of results on disk. Then, with system.file() it defines 
107
+#' ## the path to the folder "DATASET" in the subdirectory "example"
108
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
109
+#' ## named "exp" using customParser
110
+#' 
106 111
 #' init_gmql()
107 112
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
108 113
 #' exp = read_dataset(test_path)
109 114
 #' 
110
-#' ## This statement copies all samples of exp into res dataset, and 
111
-#' ## then calculates new metadata attributes for each of them: 
115
+#' ## This statement copies all samples of exp dataset into res dataset, and 
116
+#' ## then calculates new metadata attribute for each of them: 
112 117
 #' ## sum_score is the sum of score of the sample regions.
113 118
 #' 
114 119
 #' res = extend(exp, sum_score = SUM("score"))
115 120
 #' 
116
-#' ## This statement copies all samples of exp into res dataset, 
117
-#' ## and then calculates new metadata attributes for each of them: 
118
-#' ## MinP is the minimum pvalue of the sample regions.
121
+#' ## This statement copies all samples of exp dataset into res dataset, 
122
+#' ## and then calculates new metadata attribute for each of them: 
123
+#' ## min_pvalue is the minimum pvalue of the sample regions.
119 124
 #' 
120
-#' res = extend(exp, minP = MIN("pvalue"))
125
+#' res = extend(exp, min_pvalue = MIN("pvalue"))
121 126
 #' 
122
-#' ## This statement copies all samples of exp into res dataset, 
123
-#' ## and then calculates new metadata attributes for each of them: 
127
+#' ## This statement copies all samples of exp dataset into res dataset, 
128
+#' ## and then calculates new metadata attribute for each of them: 
124 129
 #' ## max_score is the maximum score of the sample regions.
125 130
 #' 
126 131
 #' res = extend(exp, max_score = MAX("score"))
127 132
 #' 
128 133
 #' ## The following cover operation produces output regions where at least 2 
129
-#' ## and at most 3 regions ofexp overlap, having as resulting region 
130
-#' ## attributes the average signal of the overlapping regions; 
134
+#' ## and at most 3 regions of exp dataset overlap, having as resulting region 
135
+#' ## attribute the average signal of the overlapping regions; 
131 136
 #' ## the result has one sample for each input cell.
132 137
 #' 
133 138
 #' res = cover(exp, 2, 3, groupBy = list(DF("cell")), 
134 139
 #' avg_signal = AVG("signal") )
135 140
 #' 
136
-#' ## It copies all samples of DATA into OUT dataset, and then for each of 
137
-#' ## them it adds another metadata attribute, allScores, 
141
+#' ## This statement copies all samples of DATA dataset into OUT dataset, 
142
+#' ## and then for each of them it adds another metadata attribute, allScores, 
138 143
 #' ## which is the aggregation comma-separated list of all the values 
139 144
 #' ## that the region attribute score takes in the sample.
140 145
 #' 
141 146
 #' out = extend(exp, allScore = BAG("score"))
142 147
 #' 
143
-#' ## counts the regions in each sample and stores their number as value 
144
-#' ## of the new metadata RegionCount attribute of the sample.
148
+#' ## This statement counts the regions in each sample and stores their number 
149
+#' ## as value of the new metadata RegionCount attribute of the sample.
145 150
 #' 
146 151
 #' out = extend(exp, RegionCount = COUNT())
147 152
 #' 
148
-#' ## This statement copies all samples of exp into res dataset, 
149
-#' ## and then calculates new metadata attributes for each of them: 
153
+#' ## This statement copies all samples of exp dataset into res dataset, 
154
+#' ## and then calculates new metadata attribute for each of them: 
150 155
 #' ## std_score is the standard deviation score of the sample regions.
151 156
 #' 
152 157
 #' res = extend(exp, std_score = STD("score"))
153 158
 #' 
154
-#' ## This statement copies all samples of exp into res dataset, 
155
-#' ## and then calculates new metadata attributes for each of them: 
159
+#' ## This statement copies all samples of exp dataset into res dataset, 
160
+#' ## and then calculates new metadata attribute for each of them: 
156 161
 #' ## m_score is the median score of the sample regions.
157 162
 #' 
158 163
 #' res = extend(exp, m_score = MEDIAN("score"))
... ...
@@ -173,6 +178,25 @@ SUM <- function(value)
173 178
     return(list)
174 179
 }
175 180
 
181
+#' @name AGGREGATES-Object
182
+#' @aliases COUNT
183
+#' @rdname aggr-class
184
+#' @export
185
+#'
186
+COUNT <- function()
187
+{
188
+    list <- list()
189
+    ## Set the name for the class
190
+    class(list) <- c("COUNT","AGGREGATES","META_AGGREGATES")
191
+    return(list)
192
+}
193
+as.character.COUNT <- function(obj) {
194
+    class <- class(obj)[1]
195
+    c(class,"")
196
+}
197
+check.COUNT <- function(obj){}
198
+
199
+
176 200
 #' @name AGGREGATES-Object
177 201
 #' @aliases MIN
178 202
 #' @rdname aggr-class
... ...
@@ -220,66 +244,63 @@ AVG <- function(value)
220 244
 }
221 245
 
222 246
 #' @name AGGREGATES-Object
223
-#' @aliases BAG
247
+#' @aliases MEDIAN
224 248
 #' @rdname aggr-class
225 249
 #' @export
226 250
 #'
227
-BAG <- function(value)
251
+MEDIAN <- function(value)
228 252
 {
229 253
     check.META_AGGREGATES(value)
230 254
     
231 255
     list <- list(value = value)
232 256
     ## Set the name for the class
233
-    class(list) <- c("BAG","AGGREGATES","META_AGGREGATES")
257
+    class(list) <- c("MEDIAN","AGGREGATES","META_AGGREGATES")
234 258
     return(list)
235 259
 }
236 260
 
261
+
237 262
 #' @name AGGREGATES-Object
238
-#' @aliases COUNT
263
+#' @aliases STD
239 264
 #' @rdname aggr-class
240 265
 #' @export
241 266
 #'
242
-COUNT <- function()
267
+STD <- function(value)
243 268
 {
244
-    list <- list()
269
+    check.META_AGGREGATES(value)
270
+    
271
+    list <- list(value = value)
245 272
     ## Set the name for the class
246
-    class(list) <- c("COUNT","AGGREGATES","META_AGGREGATES")
273
+    class(list) <- c("STD","META_AGGREGATES")
247 274
     return(list)
248 275
 }
249
-as.character.COUNT <- function(obj) {
250
-    class <- class(obj)[1]
251
-    c(class,"")
252
-}
253
-check.COUNT <- function(obj){}
254 276
 
255 277
 #' @name AGGREGATES-Object
256
-#' @aliases STD
278
+#' @aliases BAG
257 279
 #' @rdname aggr-class
258 280
 #' @export
259 281
 #'
260
-STD <- function(value)
282
+BAG <- function(value)
261 283
 {
262 284
     check.META_AGGREGATES(value)
263 285
     
264 286
     list <- list(value = value)
265 287
     ## Set the name for the class
266
-    class(list) <- c("STD","META_AGGREGATES")
288
+    class(list) <- c("BAG","AGGREGATES","META_AGGREGATES")
267 289
     return(list)
268 290
 }
269 291
 
270
-
271 292
 #' @name AGGREGATES-Object
272
-#' @aliases MEDIAN
293
+#' @aliases BAGD
273 294
 #' @rdname aggr-class
274 295
 #' @export
275 296
 #'
276
-MEDIAN <- function(value)
297
+BAGD <- function(value)
277 298
 {
278 299
     check.META_AGGREGATES(value)
279 300
     
280 301
     list <- list(value = value)
281 302
     ## Set the name for the class
282
-    class(list) <- c("MEDIAN","AGGREGATES","META_AGGREGATES")
303
+    class(list) <- c("BAGD","AGGREGATES","META_AGGREGATES")
283 304
     return(list)
284 305
 }
285 306
 
... ...
@@ -327,18 +348,4 @@ Q3 <- function(value)
327 348
     return(list)
328 349
 }
329 350
 
330
-#' @name AGGREGATES-Object
331
-#' @aliases BAGD
332
-#' @rdname aggr-class
333
-#' @export
334
-#'
335
-BAGD <- function(value)
336
-{
337
-    check.META_AGGREGATES(value)
338
-    
339
-    list <- list(value = value)
340
-    ## Set the name for the class
341
-    class(list) <- c("BAGD","AGGREGATES","META_AGGREGATES")
342
-    return(list)
343
-}
344 351
 
... ...
@@ -31,71 +31,80 @@ check.DISTAL <- function(value)
31 31
 #' DISTAL object class constructor
32 32
 #'
33 33
 #' This class constructor is used to create instances of DISTAL object
34
-#' to be used in GMQL functions that use genometric predicate parameter
35
-#' requiring distal condition on value
34
+#' to be used in GMQL JOIN operations (RGMQL merge functions) that use 
35
+#' genometric predicate parameter requiring distal condition on value
36 36
 #' 
37 37
 #' \itemize{
38 38
 #' \item{DL: It denotes the less distance clause, 
39
-#' which selects all the regions of the experiment such that their distance 
40
-#' from the anchor region is less than 'value' bases.}
39
+#' which selects all the regions of a joined experiment dataset sample such 
40
+#' that their distance from the anchor region of a joined reference dataset 
41
+#' sample is less than 'value' bases.}
41 42
 #' \item{DLE: It denotes the less distance clause, 
42
-#' which selects all the regions of the experiment such that their distance 
43
-#' from the anchor region is less than, or equal to, 'value' bases.}
44
-#' \item{DG: it denotes the less distance clause, 
45
-#' which selects all the regions of the experiment such that their distance 
46
-#' from the anchor region is greater than 'value' bases. }
47
-#' \item{DGE: It denotes the less distance clause, which selects all the 
48
-#' regions of the experiment such that their distance from the anchor region 
49
-#' is greater than, or equal to, 'value' bases.}
43
+#' which selects all the regions of a joined experiment dataset sample such 
44
+#' that their distance from the anchor region of a joined reference dataset 
45
+#' sample is less than, or equal to, 'value' bases.}
46
+#' \item{DG: It denotes the less distance clause, 
47
+#' which selects all the regions of a joined experiment dataset sample such 
48
+#' that their distance from the anchor region of a joined reference dataset 
49
+#' sample is greater than 'value' bases. }
50
+#' \item{DGE: It denotes the less distance clause, 
51
+#' which selects all the regions of a joined experiment dataset sample such 
52
+#' that their distance from the anchor region of a joined reference dataset 
53
+#' sample is greater than, or equal to, 'value' bases.}
50 54
 #' \item{MD: It denotes the minimum distance clause, which selects 
51
-#' the 'value' regions of the experiment at minimial distance from the 
52
-#' anchor region.}
55
+#' the first 'value' regions of a joined experiment at minimial distance 
56
+#' from the anchor region of a joined reference dataset sample.}
53 57
 #' \item{UP: It denotes the upstream direction of the genome.
54
-#' They are interpreted as predicates that must hold on the regions 
55
-#' of the experiment.
56
-#' UP is true when region of experiment is in the upstream genome 
57
-#' of the anchor region.
58
+#' It makes predicates to be hold on the upstream of the regions of a joined 
59
+#' experiment dataset sample.
60
+#' UP is true when region of a joined experiment dataset sample is in the
61
+#' upstream genome of the anchor region of a joined reference dataset sample.
58 62
 #' When this clause is not present, distal conditions apply to both 
59
-#' the directions of the genome.}
60
-#' \item{DOWN: It denotes the downstream direction of the genome.
61
-#' They are interpreted as predicates that must hold on the regions of 
62
-#' the experiment.
63
-#' DOWN is true when region of experiment is in the downstream genome of 
64
-#' the anchor region.
65
-#' When this clause is not present, distal conditions apply to both the 
66
-#' directions of the genome. }
63
+#' directions of the genome.}
64
+#' \item{DOWN:  It denotes the downstream direction of the genome.
65
+#' It makes predicates to be hold on the downstream of the regions of a joined 
66
+#' experiment dataset sample.
67
+#' UP is true when region of a joined experiment dataset sample is in the
68
+#' downstream genome of the anchor region of a joined reference dataset sample.
69
+#' When this clause is not present, distal conditions apply to both 
70
+#' directions of the genome.}
67 71
 #' }
68 72
 #' 
69 73
 #' @param value string identifying distance between genomic regions 
70
-#' in base pairs, 
74
+#' in base pair, 
71 75
 #'
72
-#' @return distal object
76
+#' @return Distal object
73 77
 #' 
74 78
 #' @examples
79
+#' ## Thi statement initializes and runs the GMQL server for local execution 
80
+#' ## and creation of results on disk. Then, with system.file() it defines 
81
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
82
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL 
83
+#' ## dataset named "TSS" and "HM" respectively using customParser
75 84
 #' 
76 85
 #' init_gmql()
77
-#' test_path <- system.file("example","DATASET",package = "RGMQL")
78
-#' test_path2 <- system.file("example","DATASET_GDM",package = "RGMQL")
86
+#' test_path <- system.file("example", "DATASET", package = "RGMQL")
87
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
79 88
 #' TSS = read_dataset(test_path)
80 89
 #' HM = read_dataset(test_path2)
81 90
 #' 
82 91
 #' ## Given a dataset HM and one called TSS with a sample including 
83
-#' # Transcription Start Site annotations, it searches for those regions of hm 
84
-#' # that are at a minimal distance from a transcription start site (TSS) 
85
-#' # and takes the first/closest one for each TSS, 
86
-#' # provided that such distance is lesser than 1200 bases and joined TSS 
87
-#' # and HM samples are obtained from the same provider (joinby clause).
92
+#' ## Transcription Start Site annotations, it searches for those regions of HM 
93
+#' ## that are at a minimal distance from a transcription start site (TSS) 
94
+#' ## and takes the first/closest one for each TSS, provided that such 
95
+#' ## distance is lesser than 1200 bases and joined TSS and HM samples are 
96
+#' ## obtained from the same provider (joinby clause).
88 97
 #' 
89 98
 #' join_data = merge(TSS, HM, 
90 99
 #' genometric_predicate = list(MD(1), DL(1200)), DF("provider"), 
91 100
 #' region_output = "RIGHT")
92 101
 #'
93
-#' #' # Given a dataset 'hm' and one called 'tss' with a sample including 
94
-#' # Transcription Start Site annotations, it searches for those regions of hm 
95
-#' # that are at a minimal distance from a transcription start site (TSS) 
96
-#' # and takes the first/closest one for each TSS, provided that such distance 
97
-#' # is greater than 12K bases and joined 'tss' and 'hm' samples are obtained 
98
-#' # from the same provider (joinby clause).
102
+#' ## Given a dataset 'HM' and one called 'TSS' with a sample including 
103
+#' ## Transcription Start Site annotations, it searches for those regions of HM 
104
+#' ## that are at a minimal distance from a transcription start site (TSS) 
105
+#' ## and takes the first/closest one for each TSS, provided that such distance 
106
+#' ## is greater than 12K bases and joined 'tss' and 'hm' samples are obtained 
107
+#' ## from the same provider (joinby clause).
99 108
 #' 
100 109
 #' join_data = merge(TSS, HM, 
101 110
 #' genometric_predicate = list(MD(1), DGE(12000), DOWN()), 
... ...
@@ -39,37 +39,41 @@ as.character.OPERATOR <- function(obj) {
39 39
 #' to be used in GMQL functions that require operator on value.
40 40
 #' 
41 41
 #' \itemize{
42
-#' \item{META: It prepared input parameter to be passed to library function 
43
-#' meta, performing all the type conversion needed}
44
-#' \item{SQRT: It prepared input parameter to be passed to library function 
45
-#' sqrt, performing all the type conversion needed}
46
-#' \item{NIL: It prepared input parameter to be passed to library function 
47
-#' null, performing all the type conversion needed}
42
+#' \item{META: It prepares input parameter to be passed to library function 
43
+#' meta, performing all the type conversions needed}
44
+#' \item{SQRT: It prepares input parameter to be passed to library function 
45
+#' sqrt, performing all the type conversions needed}
46
+#' \item{NIL: It prepares input parameter to be passed to library function 
47
+#' null, performing all the type conversions needed}
48 48
 #' }
49 49
 #' 
50
-#' @param value string identifying name of metadata attribute or region 
51
-#' attribute
52
-#' @param type string identifying the type of the attribute value
53
-#' must be: integer, double or string
50
+#' @param value string identifying name of metadata attribute
51
+#' @param type string identifying the type of the attribute value;
52
+#' it must be: INTEGER, DOUBLE or STRING
54 53
 #'
55
-#' @return operator object
54
+#' @return Operator object
56 55
 #' 
57 56
 #' 
58 57
 #' @examples
58
+#' ## Thi statement initializes and runs the GMQL server for local execution 
59
+#' ## and creation of results on disk. Then, with system.file() it defines 
60
+#' ## the path to the folders "DATASET" in the subdirectory "example" 
61
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
62
+#' ## named "exp"
59 63
 #' 
60 64
 #' init_gmql()
61 65
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
62 66
 #' exp = read_dataset(test_path)
63 67
 #' 
64
-#' ## allows to select, in all input sample, all those regions for which the 
65
-#' ## region attribute score has a value which is greater than the metadata 
66
-#' ## attribute value "avg_score" in their sample.
68
+#' ## This statement allows to select, in all input sample, all those regions 
69
+#' ## for which the region attribute score has a value which is greater 
70
+#' ## than the metadata attribute value "avg_score" in their sample.
67 71
 #' 
68 72
 #' data = filter(exp, r_predicate = score > META("avg_score"))
69 73
 #' 
70
-#' ## It define a new numeric region attribute with "null" value. 
74
+#' ## This statement defines a new numeric region attribute with "null" value. 
71 75
 #' ## The syntax for creating a new attribute with null value is 
72
-#' ## attribute_name = NULL(TYPE), where type may be INTEGER or DOUBLE.
76
+#' ## attribute_name = NULL(TYPE), where type may be INTEGER, DOUBLE or STRING.
73 77
 #' 
74 78
 #' out = select(exp, regions_update = list(signal = NIL("INTEGER"), 
75 79
 #' pvalue = NIL("DOUBLE")))
... ...
@@ -82,9 +86,6 @@ as.character.OPERATOR <- function(obj) {
82 86
 #' ## with value correspondent to the mathematical squared root 
83 87
 #' ## of the pre-existing metadata attribute concentration.
84 88
 #' 
85
-#' init_gmql()
86
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
87
-#' exp = read_dataset(test_path)
88 89
 #' out = select(exp, metadata_update = list(concSq = SQRT("concentration")))
89 90
 #' 
90 91
 #' @name OPERATOR-Object
... ...
@@ -92,7 +93,7 @@ as.character.OPERATOR <- function(obj) {
92 93
 #' @rdname operator-class
93 94
 #' @export
94 95
 #'
95
-META <- function(value,type=NULL)
96
+META <- function(value, type = NULL)
96 97
 {
97 98
     check.OPERATOR(value)
98 99
     if(!is.null(type))
... ...
@@ -128,11 +129,11 @@ check.META <- function(type)
128 129
 #' @rdname operator-class
129 130
 #' @export
130 131
 #'
131
-NIL <- function(value)
132
+NIL <- function(type)
132 133
 {
133
-    check.NIL(value)
134
+    check.NIL(type)
134 135
     
135
-    list <- list(value = value)
136
+    list <- list(value = type)
136 137
     ## Set the name for the class
137 138
     class(list) <- c("NIL","OPERATOR")
138 139
     return(list)
... ...
@@ -5,7 +5,7 @@
5 5
 #' Condition evaluation functions
6 6
 #'
7 7
 #' These functions are used to support joinBy and/or groupBy function parameter.
8
-#' It create a list of one element: matrix containing the two coloumn:
8
+#' They create a 2-D array made up by two coloumn:
9 9
 #' type of condition evaluation and the metadata attribute name
10 10
 #'
11 11
 #' \itemize{
... ...
@@ -13,16 +13,16 @@
13 13
 #' FULL evaluation: two attributes match if they both end with value and,
14 14
 #' if they have further prefixes, the two prefix sequences are identical}
15 15
 #' \item{EX: It defines a EXACT evaluation of the input values.
16
-#' EXACT evaluation: only attributes exactly as value will match; 
16
+#' EXACT evaluation: only attributes exactly as value match; 
17 17
 #' no further prefixes are allowed. }
18 18
 #' \item{DF: It defines a DEFAULT evaluation of the input values.
19 19
 #' DEFAULT evaluation: the two attributes match if both end with value.}
20 20
 #' }
21 21
 #'
22
-#' @param ... series of string identifying name of metadata attribute 
22
+#' @param ... series of string identifying a name of metadata attribute 
23 23
 #' to be evaluated
24 24
 #' 
25
-#' @return list of 2-D array containing method of evaluation and metadata
25
+#' @return 2-D array containing method of evaluation and metadata
26 26
 #' 
27 27
 #' @examples
28 28
 #' 
... ...
@@ -20,9 +20,9 @@
20 20
 #' this condition is logically "ANDed" with prefix filtering (see below)
21 21
 #' if NULL no filtering action occures
22 22
 #' (i.e every sample is taken for region filtering)
23
-#' @param metadata_prefix vector of strings that will filter metadata
24
-#' containing rispectively every element of this vector.
25
-#' number of element in both vector must match
23
+#' @param metadata_prefix vector of strings that will support the metadata
24
+#' filtering. If defined every defined 'metadata' are concatenated with the 
25
+#' corresponding prefix.
26 26
 #' @param regions vector of strings that extracts only region attribute 
27 27
 #' specified; if NULL no regions attribute is taken and the output is only 
28 28
 #' GRanges made up by the region coordinate attributes 
... ...
@@ -33,18 +33,26 @@
33 33
 #' selected regions
34 34
 #'
35 35
 #' @details
36
-#' This function works only with datatset or GRangesList that has the same
37
-#' information about regions attribute (but of course different value)
36
+#' This function works only with datatset or GRangesList which samples or 
37
+#' Granges have the same regions coordinates (chr, ranges, strand)
38
+#' 
38 39
 #' In case of Grangeslist data input the function will search for metadata
39 40
 #' into metadata() function associated to Grangeslist.
40 41
 #'
41
-#' @return Granges with selected regions (if any) in elementMetadata
42
+#' @return GRanges with selected regions
42 43
 #'
43 44
 #' @examples
44
-#'
45
+#' 
46
+#' ## This statement defines the path to the folders "DATASET" in the 
47
+#' ## subdirectory "example" of the package "RGMQL" and filter such folder 
48
+#' ## dataset including at output only "pvalue" and "peak" regions
49
+#' 
45 50
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
46
-#' filter_and_extract(test_path,regions = c("pvalue", "peak"))
47
-#'
51
+#' filter_and_extract(test_path, regions = c("pvalue", "peak"))
52
+#' 
53
+#' ## This statement import a GMQL dataset as GRangesList and filter it 
54
+#' ## including at output only "pvalue" and "peak" regions
55
+#' 
48 56
 #' grl = import_gmql(test_path, TRUE)
49 57
 #' filter_and_extract(grl, regions = c("pvalue", "peak"))
50 58
 #'
... ...
@@ -1,13 +1,13 @@
1 1
 #' Method setdiff
2 2
 #' 
3
-#' @description Wrapper to GMQL difference function
3
+#' @description Wrapper to GMQL DIFFERENCE operator
4 4
 #' 
5 5
 #' @description It produces one sample in the result for each sample of the 
6 6
 #' left operand, by keeping the same metadata of the left input sample 
7 7
 #' and only those regions (with their schema and values) of the left input 
8 8
 #' sample which do not intersect with any region in the right operand sample.
9 9
 #' The optional \emph{joinby} clause is used to extract a subset of couples
10
-#' from the cartesian product of two dataset \emph{x} and \emph{y} 
10
+#' from the Cartesian product of two dataset \emph{x} and \emph{y} 
11 11
 #' on which to apply the DIFFERENCE operator:
12 12
 #' only those samples that have the same value for each attribute
13 13
 #' are considered when performing the difference.
... ...
@@ -17,19 +17,16 @@
17 17
 #' 
18 18
 #' @param x GMQLDataset class object
19 19
 #' @param y GMQLDataset class object
20
-#' @param ... Additional arguments for use in specific methods.
21
-#' 
22
-#' This method accept a function to define condition evaluation on metadata.
20
+#' @param joinBy list of evalation functions to define evaluation on metadata:
23 21
 #' \itemize{
24
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match 
25
-#' if they both end with value and, if they have a further prefixes,
22
+#' \item{\code{\link{FN}}(value): Fullname evaluation, two attributes match 
23
+#' if they both end with \emph{value} and, if they have further prefixes,
26 24
 #' the two prefix sequence are identical}
27
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly 
28
-#' as value will match; no further prefixes are allowed. }
29
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match 
30
-#' if both end with value.}
25
+#' \item{\code{\link{EX}}(value): Exact evaluation, only attributes exactly 
26
+#' as \emph{value} match; no further prefixes are allowed. }
27
+#' \item{\code{\link{DF}}(value): Default evaluation, the two attributes match 
28
+#' if both end with \emph{value}.}
31 29
 #' }
32
-#' 
33 30
 #' @param is_exact single logical value: TRUE means that the region difference 
34 31
 #' is executed only on regions in left_input_data with exactly the same 
35 32
 #' coordinates of at least one region present in right_input_data; 
... ...
@@ -42,47 +39,48 @@
42 39
 #' 
43 40
 #'
44 41
 #' @examples
45
-#'
46
-#' ## This GMQL statement returns all the regions in the first dataset 
47
-#' ## that do not overlap any region in the second dataset.
42
+#' ## This statement initializes and runs the GMQL server for local execution 
43
+#' ## and creation of results on disk. Then, with system.file() it defines 
44
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
45
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL 
46
+#' ## dataset named "data1" and "data2" respectively using customParser
48 47
 #' 
49 48
 #' init_gmql()
50 49
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
51 50
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
52
-#' r_left = read_dataset(test_path)
53
-#' r_right = read_dataset(test_path2)
54
-#' out = setdiff(r_left, r_right)
51
+#' data1 = read_dataset(test_path)
52
+#' data2 = read_dataset(test_path2)
53
+#' 
54
+#' ## This GMQL statement returns all the regions in the first dataset 
55
+#' ## that do not overlap any region in the second dataset.
56
+#' 
57
+#' out = setdiff(data1, data2)
55 58
 #' 
56
-#' \dontrun{
57 59
 #' ## This GMQL statement extracts for every pair of samples s1 in EXP1 
58 60
 #' ## and s2 in EXP2 having the same value of the metadata 
59 61
 #' ## attribute 'antibody_target' the regions that appear in s1 but 
60 62
 #' ## do not overlap any region in s2; 
61 63
 #' ## metadata of the result are the same as the metadata of s1.
62 64
 #' 
63
-#' init_gmql()
64
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
65
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
66
-#' exp1 = read_dataset(test_path)
67
-#' exp2 = read_dataset(test_path2)
68
-#' out = setdiff(exp1, exp2, DF("antibody_target"))
65
+#' out_t = setdiff(data1, data2, DF("antibody_target"))
69 66
 #'
70
-#' }
71 67
 #' @name setdiff
72 68
 #' @aliases setdiff,GMQLDataset,GMQLDataset-method
73 69
 #' @aliases setdiff-method
74 70
 #' @export
75 71
 setMethod("setdiff", c("GMQLDataset","GMQLDataset"),
76
-            function(x, y, ..., is_exact = FALSE)
72
+            function(x, y, joinBy = NULL, is_exact = FALSE)
77 73
             {
78 74
                 ptr_data_x = x@value
79 75
                 ptr_data_y = y@value
80
-                joinBy = list(...)
81 76
                 gmql_difference(ptr_data_x, ptr_data_y, is_exact, joinBy)
82 77
             })
83 78
 
84 79
 gmql_difference <- function(left_data, right_data, is_exact, joinBy)
85 80
 {
81
+    if(!is.list(joinBy))
82
+        stop("joinBy: must be a list")
83
+    
86 84
     if(!is.null(joinBy) && !length(joinBy) == 0)
87 85
     {
88 86
         cond <- .join_condition(joinBy)
... ...
@@ -5,24 +5,20 @@
5 5
 #' and adds them to the existing metadata attributes of the sample.
6 6
 #' Aggregate functions are applied sample by sample.
7 7
 #'
8
-#' @importFrom rJava .jnull
9
-#' @importFrom rJava J
10
-#' @importFrom rJava .jarray
8
+#' @importFrom rJava J .jnull .jarray
11 9
 #'
12 10
 #' @param .data GMQLDataset class object 
13
-#' @param ... Additional arguments for use in specific methods.
14
-#' It accept a series of aggregate function on region attribute.
15
-#' All the element in the form \emph{key} = \emph{aggregate}.
16
-#' The \emph{aggregate} is an object of class AGGREGATES
17
-#' The aggregate functions available are: \code{\link{SUM}}, 
11
+#' @param ... a series of expressions separated by comma in the form 
12
+#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of 
13
+#' class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, 
18 14
 #' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, 
19 15
 #' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, 
20 16
 #' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, 
21 17
 #' \code{\link{Q2}}, \code{\link{Q3}}.
22
-#' Every aggregate accepts a string value, execet for COUNT, which does not 
18
+#' Every aggregate accepts a string value, except for COUNT, which does not 
23 19
 #' have any value.
24 20
 #' Argument of 'aggregate function' must exist in schema, i.e. among region 
25
-#' attributes. Two style are allowed:
21
+#' attributes. Two styles are allowed:
26 22
 #' \itemize{
27 23
 #' \item list of key-value pairs: e.g. sum = SUM("pvalue")
28 24
 #' \item list of values: e.g. SUM("pvalue")
... ...
@@ -33,28 +29,29 @@
33 29
 #' for the subsequent GMQLDataset method
34 30
 #' 
35 31
 #' @examples
36
-#'
37
-#' ## it counts the regions in each sample and stores their number as value 
38
-#' ## of the new metadata RegionCount attribute of the sample.
32
+#' 
33
+#' ## This statement initializes and runs the GMQL server for local execution 
34
+#' ## and creation of results on disk. Then, with system.file() it defines 
35
+#' ## the path to the folders "DATASET" in the subdirectory "example" 
36
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
37
+#' ## named "data"
38
+#' 
39 39
 #' init_gmql()
40 40
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
41
-#' r <- read_dataset(test_path)
42
-#' e <- extend(r, RegionCount = COUNT())
41
+#' data <- read_dataset(test_path)
42
+#' 
43
+#' ## This statement counts the regions in each sample and stores their number 
44
+#' ## as value of the new metadata attribute RegionCount of the sample.
43 45
 #' 
44
-#' \dontrun{
46
+#' e <- extend(data, RegionCount = COUNT())
45 47
 #' 
46
-#' ## it copies all samples of exp dataset into res dataset, 
48
+#' ## This statement copies all samples of data dataset into 'res' dataset, 
47 49
 #' ## and then calculates for each of them two new metadata attributes:
48 50
 #' ##  1. RegionCount is the number of sample regions;
49 51
 #' ##  2. MinP is the minimum pvalue of the sample regions.
50
-#' ## res sample regions are the same as the ones in exp.
52
+#' ## res sample regions are the same as the ones in data.
51 53
 #' 
52
-#' init_gmql()
53
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
54
-#' exp = read_dataset(test_path)
55
-#' res = extend(exp, RegionCount = COUNT(), MinP = MIN("pvalue"))
56
-#' 
57
-#' }
54
+#' res = extend(data, RegionCount = COUNT(), MinP = MIN("pvalue"))
58 55
 #' 
59 56
 #' @name extend
60 57
 #' @rdname extend
61 58
new file mode 100644
... ...
@@ -0,0 +1,121 @@
1
+#' Init GMQL server
2
+#'
3
+#' It initializes and runs GMQL server for executing GMQL query
4
+#' It also performs a login to GMQL REST services suite, if needed
5
+#' 
6
+#' @importFrom rJava J
7
+#' 
8
+#' @param output_format string identifies the output format of sample files.
9
+#' It can be TAB, GTF or COLLECT:
10
+#' \itemize{
11
+#' \item{TAB: tab delimited file format}
12
+#' \item{GTF: tab-delimited text fstandard ormat based on the general 
13
+#' feature format}
14
+#' \item{COLLECT: used for storing output in memory}
15
+#' }
16
+#' @param remote_processing logical value specifying the processing mode.
17
+#' True for processing on cluster (remote), false for local processing.
18
+#' 
19
+#' @param url string url of server: It must contain the server address 
20
+#' and base url; service name is added automatically.
21
+#' If NULL, no login is performed.
22
+#' You can always perform it by calling the function \code{\link{login_gmql}} 
23
+#' explicitly
24
+#' 
25
+#' @param username string name used during signup 
26
+#' @param password string password used during signup
27
+#' 
28
+#' @return None
29
+#'
30
+#' @examples
31
+#'
32
+#' ## This statement initializes GMQL with local processing with sample files 
33
+#' ## output format as tab delimited
34
+#' 
35
+#' init_gmql("tab", FALSE)
36
+#' 
37
+#' ## initializes GMQL with remote processing
38
+#' 
39
+#' remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/"
40
+#' init_gmql(remote_processing = TRUE, url = remote_url)
41
+#' 
42
+#' @export
43
+#'
44
+init_gmql <- function(output_format = "GTF", remote_processing = FALSE, 
45
+                    url = NULL, username = NULL, password = NULL)
46
+{
47
+    out_format <- toupper(output_format)
48
+    if(!identical(out_format,"TAB") && !identical(out_format,"GTF") && 
49
+                !identical(out_format,"COLLECT"))
50
+        stop("output_format must be TAB, GTF or COLLECT")
51
+    .check_logical(remote_processing)
52
+    
53
+    # mettere attesa da input keyboard, controllare se token già esiste 
54
+    # da sessione precedente
55
+    if(!is.null(url) && !exists("authToken",envir = .GlobalEnv))
56
+        login_gmql(url,username,password)
57
+    
58
+    WrappeR <- J("it/polimi/genomics/r/Wrapper")
59
+    WrappeR$initGMQL(out_format,remote_processing)
60
+}
61
+
62
+#' Stop GMQL server
63
+#'
64
+#' Stop GMQL server
65
+#' 
66
+#' @importFrom rJava J
67
+#' 
68
+#' @return None
69
+#'
70
+#' @examples
71
+#'
72
+#' ## These statements initializes GMQL with local processing with sample files 
73
+#' ## output format as tab delimited and then stop it
74
+#' 
75
+#' init_gmql("tab", FALSE)
76
+#' 
77
+#' stop_gmql()
78
+#' 
79
+#' @export
80
+#'
81
+stop_gmql <- function()
82
+{
83
+    WrappeR <- J("it/polimi/genomics/r/Wrapper")
84
+    WrappeR$stopGMQL()
85
+}
86
+
87
+
88
+#' Disable or Enable remote processing
89
+#'
90
+#' It allows to enable or disable remote processing 
91
+#' 
92
+#' @details 
93
+#' The invocation of this function allows to change mode of processing.
94
+#' After invoking collect() it is not possbile to switch the processing mode. 
95
+#' 
96
+#' @importFrom rJava J
97
+#' 
98
+#' @param is_remote logical value used in order to set the processing mode.
99
+#' TRUE you set a remote query processing mode, otherwise it will be local,
100
+#' 
101
+#' @return None
102
+#' 
103
+#' @examples
104
+#' 
105
+#' ## These statements initializes GMQL with local processing with sample files 
106
+#' ## output format as tab delimited and then change processing mode to remote
107
+#' 
108
+#' init_gmql("tab", remote_processing = FALSE)
109
+#' 
110
+#' remote_processing(TRUE)
111
+#'
112
+#' @export
113
+#' 
114
+remote_processing<-function(is_remote)
115
+{
116
+    WrappeR <- J("it/polimi/genomics/r/Wrapper")
117
+    .check_logical(is_remote)
118
+    response <- WrappeR$remote_processing(is_remote)
119
+    print(response)
120
+}
121
+
... ...
@@ -1,14 +1,17 @@
1 1
 #' Method merge
2 2
 #'
3
-#' It takes in input two datasets, respectively known as nchor (left) 
4
-#' and experiment (right) and returns a dataset of samples consisting of 
5
-#' regions extracted from the operands according to the specified condition
3
+#' @description Wrapper to GMQL JOIN operator
4
+#' 
5
+#' @description It takes in input two datasets, respectively known as anchor 
6
+#' (left) and experiment (right) and returns a dataset of samples consisting 
7
+#' of regions extracted from the operands according to the specified condition
6 8
 #' (a.k.a \emph{genometric_predicate}).
7 9
 #' The number of generated output samples is the Cartesian product 
8 10
 #' of the number of samples in the anchor and in the experiment dataset 
9
-#' (if \emph{by} is not specified).
11
+#' (if \emph{joinBy} is not specified).
10 12
 #' The output metadata are the union of the input metadata, 
11
-#' with their attribute names prefixed with left or right respectively.
13
+#' with their attribute names prefixed with left or right dataset name, 
14
+#' respectively.
12 15
 #'
13 16
 #' @importFrom rJava J .jnull .jarray
14 17
 #' @importFrom S4Vectors merge
... ...
@@ -16,41 +19,37 @@
16 19
 #' @param x GMQLDataset class object
17 20
 #' @param y GMQLDataset class object
18 21
 #' 
19
-#' @param genometric_predicate is a list of DISTAL object
22
+#' @param genometric_predicate it is a list of DISTAL objects
20 23
 #' For details of DISTAL objects see:
21 24
 #' \code{\link{DLE}}, \code{\link{DGE}}, \code{\link{DL}}, \code{\link{DG}},
22 25
 #' \code{\link{MD}}, \code{\link{UP}}, \code{\link{DOWN}}
23 26
 #' 
24
-#' @param ... Additional arguments for use in specific methods.
25
-#' 
26
-#' This method accept a function to define condition evaluation on metadata.
27
+#' @param joinBy list of evalation functions to define evaluation on metadata:
27 28
 #' \itemize{
28
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match 
29
-#' if they both end with value and, if they have a further prefixes,
30
-#' the two prefix sequence are identical}
31
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly 
32
-#' as value will match; no further prefixes are allowed. }
33
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match 
34
-#' if both end with value.}
29
+#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
30
+#' if they both end with \emph{value} and, if they have further prefixes,
31
+#' the two prefix sequence are identical.}
32
+#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
33
+#' as \emph{value} match; no further prefixes are allowed.}
34
+#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
35
+#' if both end with \emph{value}.}
35 36
 #' }
36 37
 #' 
37
-#' @param region_output single string that declare which region is given in 
38
-#' output for each input pair of left dataset right dataset regions 
38
+#' @param region_output single string that declares which region is given in 
39
+#' output for each input pair of left dataset and right dataset regions 
39 40
 #' satisfying the genometric predicate:
40 41
 #' \itemize{
41
-#' \item{left: outputs the anchor regions from left_input_data that satisfy 
42
-#' the genometric predicate}
43
-#' \item{right: outputs the experiment regions from right_input_data that 
44
-#' satisfy the genometric predicate}
45
-#' \item{int (intersection): outputs the overlapping part (intersection) 
46
-#' of the left_input_data and right_input_data regions that satisfy 
47
-#' the genometric predicate; if the intersection is empty, 
48
-#' no output is produced}
49
-#' \item{contig: outputs the concatenation between the left_input_data and 
50
-#' right_input_data regions that satisfy the genometric predicate, 
51
-#' (i.e. the output regionis defined as having left (right) coordinates
52
-#' equal to the minimum (maximum) of the corresponding coordinate values 
53
-#' in the left_input_data and right_input_data regions satisfying 
42
+#' \item{LEFT: It outputs the anchor regions from 'x' that satisfy the 
43
+#' genometric predicate}
44
+#' \item{RIGHT: It outputs the experiment regions from 'y' that satisfy the 
45
+#' genometric predicate}
46
+#' \item{INT (intersection): It outputs the overlapping part (intersection) 
47
+#' of the 'x' and 'y' regions that satisfy the genometric predicate; if the 
48
+#' intersection is empty, no output is produced}
49
+#' \item{CAT: It outputs the concatenation between the 'x' and 'y' regions 
50
+#' that satisfy the genometric predicate, (i.e. the output regionis defined as 
51
+#' having left (right) coordinates equal to the minimum (maximum) of the 
52
+#' corresponding coordinate values in the 'x' and 'y' regions satisfying 
54 53
 #' the genometric predicate)}
55 54
 #' }
56 55
 #'
... ...
@@ -59,18 +58,26 @@
59 58
 #' 
60 59
 #' @examples
61 60
 #' 
62
-#' # Given a dataset 'hm' and one called 'tss' with a sample including 
63
-#' # Transcription Start Site annotations, it searches for those regions of hm 
64
-#' # that are at a minimal distance from a transcription start site (TSS) 
65
-#' # and takes the first/closest one for each TSS, provided that such distance 
66
-#' # is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained 
67
-#' # from the same provider (joinby clause).
61
+#' ## Thi statement initializes and runs the GMQL server for local execution 
62
+#' ## and creation of results on disk. Then, with system.file() it defines 
63
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
64
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL 
65
+#' ## dataset named "exp" and "ref" respectively using customParser
68 66
 #' 
69 67
 #' init_gmql()
70 68
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
71 69
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
72 70
 #' TSS = read_dataset(test_path)
73 71
 #' HM = read_dataset(test_path2)
72
+#' 
73
+#' ## Given a dataset 'HM' and one called 'TSS' with a sample including 
74
+#' ## Transcription Start Site annotations, it searches for those regions of HM 
75
+#' ## that are at a minimal distance from a transcription start site (TSS) 
76
+#' ## and takes the first/closest one for each TSS, provided that such distance 
77
+#' ## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained 
78
+#' ## from the same provider (joinby clause).
79
+#' 
80
+#' 
74 81
 #' join_data = merge(TSS, HM, 
75 82
 #' genometric_predicate = list(MD(1), DLE(120000)), DF("provider"), 
76 83
 #' region_output = "RIGHT")
... ...
@@ -82,7 +89,7 @@
82 89
 #' @export
83 90
 setMethod("merge", c("GMQLDataset","GMQLDataset"),
84 91
                 function(x, y, genometric_predicate = NULL, 
85
-                    region_output = "contig", ...)
92
+                    region_output = "contig", joinBy = NULL)
86 93
                 {
87 94
                     ptr_data_x <- x@value
88 95
                     ptr_data_y <- y@value
... ...
@@ -123,9 +130,12 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy,
123 130
         join_condition_matrix <- .jnull("java/lang/String")
124 131
     
125 132
     ouput <- toupper(region_output)
126
-    if(!identical(ouput,"CONTIG") && !identical(ouput,"LEFT") && 
127
-        !identical(ouput,"RIGHT") && !identical(ouput,"INT"))
128
-        stop("region_output must be contig,left,right or int (intersection)")
133
+    if(!identical(ouput,"CAT") && !identical(ouput,"LEFT") && 
134
+        !identical(ouput,"RIGHT") && !identical(ouput,"INT") && 
135
+        !identical(ouput,"RIGHT_DIST") && !identical(ouput,"BOTH") &&
136
+        !identical(ouput,"LEFT_DIST"))
137
+        stop("region_output must be cat, left, right, right_dist, left_dist 
138
+                or int (intersection)")
129 139
     
130 140
     WrappeR <- J("it/polimi/genomics/r/Wrapper")
131 141
     response <- WrappeR$join(genomatrix,join_condition_matrix, 
... ...
@@ -1,12 +1,12 @@
1 1
 #' Method map
2 2
 #'
3 3
 #' It computes, for each sample in the right dataset, aggregates over the 
4
-#' values of the right regions that intersect with a region in a left sample, 
5
-#' for each region of each sample in the left dataset;
4
+#' values of the right dataset regions that intersect with a region in a left 
5
+#' dataset sample, for each region of each sample in the left dataset;
6 6
 #' The number of generated output samples is the Cartesian product 
7 7
 #' of the samples in the two input datasets;
8
-#' each output sample has the same regions as the related input left sample, 
9
-#' with their attributes and values, plus the attributes computed as 
8
+#' each output sample has the same regions as the related input left dataset 
9
+#' sample, with their attributes and values, plus the attributes computed as 
10 10
 #' aggregates over right region values.
11 11
 #' Output sample metadata are the union of the related input sample metadata,
12 12
 #' whose attribute names are prefixed with "left" or "right" respectively.
... ...
@@ -22,41 +22,51 @@
22 22
 #' @param x GMQLDataset class object
23 23
 #' @param y GMQLDataset class object 
24 24
 #' 
25
-#' @param ... Additional arguments for use in specific methods.
26
-#' 
27
-#' In this case a series of element in the form \emph{key} = \emph{aggregate}.
28
-#' The \emph{aggregate} is an object of class AGGREGATES
29
-#' The aggregate functions available are: \code{\link{SUM}}, 
25
+#' @param ... a series of expressions separated by comma in the form 
26
+#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of 
27
+#' class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, 
30 28
 #' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, 
31 29
 #' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, 
32 30
 #' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, 
33 31
 #' \code{\link{Q2}}, \code{\link{Q3}}.
34
-#' Every aggregate accepts a string value, execet for COUNT
35
-#' Argument of 'aggregate' must exist in schema
36
-#' Two style are allowed:
32
+#' Every aggregate accepts a string value, except for COUNT, which does not 
33
+#' have any value.
34
+#' Argument of 'aggregate function' must exist in schema, i.e. among region 
35
+#' attributes. Two styles are allowed:
37 36
 #' \itemize{
38 37
 #' \item list of key-value pairs: e.g. sum = SUM("pvalue")
39 38
 #' \item list of values: e.g. SUM("pvalue")
40 39
 #' }
41 40
 #' "mixed style" is not allowed
42 41
 #'
43
-#' @param joinBy list of evalation function to define condition 
44
-#' evaluation on metadata:
42
+#' @param joinBy list of evalation functions to define evaluation on metadata:
45 43
 #' \itemize{
46
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match 
47
-#' if they both end with value and, if they have a further prefixes,
48
-#' the two prefix sequence are identical}
49
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly 
50
-#' as value will match; no further prefixes are allowed. }
51
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match 
52
-#' if both end with value.}
44
+#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
45
+#' if they both end with \emph{value} and, if they have further prefixes,
46
+#' the two prefix sequence are identical.}
47
+#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
48
+#' as \emph{value} match; no further prefixes are allowed.}
49
+#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
50
+#' if both end with \emph{value}.}
53 51
 #' }
54 52
 #' 
55 53
 #' @return GMQLDataset object. It contains the value to use as input 
56 54
 #' for the subsequent GMQLDataset method
57 55
 #' 
58 56
 #' @examples
59
-#'
57
+#' 
58
+#' ## Thi statement initializes and runs the GMQL server for local execution 
59
+#' ## and creation of results on disk. Then, with system.file() it defines 
60
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
61
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL 
62
+#' ## dataset named "exp" and "ref" respectively using customParser
63
+#' 
64
+#' init_gmql()
65
+#' test_path <- system.file("example", "DATASET", package = "RGMQL")
66
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
67
+#' exp = read_dataset(test_path)
68
+#' ref = read_dataset(test_path2)
69
+#' 
60 70
 #' # It counts the number of regions in each sample from exp that overlap with 
61 71
 #' # a ref region, and for each ref region it computes the minimum score 
62 72
 #' # of all the regions in each exp sample that overlap with it. 
... ...
@@ -66,12 +76,7 @@
66 76
 #' # but with a different value from the one(s) of ref sample(s), 
67 77
 #' # are disregarded.
68 78
 #' 
69
-#' init_gmql()
70
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
71
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
72
-#' exp = read_dataset(test_path)
73
-#' ref = read_dataset(test_path2)
74
-#' out = map(ref,exp, minScore = MIN("score"), 
79
+#' out = map(ref, exp, minScore = MIN("score"), 
75 80
 #' joinBy = list(DF("cell_tissue")))
76 81
 #' 
77 82
 #' @name map
... ...
@@ -1,6 +1,6 @@
1 1
 #' GMQL Function: EXECUTE
2 2
 #'
3
-#' Execute GMQL query.
3
+#' It executes GMQL query.
4 4
 #' The function works only after invoking at least one collect
5 5
 #' 
6 6
 #' @importFrom rJava J
... ...
@@ -8,15 +8,24 @@
8 8
 #' @return None
9 9
 #'
10 10
 #' @examples
11
-#'
11
+#' ## Thi statement initializes and runs the GMQL server for local execution 
12
+#' ## and creation of results on disk. Then, with system.file() it defines 
13
+#' ## the path to the folders "DATASET" in the subdirectory "example" 
14
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
15
+#' ## named "data"
16
+#' 
12 17
 #' init_gmql()
13 18
 #' test_path <- system.file("example","DATASET",package = "RGMQL")
14
-#' rd = read_dataset(test_path)
15
-#' filtered = filter(rd)
16
-#' aggr = aggregate(filtered, list(DF("antibody_targer","cell_karyotype")))
17
-#' collect(aggr, dir_out = test_path)
19
+#' data = read_dataset(test_path)
20
+#' 
21
+#' ## The following statement materialize the dataset, previoulsy read, at 
22
+#' ## th specific destination path into local folder "ds1" opportunely created
23
+#' 
24
+#' collect(data, dir_out = test_path)
18 25
 #' 
26
+#' ## This statement executes GMQL query.
19 27
 #' \dontrun{
28
+#' 
20 29
 #' execute()
21 30
 #' }
22 31
 #' @export
... ...
@@ -147,32 +156,40 @@ gmql_materialize <- function(input_data, dir_out, name)
147 156
 
148 157
 #' Method take
149 158
 #'
150
-#' It saves the contents of a dataset that contains samples metadata 
151
-#' and samples regions as GrangesList.
152
-#' It is normally used to store in memory the contents of any dataset 
159
+#' It saves the content of a dataset that contains samples metadata 
160
+#' and samples regions as GRangesList.
161
+#' It is normally used to store in memory the content of any dataset 
153 162
 #' generated during a GMQL query. The operation can be very time-consuming.
154 163
 #' If you have invoked any materialization before take function, 
155
-#' all those dataset will be materialized as folder.
164
+#' all those datasets are materialized as folders.
156 165
 #'
157
-#' @import GenomicRanges
166
+#' @importFrom GenomicRanges makeGRangesFromDataFrame
167
+#' @importFrom S4Vectors metadata
158 168
 #' @importFrom stats setNames
159
-#' @importFrom rJava J
160
-#' @importFrom rJava .jevalArray
169
+#' @importFrom rJava J .jevalArray
170
+#' @importFrom GenomicRanges GRangesList
161 171
 #' 
162 172
 #' @param data returned object from any GMQL function
163 173
 #' @param rows number of rows for each sample regions that you want to 
164
-#' retrieve and stored in memory.
165
-#' by default is 0 that means take all rows for each sample
174
+#' retrieve and store in memory.
175
+#' By default it is 0 that means take all rows for each sample
166 176
 #' 
167
-#' @param ... Additional arguments for use in specific methods
177
+#' @param ... Additional arguments for use in other specific methods of the 
178
+#' generic take function
168 179
 #' 
169
-#' @return GrangesList with associated metadata
180
+#' @return GRangesList with associated metadata
170 181
 #'
171 182
 #' @examples
172
-#'
183
+#' ## This statement initializes and runs the GMQL server for local execution 
184
+#' ## and creation of results on disk. Then, with system.file() it defines 
185
+#' ## the path to the folder "DATASET" in the subdirectory "example"
186
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
187
+#' ## named "rd" using customParser
188
+#' 
173 189
 #' init_gmql()
174 190
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
175 191
 #' rd = read_dataset(test_path)
192
+#' 
176 193
 #' aggr = aggregate(rd, list(DF("antibody_target", "cell_karyotype")))
177 194
 #' taken <- take(aggr, rows = 45)
178 195
 #' 
... ...
@@ -36,7 +36,7 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
36 36
 #' those in the input dataset. It allows to:
37 37
 #' \itemize{
38 38
 #' \item{Remove existing metadata and/or region attributes from a dataset}
39
-#' \item{Update new metadata and/or region attributes in the result}
39
+#' \item{Update or set new metadata and/or region attributes in the result}
40 40
 #' }
41 41
 #' 
42 42
 #' @importFrom rJava J .jnull .jarray
... ...
@@ -44,25 +44,25 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
44 44
 #' 
45 45
 #' @param .data GMQLDataset class object
46 46
 #' 
47
-#' @param metadata vector of string made up by metadata attribute
48
-#' @param regions vector of string made up by schema field attribute
49
-#' @param all_but_reg logical value indicating which schema field attribute 
50
-#' you want to exclude; if FALSE only the regions you choose is kept 
51
-#' in the output of the project operation, if TRUE the schema region 
52
-#' are all except ones include in region parameter.
53
-#' if regions is not defined \emph{all_but_reg} is not considerd.
47
+#' @param metadata vector of string made up by metadata attributes
48
+#' @param regions vector of string made up by region attributes
49
+#' @param all_but_reg logical value indicating which region attributes
50
+#' you want to exclude; if FALSE, only the regions you choose are kept 
51
+#' in the output of the operation; if TRUE the regions
52
+#' are all kept except those in region parameter.
53
+#' If regions is not defined, \emph{all_but_reg} is not considerd.
54 54
 #' @param all_but_meta logical value indicating which metadata 
55
-#' you want to exclude; If FALSE only the metadata you choose is kept 
56
-#' in the output of the project operation, if TRUE the metadata 
57
-#' are all except ones include in region parameter.
58
-#' if metadata is not defined \emph{all_but_meta} is not considerd.
55
+#' you want to exclude; If FALSE only the metadata you choose are kept 
56
+#' in the output of the operation; if TRUE the metadata 
57
+#' are all kept except those in metadata.
58
+#' If metadata is not defined \emph{all_but_meta} is not considerd.
59 59
 #' @param regions_update list of updating rules in the form of 
60
-#' key = value generating new genomic region attributes.
60
+#' key = value generating new genomic region attributes and values.
61 61
 #' To specify the new values, the following options are available:
62 62
 #' \itemize{
63 63
 #' \item{All aggregation functions already defined by AGGREGATES object}
64 64
 #' \item{All basic mathematical operations (+, -, *, /), including parenthesis}
65
-#' \item{SQRT, META, NIL constructor object defined by OPERATOR object}
65
+#' \item{SQRT, META, NIL constructor objects defined by OPERATOR object}
66 66
 #' }
67 67
 #' @param metadata_update list of updating rules in the form of 
68 68
 #' key = value generating new metadata.
... ...
@@ -70,13 +70,22 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
70 70
 #' \itemize{
71 71
 #' \item{All aggregation functions already defined by AGGREGATES object}
72 72
 #' \item{All basic mathematical operations (+, -, *, /), including parenthesis}
73
-#' \item{SQRT, META, NIL constructor object defined by OPERATOR object}
73
+#' \item{SQRT, META, NIL constructor objects defined by OPERATOR object}
74 74
 #' }
75 75
 #'  
76 76
 #' @return GMQLDataset object. It contains the value to use as input 
77 77
 #' for the subsequent GMQLDataset method
78 78
 #'
79 79
 #' @examples
80
+#' ## This statement initializes and runs the GMQL server for local execution 
81
+#' ## and creation of results on disk. Then, with system.file() it defines 
82
+#' ## the path to the folders "DATASET" in the subdirectory "example" 
83
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
84
+#' ## named "data"
85
+#' 
86
+#' init_gmql()
87
+#' test_path <- system.file("example", "DATASET", package = "RGMQL")
88
+#' data = read_dataset(test_path)
80 89
 #' 
81 90
 #' ## It creates a new dataset called CTCF_NORM_SCORE by preserving all 
82 91
 #' ## region attributes apart from score, and creating a new region attribute 
... ...
@@ -86,16 +95,12 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
86 95
 #' ## a new metadata attribute called normalized with value 1, 
87 96
 #' ## which can be used in future selections.
88 97
 #' 
89
-#' init_gmql()
90
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
91
-#' input = read_dataset(test_path)
92
-#' CTCF_NORM_SCORE = select(input, metadata_update = list(normalized = 1), 
98
+#' 
99
+#' CTCF_NORM_SCORE = select(data, metadata_update = list(normalized = 1), 
93 100
 #' regions_update = list(new_score = (score / 1000.0) + 100), 
94 101
 #' regions = c("score"), all_but_reg = TRUE)
95 102
 #' 
96 103
 #' 
97
-#' \dontrun{
98
-#' 
99 104
 #' ## It produces an output dataset that contains the same samples 
100 105
 #' ## as the input dataset. 
101 106
 #' ## Each output sample only contains, as region attributes, 
... ...
@@ -104,14 +109,10 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
104 109
 #' ## and as metadata attributes only the specified ones, 
105 110
 #' ## i.e. manually_curated_tissue_status and manually_curated_tumor_tag.
106 111
 #' 
107
-#' init_gmql()
108
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
109
-#' DS_in = read_dataset(test_path)
110
-#' DS_out = select(DS_in, regions = c("variant_classification", 
112
+#' DS_out = select(data, regions = c("variant_classification", 
111 113
 #' "variant_type"), metadata = c("manually_curated_tissue_status", 
112 114
 #' "manually_curated_tumor_tag"))
113
-#' 
114
-#' }
115
+#'
115 116
 #' 
116 117
 #' @name select
117 118
 #' @rdname select
... ...
@@ -1,131 +1,67 @@
1
-#' Init GMQL Server
2
-#'
3
-#' Initialize and run GMQL server for executing GMQL query
4
-#' It is also perform a login to GMQL REST services suite if needed
5
-#' 
6
-#' @importFrom rJava J
7
-#' 
8
-#' @param output_format string identifies the output format of sample files.
9
-#' Can be TAB, GTF or COLLECT
10
-#' \itemize{
11
-#' \item{TAB: tab delimited file format}
12
-#' \item{GTF: file format used to hold information about gene structure.
13
-#' It is a tab-delimited text format based on the general feature format}
14
-#' \item{COLLECT: used for storing output in memory}
15
-#' }
16
-#' @param remote_processing logical value specifying the processing mode.
17
-#' True for processing on cluster (remote), false for local processing.
18
-#' 
19
-#' @param url string url of server: It must contain the server address 
20
-#' and base url; service name is added automatically.
21
-#' If null, no login is performed.
22
-#' You can always perform it, calling the function \code{\link{login_gmql}} 
23
-#' explicitly
24
-#' 
25
-#' @param username string name used during signup 
26
-#' @param password string password used during signup
27
-#' 
28
-#' @return None
29
-#'
30
-#' @examples
31
-#'
32
-#' ## initialize GMQL with local processing with sample files output format 
33
-#' ## as Tab delimited
34
-#' 
35
-#' init_gmql("tab", FALSE)
36
-#' 
37
-#' \dontrun{
38
-#' 
39
-#' ## initialize GMQL with remote processing
40
-#' remote_url = "http://130.186.13.219/gmql-rest"
41
-#' init_gmql(remote_processing = TRUE, url = remote_url)
42
-#' 
43
-#' }
44
-#' 
45
-#' @export
46
-#'
47
-init_gmql <- function(output_format = "gtf", remote_processing = FALSE, 
48
-                        url = NULL, username = NULL, password = NULL)
49
-{
50
-    out_format <- toupper(output_format)
51
-    if(!identical(out_format,"TAB") && !identical(out_format,"GTF") && 
52
-        !identical(out_format,"COLLECT"))
53
-        stop("output_format must be TAB, GTF or COLLECT")
54
-    .check_logical(remote_processing)
55
-    
56
-    # mettere attesa da input keyboard, controllare se token già esiste 
57
-    # da sessione precedente
58
-    if(!is.null(url) && !exists("authToken",envir = .GlobalEnv))
59
-        login_gmql(url,username,password)
60
-    
61
-    WrappeR <- J("it/polimi/genomics/r/Wrapper")
62
-    WrappeR$initGMQL(out_format,remote_processing)
63
-}
64
-
65 1
 #' Function read
66 2
 #'
67
-#' Read a GMQL dataset, folder containig some homogenus sample from disk 
68
-#' or GrangesList saving in Scala memory that can be referenced in R.
69
-#' Also used to read a repository dataset in case of remote processing.
3
+#' It reads a GMQL dataset, as a folder containig some homogenus samples on 
4
+#' disk or as a GrangesList; it saving in Scala memory in a way that can be 
5
+#' referenced in R. It is also used to read a repository dataset in case of
6
+#' remote processing.
70 7
 #' 
71
-#' @importFrom rJava .jnull
72
-#' @importFrom rJava .jarray
8
+#' @importFrom rJava J .jnull .jarray
73 9
 #' @importFrom methods is
74
-#' @importFrom rJava J
75 10
 #' 
76
-#' @param dataset folder path for GMQL dataset or datasetname on repository
11
+#' @param dataset folder path for GMQL dataset or dataset name on repository
77 12
 #' @param parser string used to parsing dataset files
78 13
 #' The Parser's available are:
79 14
 #' \itemize{
80
-#' \item{BedParser}
81 15
 #' \item{ANNParser}
82 16
 #' \item{BroadProjParser}
83
-#' \item{BedParser}
84 17
 #' \item{NarrowPeakParser}
85 18
 #' \item{RnaSeqParser}
86 19
 #' \item{CustomParser.}
87 20
 #' }
88 21
 #' Default is CustomParser.
89 22
 #' @param is_local logical value indicating local or remote dataset
90
-#' @param is_GMQL logical value indicating if is a GMQL dataset or not 
23
+#' @param is_GMQL logical value indicating GMQL dataset or not 
91 24
 #' 
92 25
 #' @return GMQLDataset object. It contains the value to use as input 
93 26
 #' for the subsequent GMQLDataset method
94 27
 #' 
95 28
 #' @details
96 29
 #' Normally a GMQL dataset contains an XML schema file that contains
97
-#' name of column header. (e.g chr, start, stop, strand)
98
-#' The CustomParser read this XML schema; 
99
-#' if you already know what kind of schema your files are, use one of the 
100
-#' parser defined without reading any XML schema
30
+#' name of region attributes. (e.g chr, start, stop, strand)
31
+#' The CustomParser reads this XML schema; 
32
+#' if you already know what kind of schema your files have, use one of the 
33
+#' parsers defined, without reading any XML schema.
101 34
 #' 
102
-#' If GrangesList has no metadata: i.e. metadata() is empty, two metadata are
35
+#' If GRangesList has no metadata: i.e. metadata() is empty, two metadata are
103 36
 #' generated.
104 37
 #' \itemize{
105
-#' \item{"Provider" = "Polimi"}
106
-#' \item{"Application" = "RGMQL"}
38
+#' \item{"provider" = "PoliMi"}
39
+#' \item{"application" = "RGMQL"}
107 40
 #' }
108 41
 #'
109 42
 #' @examples
110 43
 #' 
111
-#' ## read local dataset with CustomParser
44
+#' ## Thi statement initializes and runs the GMQL server for local execution 
45
+#' ## and creation of results on disk. Then, with system.file() it defines 
46
+#' ## the path to the folders "DATASET" in the subdirectory "example" 
47
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
48
+#' ## named "data"
49
+#' 
112 50
 #' init_gmql()
113 51
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
114
-#' r = read_dataset(test_path)
52
+#' data = read_dataset(test_path)
115 53
 #' 
116
-#' \dontrun{
54
+#' ## This statement opens such folder as a GMQL dataset named "data" using 
55
+#' ## "NarrowPeakParser" 
56
+#' dataPeak = read_dataset(test_path,"NarrowPeakParser")
117 57
 #' 
118
-#' ## read local dataset with other Parser
119
-#' init_gmql()
120
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
121
-#' r = read_dataset(test_path,"ANNParser")
58
+#' ## This statement reads a remote public dataset stored into GMQL system 
59
+#' ## repository. For a public dataset in a (remote) GMQL repository the 
60
+#' ## prefix "public." is needed before dataset name
122 61
 #' 
123
-#' ## read remote public dataset stored into GMQL system repository 
124
-#' ## If public dataset a prefix "public." is needed before dataset name
125
-#' r2 = read_dataset("public.HG19_TCGA_dnaseq",is_local = FALSE)
62
+#' data1 = read_dataset("public.Example_Dataset1",is_local = FALSE)
126 63
 #' 
127
-#' }
128
-#' @name read
64
+#' @name read_dataset
129 65
 #' @rdname read-function
130 66
 #' @export
131 67
 #'
... ...
@@ -188,12 +124,11 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE,
188 124
 
189 125
 
190 126
 #' @importFrom S4Vectors metadata
191
-#' @importFrom rJava J
192
-#' @importFrom rJava .jarray
127
+#' @importFrom rJava J .jarray
193 128
 #' 
194
-#' @param samples GrangesList
129
+#' @param samples GRangesList
195 130
 #' 
196
-#' @name read
131
+#' @name read_dataset
197 132
 #' @rdname read-function
198 133
 #' @export
199 134
 #'
... ...
@@ -210,7 +145,7 @@ read <- function(samples)
210 145
         warning("GrangesList has no metadata.
211 146
 We provide two metadata for you")
212 147
         index_meta <- rep(1:len,each = len)
213
-        rep_meta <- rep(c("Provider","Polimi", "Application", "R-GMQL"),
148
+        rep_meta <- rep(c("provider","PoliMi", "application", "RGMQL"),
214 149
                             times=len)
215 150
         meta_matrix <- matrix(rep_meta,ncol = 2,byrow = TRUE)
216 151
         meta_matrix <- cbind(index_meta,meta_matrix)
... ...
@@ -275,37 +210,3 @@ We provide two metadata for you")
275 210
     parser
276 211
 }
277 212
 
278
-#' Disable or Enable remote processing
279
-#'
280
-#' It allows to enable or disable remote processing 
281
-#' 
282
-#' @details 
283
-#' The invocation of this function allow to change mode of processing.
284
-#' after invoking collect() is not possbile to switch the processing mode, 
285
-#' 
286
-#' @importFrom rJava J
287
-#' 
288
-#' @param is_remote logical value used in order to set the processing mode.
289
-#' TRUE you will set a remote query processing mode otherwise will be local,
290
-#' 
291
-#' @return None
292
-#' 
293
-#' @examples
294
-#' 
295
-#' # initialize with remote processing off
296
-#' init_gmql("tab",remote_processing = FALSE)
297
-#' 
298
-#' # change processing mode to remote
299
-#' remote_processing(TRUE)
300
-#'
301
-#' @export
302
-#' 
303
-remote_processing<-function(is_remote)
304
-{
305
-    WrappeR <- J("it/polimi/genomics/r/Wrapper")
306
-    .check_logical(is_remote)
307
-    response <- WrappeR$remote_processing(is_remote)
308
-    print(response)
309
-}
310
-
311
-
... ...
@@ -29,12 +29,13 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
29 29
 
30 30
 #' Method filter
31 31
 #' 
32
-#' It creates a new dataset from an existing one by extracting a subset of 
33
-#' samples and/or regions from the input dataset according to their predicate.
34
-#' each sample in the output dataset has the same region attributes, 
32
+#' @description Wrapper to GMQL SELECT operator 
33
+#' @description It creates a new dataset from an existing one by extracting a 
34
+#' subset of samples and/or regions from the input dataset according to their 
35
+#' predicate. Each sample in the output dataset has the same region attributes, 
35 36
 #' values, and metadata as in the input dataset.
36 37
 #' When semijoin function is defined, it extracts those samples containing 
37
-#' all metadata attribute defined in semijoin clause with at least 
38
+#' all metadata attributes defined in semijoin clause with at least 
38 39
 #' one metadata value in common with semijoin dataset.
39 40
 #' If no metadata in common between input dataset and semijoin dataset, 
40 41
 #' no sample is extracted.
... ...
@@ -45,11 +46,11 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
45 46
 #' @importFrom dplyr filter
46 47
 #' 
47 48
 #' @param .data GMQLDataset class object
48
-#' @param m_predicate logical predicate made up by R logical operation 
49
-#' on metadata attribute. 
49
+#' @param m_predicate logical predicate made up by R logical operations 
50
+#' on metadata attributes. 
50 51
 #' Only !, |, ||, &, && are admitted.
51
-#' @param r_predicate logical predicate made up by R logical operation 
52
-#' on schema region values. 
52
+#' @param r_predicate logical predicate made up by R logical operations 
53
+#' on region attributes. 
53 54
 #' Only !, |, ||, &, && are admitted.
54 55
 #' 
55 56
 #' @param semijoin \code{\link{semijoin}} function to define filter method 
... ...
@@ -61,38 +62,45 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
61 62
 #' 
62 63
 #' @examples
63 64
 #' 
64
-#' ## It selects from input data samples of patients younger than 70 years old, 
65
-#' ## based on filtering on sample metadata attribute Patient_age
65
+#' ## This statement initializes and runs the GMQL server for local execution 
66
+#' ## and creation of results on disk. Then, with system.file() it defines 
67
+#' ## the path to the folders "DATASET" in the subdirectory "example" 
68
+#' ## of the package "RGMQL" and opens such folder as a GMQL dataset 
69
+#' ## named "data"
66 70
 #' 
67 71
 #' init_gmql()
68 72
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
69
-#' input <- read_dataset(test_path)
70
-#' s <- filter(input, Patient_age < 70)
73
+#' data <- read_dataset(test_path) 
71 74
 #' 
72
-#' \dontrun{
75
+#' ## This statement selects from input data samples of patients younger 
76
+#' ## than 70 years old, based on filtering on sample metadata attribute 
77
+#' ## 'patient_age'
73 78
 #' 
74
-#' ## It creates a new dataset called 'jun_tf' by selecting those samples and 
75
-#' ## their regions from the existing 'data' dataset such that:
79
+#' filter_data <- filter(data, patient_age < 70)
80
+#' 
81
+#' ## This statement defines the path to the folders "DATASET_GDM" in the 
82
+#' ## subdirectory "example" of the package "RGMQL" and opens such folder 
83
+#' ## as a GMQL dataset named "join_data"
84
+#' 
85
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
86
+#' join_data <- read_dataset(test_path2) 
87
+#' 
88
+#' ## This statement creates a new dataset called 'jun_tf' by selecting those 
89
+#' ## samples and their regions from the existing 'data' dataset such that:
76 90
 #' ## Each output sample has a metadata attribute called antibody_target 
77 91
 #' ## with value JUN.
78 92
 #' ## Each output sample also has not a metadata attribute called "cell" 
79 93
 #' ## that has the same value of at least one of the values that a metadata 
80 94
 #' ## attribute equally called cell has in at least one sample 
81 95
 #' ## of the 'join_data' dataset.
82
-#' ## For each sample satisfying previous condition,only its regions that 
96
+#' ## For each sample satisfying previous conditions, only its regions that 
83 97
 #' ## have a region attribute called pValue with the associated value 
84 98
 #' ## less than 0.01 are conserved in output
85 99
 #' 
86
-#' 
87
-#' init_gmql()
88
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
89
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
90
-#' data <- read_dataset(test_path)
91
-#' join_data <- read_dataset(test_path2)
92 100
 #' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
93 101
 #' semijoin(join_data, TRUE, list(DF("cell"))))
94 102
 #' 
95
-#' }
103
+#' 
96 104
 #' @name filter
97 105
 #' @rdname filter
98 106
 #' @aliases filter,GMQLDataset-method
... ...
@@ -124,30 +132,45 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
124 132
         
125 133
 }
126 134
 
127
-#' Semijoin Condtion
135
+#' Semijoin condtion
128 136
 #' 
129
-#' This function is use as support to filter method to define 
137
+#' This function is used as support to the filter method to define 
130 138
 #' semijoin conditions on metadata  
131 139
 #' 
132 140
 #' @param data GMQLDataset class object
133 141
 #' 
134
-#' @param not_in logical value: T => semijoin is perfomed 
135
-#' considering semi_join NOT IN semi_join_dataset, F => semijoin is performed 
136
-#' considering semi_join IN semi_join_dataset
142
+#' @param not_in logical value: TRUE => for a given sample of input dataset
143
+#' ".data" in \code{\link{filter}} method if and only if there exists at 
144
+#' least one sample in dataset 'data' with metadata attributes defined 
145
+#' in groupBy and these attributes of 'data' have at least one value in 
146
+#' common with the same attributes defined in one sample of '.data'
147
+#' FALSE => semijoin condition is evaluated accordingly.
137 148
 #' 
138
-#' @param groupBy it define condition evaluation on metadata.
149
+#' @param groupBy list of evalation functions to define evaluation on metadata:
139 150
 #' \itemize{
140
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match 
141
-#' if they both end with value and, if they have a further prefixes,
142
-#' the two prefix sequence are identical}
143
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly 
144
-#' as value will match; no further prefixes are allowed. }
145
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match 
146
-#' if both end with value.}
151
+#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
152
+#' if they both end with \emph{value} and, if they have further prefixes,
153
+#' the two prefix sequence are identical.}
154
+#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
155
+#' as \emph{value} match; no further prefixes are allowed.}
156
+#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
157
+#' if both end with \emph{value}.}
147 158
 #' }
148 159
 #' 
149 160
 #' @examples
150 161
 #' 
162
+#' ## These statements initializes and runs the GMQL server for local execution 
163
+#' ## and creation of results on disk. Then, with system.file() it defines 
164
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
165
+#' ## "example" of the package "RGMQL" and opens such folders as a GMQL dataset 
166
+#' ## named "data" and "join_data" respectively
167
+#' 
168
+#' init_gmql()
169
+#' test_path <- system.file("example", "DATASET", package = "RGMQL")
170
+#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
171
+#' data <- read_dataset(test_path)
172
+#' join_data <-  read_dataset(test_path2)
173
+#' 
151 174
 #' # It creates a new dataset called 'jun_tf' by selecting those samples and 
152 175
 #' # their regions from the existing 'data' dataset such that:
153 176
 #' # Each output sample has a metadata attribute called antibody_target 
... ...
@@ -156,21 +179,16 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
156 179
 #' # that has the same value of at least one of the values that a metadata 
157 180
 #' # attribute equally called cell has in at least one sample 
158 181
 #' # of the 'join_data' dataset.
159
-#' # For each sample satisfying previous condition,only its regions that 
182
+#' # For each sample satisfying previous conditions, only its regions that 
160 183
 #' # have a region attribute called pValue with the associated value 
161 184
 #' # less than 0.01 are conserved in output
162 185
 #' 
163
-#' 
164
-#' init_gmql()
165
-#' test_path <- system.file("example", "DATASET", package = "RGMQL")
166
-#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
167
-#' data <- read_dataset(test_path)
168
-#' join_data <-  read_dataset(test_path2)
169
-#' jun_tf <- filter(data,NULL,NULL, semijoin(join_data, TRUE, 
170
-#' list(DF("cell"))))
186
+#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
187
+#' semijoin(join_data, TRUE, list(DF("cell"))))
171 188
 #' 
172 189
 #' @return semijoin condition as list
173 190
 #' @export
191
+#' 
174 192
 semijoin <- function(data, not_in = FALSE, groupBy = NULL)
175 193
 {
176 194
     if(!is.list(groupBy))
... ...
@@ -1,25 +1,21 @@
1 1
 #' Method union
2 2
 #' 
3
-#' @description Wrapper to GMQL union function
3
+#' @description Wrapper to GMQL UNION operator
4 4
 #' 
5 5
 #' @description It is used to integrate homogeneous or heterogeneous samples 
6 6
 #' of two datasets within a single dataset; for each sample of either input 
7 7
 #' dataset, a result sample is created as follows:
8 8
 #' \itemize{
9 9
 #' \item {Metadata are the same as in the original sample.}
10
-#' \item {Resulting schema is obtained by projecting the schema 
11
-#' of the right dataset over the schema of the left one
12
-#' (more properly, it will be performed by adding to the schema of the 
13
-#' left dataset the region attributes of the right dataset which are not 
14
-#' identical to those of the left dataset)}
10
+#' \item {Resulting schema is is the schema of the left input dataset. }
15 11
 #' \item {Regions are the same (in coordinates and attribute values) 
16
-#' as in the original sample.
12
+#' as in the original sample if it is from the left input dataset; 
13
+#' if it is from the right input dataset, its regions are the same in 
14
+#' coordinates, but only region attributes identical (in name and type) to 
15
+#' those of the first input dataset are retained, with the same values.
17 16
 #' Region attributes which are missing in an input dataset sample 
18 17
 #' w.r.t. the merged schema are set to null.}
19 18
 #' }
20
-#' For what concerns metadata, attributes of samples from the left (right) 
21
-#' input dataset are prefixed with the strings LEFT (RIGHT), so as to trace 
22
-#' the dataset to which they originally belonged.
23 19
 #' 
24 20
 #' @importFrom rJava J
25 21
 #' @importFrom BiocGenerics union
... ...
@@ -32,10 +28,11 @@
32 28
 #'
33 29
 #' @examples
34 30
 #' 
35
-#' ## It creates a dataset called full which contains all samples from the 
36
-#' ## datasets data1 and data2 whose schema is defined by merging the two 
37
-#' ## dataset schemas.
38
-#' ## (union of all the attributes present in the two input datasets).
31
+#' ## Thi statement initializes and runs the GMQL server for local execution 
32
+#' ## and creation of results on disk. Then, with system.file() it defines 
33
+#' ## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
34
+#' ## "example" of the package "RGMQL" and opens such folder as a GMQL 
35
+#' ## dataset named "data1" and "data2" respectively using customParser
39 36
 #' 
40 37
 #' init_gmql()
41 38
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
... ...
@@ -43,6 +40,9 @@
43 40
 #' data1 <- read_dataset(test_path)
44 41
 #' data2 <- read_dataset(test_path2)
45 42
 #' 
43
+#' ## This statement creates a dataset called 'full' which contains all samples 
44
+#' ## from the datasets 'data1' and 'data2'
45
+#' 
46 46
 #' res <- union(data1, data2)
47 47
 #' 
48 48
 #' 
... ...
@@ -13,7 +13,7 @@ if(getRversion() >= "3.1.0")
13 13
 #' Login to GMQL
14 14
 #'
15 15
 #' Login to GMQL REST services suite as a registered user, specifying username 
16
-#' and password, or as guest using the proper GMQL web service available 
16
+#' and password, or as guest, using the proper GMQL web service available 
17 17
 #' on a remote server
18 18
 #' 
19 19
 #' @import httr
... ...
@@ -25,19 +25,20 @@ if(getRversion() >= "3.1.0")
25 25
 #' @param password string password used during signup
26 26
 #'
27 27
 #' @details
28
-#' if both username and password are NULL you will be logged as guest
28
+#' If both username and password are NULL you will be logged as guest.
29 29
 #' After login you will receive an authentication token.
30 30
 #' As token remains vaild on server (until the next login / registration)