Simone authored on 02/01/2018 22:01:09
Showing47 changed files

... ...
@@ -7,6 +7,7 @@ export(AVG)
7 7
 export(BAG)
8 8
 export(BAGD)
9 9
 export(COUNT)
10
+export(COUNTSAMP)
10 11
 export(DESC)
11 12
 export(DG)
12 13
 export(DGE)
... ...
@@ -39,8 +40,8 @@ export(import_gmql)
39 40
 export(init_gmql)
40 41
 export(login_gmql)
41 42
 export(logout_gmql)
42
-export(read)
43
-export(read_dataset)
43
+export(read_GMQL)
44
+export(read_GRangesList)
44 45
 export(remote_processing)
45 46
 export(run_query)
46 47
 export(run_query_fromfile)
... ...
@@ -73,6 +74,7 @@ exportMethods(setdiff)
73 74
 exportMethods(take)
74 75
 exportMethods(union)
75 76
 import(GenomicRanges)
77
+import(RGMQLScalaLib)
76 78
 import(httr)
77 79
 import(xml2)
78 80
 importClassesFrom(GenomicRanges,GRangesList)
... ...
@@ -96,6 +98,7 @@ importFrom(methods,is)
96 98
 importFrom(methods,isClass)
97 99
 importFrom(methods,new)
98 100
 importFrom(plyr,revalue)
101
+importFrom(rJava,.jaddClassPath)
99 102
 importFrom(rJava,.jarray)
100 103
 importFrom(rJava,.jevalArray)
101 104
 importFrom(rJava,.jinit)
... ...
@@ -112,7 +112,7 @@ take_value.META_AGGREGATES <- function(obj){
112 112
 #' 
113 113
 #' init_gmql()
114 114
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
115
-#' exp = read_dataset(test_path)
115
+#' exp = read_GMQL(test_path)
116 116
 #' 
117 117
 #' ## This statement copies all samples of exp dataset into res dataset, and 
118 118
 #' ## then calculates new metadata attribute for each of them: 
... ...
@@ -137,8 +137,7 @@ take_value.META_AGGREGATES <- function(obj){
137 137
 #' ## attribute the average signal of the overlapping regions; 
138 138
 #' ## the result has one sample for each input cell.
139 139
 #' 
140
-#' res = cover(exp, 2, 3, groupBy = list(DF("cell")), 
141
-#' avg_signal = AVG("signal") )
140
+#' res = cover(exp, 2, 3, groupBy = conds("cell"), avg_signal = AVG("signal"))
142 141
 #' 
143 142
 #' ## This statement copies all samples of DATA dataset into OUT dataset, 
144 143
 #' ## and then for each of them it adds another metadata attribute, allScores, 
... ...
@@ -42,7 +42,7 @@ print.PARAMETER <- function(obj){
42 42
 #' 
43 43
 #' init_gmql()
44 44
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
45
-#' exp = read_dataset(test_path)
45
+#' exp = read_GMQL(test_path)
46 46
 #' 
47 47
 #' ## The following statement produces an output dataset with a single 
48 48
 #' ## output sample. The COVER operation considers all areas defined by 
... ...
@@ -85,8 +85,8 @@ check.DISTAL <- function(value)
85 85
 #' init_gmql()
86 86
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
87 87
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
88
-#' TSS = read_dataset(test_path)
89
-#' HM = read_dataset(test_path2)
88
+#' TSS = read_GMQL(test_path)
89
+#' HM = read_GMQL(test_path2)
90 90
 #' 
91 91
 #' ## Given a dataset HM and one called TSS with a sample including 
92 92
 #' ## Transcription Start Site annotations, it searches for those regions of HM 
... ...
@@ -96,7 +96,7 @@ check.DISTAL <- function(value)
96 96
 #' ## obtained from the same provider (joinby clause).
97 97
 #' 
98 98
 #' join_data = merge(TSS, HM, 
99
-#' genometric_predicate = list(MD(1), DL(1200)), DF("provider"), 
99
+#' genometric_predicate = list(MD(1), DL(1200)), conds("provider"), 
100 100
 #' region_output = "RIGHT")
101 101
 #'
102 102
 #' ## Given a dataset 'HM' and one called 'TSS' with a sample including 
... ...
@@ -107,8 +107,8 @@ check.DISTAL <- function(value)
107 107
 #' ## from the same provider (joinby clause).
108 108
 #' 
109 109
 #' join_data = merge(TSS, HM, 
110
-#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), 
111
-#' DF("provider"), region_output = "RIGHT")
110
+#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), conds("provider"), 
111
+#' region_output = "RIGHT")
112 112
 #'
113 113
 #' @name DISTAL-Object
114 114
 #' @aliases DL
... ...
@@ -63,7 +63,7 @@ as.character.OPERATOR <- function(obj) {
63 63
 #' 
64 64
 #' init_gmql()
65 65
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
66
-#' exp = read_dataset(test_path)
66
+#' exp = read_GMQL(test_path)
67 67
 #' 
68 68
 #' ## This statement allows to select, in all input sample, all those regions 
69 69
 #' ## for which the region attribute score has a value which is greater 
... ...
@@ -26,9 +26,6 @@
26 26
 #' @return list of 2-D array containing method of evaluation and metadata 
27 27
 #' attribute name
28 28
 #' 
29
-#' @examples
30
-#' 
31
-#' "Where is my example?"
32 29
 #' 
33 30
 #' @name Evaluation-Function
34 31
 #' @aliases condition_evaluation
... ...
@@ -98,7 +98,7 @@
98 98
 #' 
99 99
 #' init_gmql()
100 100
 #' test_path <- system.file("example","DATASET",package = "RGMQL")
101
-#' exp = read_dataset(test_path)
101
+#' exp = read_GMQL(test_path)
102 102
 #'   
103 103
 #' ## the following statement produces an output dataset with a single output 
104 104
 #' ## sample. The COVER operation considers all areas defined by a minimum 
... ...
@@ -123,7 +123,7 @@
123 123
 #' @aliases cover-method
124 124
 #' @export
125 125
 setMethod("cover", "GMQLDataset",
126
-            function(.data, min_acc, max_acc, groupBy = NULL, 
126
+            function(.data, min_acc, max_acc, groupBy = conds(), 
127 127
                     variation = "cover", ...)
128 128
             {
129 129
                 val <- value(.data)
... ...
@@ -40,8 +40,8 @@
40 40
 #' init_gmql()
41 41
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
42 42
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
43
-#' data1 = read_dataset(test_path)
44
-#' data2 = read_dataset(test_path2)
43
+#' data1 = read_GMQL(test_path)
44
+#' data2 = read_GMQL(test_path2)
45 45
 #' 
46 46
 #' ## This GMQL statement returns all the regions in the first dataset 
47 47
 #' ## that do not overlap any region in the second dataset.
... ...
@@ -61,7 +61,7 @@
61 61
 #' @aliases setdiff-method
62 62
 #' @export
63 63
 setMethod("setdiff", c("GMQLDataset","GMQLDataset"),
64
-            function(x, y, joinBy = NULL, is_exact = FALSE)
64
+            function(x, y, joinBy = conds(), is_exact = FALSE)
65 65
             {
66 66
                 ptr_data_x = value(x)
67 67
                 ptr_data_y = value(y)
... ...
@@ -38,7 +38,7 @@
38 38
 #' 
39 39
 #' init_gmql()
40 40
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
41
-#' data <- read_dataset(test_path)
41
+#' data <- read_GMQL(test_path)
42 42
 #' 
43 43
 #' ## This statement counts the regions in each sample and stores their number 
44 44
 #' ## as value of the new metadata attribute RegionCount of the sample.
... ...
@@ -1,5 +1,5 @@
1
-group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, 
2
-    groupBy_regions = NULL, region_aggregates = NULL, meta_aggregates = NULL)
1
+group_by.GMQLDateset <- function(.data, groupBy_meta = conds(), 
2
+    groupBy_regions = c(""), region_aggregates = NULL, meta_aggregates = NULL)
3 3
 {
4 4
     ptr_data = value(.data)
5 5
     gmql_group(ptr_data, groupBy_meta, groupBy_regions, region_aggregates, 
... ...
@@ -18,7 +18,7 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL,
18 18
 #' groupBy or JoinBy input parameter
19 19
 #' 
20 20
 #' @param groupBy_regions vector of string made up by schema field attribute
21
-#' @param region_aggregates It accept a series of aggregate function on 
21
+#' @param region_aggregates It accept a list of aggregate function on 
22 22
 #' region attribute. 
23 23
 #' All the element in the form \emph{key} = \emph{aggregate}.
24 24
 #' The \emph{aggregate} is an object of class AGGREGATES
... ...
@@ -36,7 +36,7 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL,
36 36
 #' \item list of values: e.g. SUM("pvalue")
37 37
 #' }
38 38
 #' "mixed style" is not allowed
39
-#' @param meta_aggregates It accept a series of aggregate function on 
39
+#' @param meta_aggregates It accept a list of aggregate function on 
40 40
 #' metadata attribute.
41 41
 #' All the element in the form \emph{key} = \emph{aggregate}.
42 42
 #' The \emph{aggregate} is an object of class AGGREGATES
... ...
@@ -61,6 +61,57 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL,
61 61
 #'
62 62
 #' @examples
63 63
 #' 
64
+#' ## This statement initializes and runs the GMQL server for local execution 
65
+#' ## and creation of results on disk. Then, with system.file() it defines 
66
+#' ## the path to the folder "DATASET" in the subdirectory "example"
67
+#' ## of the package "RGMQL" and opens such file as a GMQL dataset named "exp" 
68
+#' ## using customParser
69
+#'
70
+#' init_gmql()
71
+#' test_path <- system.file("example","DATASET",package = "RGMQL")
72
+#' exp = read_GMQL(test_path)
73
+#' 
74
+#' ## This GMQL statement groups samples of the input 'exp' dataset according to 
75
+#' ## their value of the metadata attribute 'tumor_type' and computes the 
76
+#' ## maximum value that the metadata attribute size takes inside the samples 
77
+#' ## belonging to each group. The samples in the output GROUPS_T dataset 
78
+#' ## have a new _group metadata attribute which indicates which group they 
79
+#' ## belong to, based on the grouping on the metadata attribute tumor_type. 
80
+#' ## In addition, they present the new metadata aggregate attribute MaxSize. 
81
+#' ## Note that the samples without metadata attribute tumor_type are assigned 
82
+#' ## to a single group with _group value equal 0
83
+#' 
84
+#' GROUPS_T = group_by(exp, conds("tumor_type"), 
85
+#' meta_aggregates = list(max_size = MAX("size")))
86
+#' 
87
+#' ## This GMQL statement takes as input dataset the same input dataset as 
88
+#' ## the previous example. Yet, it calculates new _group values based on the 
89
+#' ## grouping attribute 'cell', and adds the metadata aggregate attribute 
90
+#' ## 'n_samp', which counts the number of samples belonging to the respective 
91
+#' ## group. It has the following output GROUPS_C dataset samples 
92
+#' ## (note that now no sample has metadata attribute _group with value equal 0 
93
+#' ## since all input samples include the metadata attribute cell, 
94
+#' ## with different values, on which the new grouping is based)
95
+#' 
96
+#' GROUPS_C = group_by(exp, conds("cell"),
97
+#' meta_aggregates = list(n_samp AS COUNTSAMP()))
98
+#' 
99
+#' ## This GMQL statement groups the regions of each 'exp' dataset sample by 
100
+#' ## region coordinates chr, left, right, strand  (these are implicitly 
101
+#' ## considered) and the additional region attribute score (which is explicitly 
102
+#' ## specified), and keeps only one region for each group. 
103
+#' ## In the output GROUPS dataset schema, the new region attributes 
104
+#' ## avg_pvalue and max_qvalue are added, respectively computed as the 
105
+#' ## average of the values taken by the pvalue and the maximum of the values 
106
+#' ## taken by the qvalue region attributes in the regions grouped together, 
107
+#' ## and the computed value is assigned to each region of each output sample. 
108
+#' ## Note that the region attributes which are not coordinates or score are 
109
+#' ## discarded.
110
+#' 
111
+#' GROUPS = group_by(exp, group_reg = "score", 
112
+#' region_aggregates = list(avg_pvalue = AVG("pvalue"), 
113
+#' max_qvalue = MAX("qvalue")))
114
+#' 
64 115
 #' @name group_by
65 116
 #' @rdname group_by
66 117
 #' @aliases group_by,GMQLDataset-method
... ...
@@ -73,8 +73,8 @@
73 73
 #' init_gmql()
74 74
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
75 75
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
76
-#' TSS = read_dataset(test_path)
77
-#' HM = read_dataset(test_path2)
76
+#' TSS = read_GMQL(test_path)
77
+#' HM = read_GMQL(test_path2)
78 78
 #' 
79 79
 #' ## Given a dataset 'HM' and one called 'TSS' with a sample including 
80 80
 #' ## Transcription Start Site annotations, it searches for those regions of HM 
... ...
@@ -83,10 +83,8 @@
83 83
 #' ## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained 
84 84
 #' ## from the same provider (joinby clause).
85 85
 #' 
86
-#' 
87
-#' join_data = merge(TSS, HM, 
88
-#' genometric_predicate = list(MD(1), DLE(120000)), DF("provider"), 
89
-#' region_output = "RIGHT")
86
+#' join_data = merge(TSS, HM, genometric_predicate = list(MD(1), DLE(120000)), 
87
+#' conds("provider"), region_output = "RIGHT")
90 88
 #' 
91 89
 #' 
92 90
 #' @name merge
... ...
@@ -95,7 +93,7 @@
95 93
 #' @export
96 94
 setMethod("merge", c("GMQLDataset","GMQLDataset"),
97 95
                 function(x, y, genometric_predicate = NULL, 
98
-                    region_output = "CAT", joinBy = NULL, reg_attr = NULL)
96
+                    region_output = "CAT", joinBy = conds(), reg_attr = c(""))
99 97
                 {
100 98
                     ptr_data_x <- value(x)
101 99
                     ptr_data_y <- value(y)
... ...
@@ -150,8 +148,8 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy,
150 148
         
151 149
         if(!length(reg_attributes))
152 150
             reg_attributes <- .jnull("java/lang/String")
153
-        
154
-        reg_attributes <- .jarray(reg_attributes)
151
+        else
152
+            reg_attributes <- .jarray(reg_attributes, dispatch = TRUE)
155 153
     }
156 154
     else
157 155
         reg_attributes <- .jnull("java/lang/String")
... ...
@@ -58,8 +58,8 @@
58 58
 #' init_gmql()
59 59
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
60 60
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
61
-#' exp = read_dataset(test_path)
62
-#' ref = read_dataset(test_path2)
61
+#' exp = read_GMQL(test_path)
62
+#' ref = read_GMQL(test_path2)
63 63
 #' 
64 64
 #' # It counts the number of regions in each sample from exp that overlap with 
65 65
 #' # a ref region, and for each ref region it computes the minimum score 
... ...
@@ -77,7 +77,7 @@
77 77
 #' @aliases map-method
78 78
 #' @export
79 79
 setMethod("map", "GMQLDataset",
80
-            function(x, y, ..., joinBy = NULL, count_name = "")
80
+            function(x, y, ..., joinBy = conds(), count_name = "")
81 81
             {
82 82
                 left_data <- value(x)
83 83
                 right_data <- value(y)
... ...
@@ -16,7 +16,7 @@
16 16
 #' 
17 17
 #' init_gmql()
18 18
 #' test_path <- system.file("example","DATASET",package = "RGMQL")
19
-#' data = read_dataset(test_path)
19
+#' data = read_GMQL(test_path)
20 20
 #' 
21 21
 #' ## The following statement materialize the dataset, previoulsy read, at 
22 22
 #' ## th specific destination path into local folder "ds1" opportunely created
... ...
@@ -116,7 +116,7 @@ collect.GMQLDataset <- function(x, dir_out = getwd(), name = "ds1")
116 116
 #'
117 117
 #' init_gmql()
118 118
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
119
-#' data = read_dataset(test_path)
119
+#' data = read_GMQL(test_path)
120 120
 #' 
121 121
 #' ## The following statement materialize the dataset, previoulsy read, at 
122 122
 #' ## th specific destination path into local folder "ds1" opportunely created
... ...
@@ -188,9 +188,9 @@ gmql_materialize <- function(input_data, dir_out, name)
188 188
 #' 
189 189
 #' init_gmql()
190 190
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
191
-#' rd = read_dataset(test_path)
191
+#' rd = read_GMQL(test_path)
192 192
 #' 
193
-#' aggr = aggregate(rd, list(DF("antibody_target", "cell_karyotype")))
193
+#' aggr = aggregate(rd, conds(c("antibody_target", "cell_karyotype")))
194 194
 #' taken <- take(aggr, rows = 45)
195 195
 #' 
196 196
 #' @name take
... ...
@@ -31,7 +31,7 @@
31 31
 #'
32 32
 #' init_gmql()
33 33
 #' test_path <- system.file("example","DATASET",package = "RGMQL")
34
-#' exp = read_dataset(test_path)
34
+#' exp = read_GMQL(test_path)
35 35
 #'
36 36
 #' ## This statement creates a dataset called merged which contains one 
37 37
 #' ## sample for each antibody_target and cell value found within the metadata 
... ...
@@ -49,7 +49,7 @@
49 49
 #' @export
50 50
 #' 
51 51
 setMethod("aggregate", "GMQLDataset",
52
-            function(x, groupBy = NULL)
52
+            function(x, groupBy = conds())
53 53
             {
54 54
                 ptr_data = value(x)
55 55
                 gmql_merge(ptr_data, groupBy)
... ...
@@ -1,6 +1,6 @@
1 1
 arrange.GMQLDataset <- function(.data, metadata_ordering = NULL, 
2
-        regions_ordering = NULL, fetch_opt = NULL, num_fetch = 0L, 
3
-        reg_fetch_opt = NULL, reg_num_fetch = 0L)
2
+        regions_ordering = NULL, fetch_opt = "", num_fetch = 0L, 
3
+        reg_fetch_opt = "", reg_num_fetch = 0L)
4 4
 {
5 5
     ptr_data <- value(.data)
6 6
     gmql_order(ptr_data, metadata_ordering, regions_ordering, 
... ...
@@ -70,7 +70,7 @@ arrange.GMQLDataset <- function(.data, metadata_ordering = NULL,
70 70
 #' 
71 71
 #' init_gmql()
72 72
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
73
-#' data = read_dataset(test_path)
73
+#' data = read_GMQL(test_path)
74 74
 #' 
75 75
 #' ## The following statement orders the samples according to the Region_Count 
76 76
 #' ## metadata attribute and takes the two samples that have the highest count. 
... ...
@@ -88,7 +88,7 @@ setMethod("arrange", "GMQLDataset", arrange.GMQLDataset)
88 88
 gmql_order <- function(input_data, metadata_ordering, regions_ordering,
89 89
                     fetch_opt, num_fetch, reg_fetch_opt, reg_num_fetch)
90 90
 {
91
-    if(!is.null(fetch_opt))
91
+    if(!is.null(fetch_opt) && !identical(fetch_opt,""))
92 92
         fetch_opt <- .check_option(fetch_opt)
93 93
     else
94 94
         fetch_opt <- .jnull("java/lang/String")
... ...
@@ -103,7 +103,7 @@ gmql_order <- function(input_data, metadata_ordering, regions_ordering,
103 103
     else
104 104
         reg_num_fetch <- 0L
105 105
     
106
-    if(!is.null(reg_fetch_opt))
106
+    if(!is.null(reg_fetch_opt) && !identical(reg_fetch_opt,""))
107 107
         reg_fetch_opt <- .check_option(reg_fetch_opt)
108 108
     else
109 109
         reg_fetch_opt <- .jnull("java/lang/String")
... ...
@@ -85,7 +85,7 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
85 85
 #' 
86 86
 #' init_gmql()
87 87
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
88
-#' data = read_dataset(test_path)
88
+#' data = read_GMQL(test_path)
89 89
 #' 
90 90
 #' ## It creates a new dataset called CTCF_NORM_SCORE by preserving all 
91 91
 #' ## region attributes apart from score, and creating a new region attribute 
... ...
@@ -49,24 +49,26 @@
49 49
 #' 
50 50
 #' init_gmql()
51 51
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
52
-#' data = read_dataset(test_path)
52
+#' data = read_GMQL(test_path)
53 53
 #' 
54 54
 #' ## This statement opens such folder as a GMQL dataset named "data" using 
55 55
 #' ## "NarrowPeakParser" 
56
-#' dataPeak = read_dataset(test_path,"NarrowPeakParser")
56
+#' dataPeak = read_GMQL(test_path,"NarrowPeakParser")
57 57
 #' 
58 58
 #' ## This statement reads a remote public dataset stored into GMQL system 
59 59
 #' ## repository. For a public dataset in a (remote) GMQL repository the 
60 60
 #' ## prefix "public." is needed before dataset name
61 61
 #' 
62
-#' data1 = read_dataset("public.Example_Dataset1",is_local = FALSE)
62
+#' remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/"
63
+#' login_gmql(remote_url)
64
+#' data1 = read_GMQL("public.Example_Dataset1",is_local = FALSE)
63 65
 #' 
64
-#' @name read_dataset
66
+#' @name read_GMQL
65 67
 #' @rdname read-function
66 68
 #' @export
67 69
 #'
68
-read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, 
69
-                            is_GMQL=TRUE)
70
+read_GMQL <- function(dataset, parser = "CustomParser", is_local = TRUE, 
71
+                            is_GMQL = TRUE)
70 72
 {
71 73
     .check_input(dataset)
72 74
     .check_logical(is_local)
... ...
@@ -128,11 +130,11 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE,
128 130
 #' 
129 131
 #' @param samples GRangesList
130 132
 #' 
131
-#' @name read_dataset
133
+#' @name read_GMQL
132 134
 #' @rdname read-function
133 135
 #' @export
134 136
 #'
135
-read <- function(samples)
137
+read_GRangesList <- function(samples)
136 138
 {
137 139
     if(!is(samples,"GRangesList"))
138 140
         stop("only GrangesList")
... ...
@@ -70,7 +70,7 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
70 70
 #' 
71 71
 #' init_gmql()
72 72
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
73
-#' data <- read_dataset(test_path) 
73
+#' data <- read_GMQL(test_path) 
74 74
 #' 
75 75
 #' ## This statement selects from input data samples of patients younger 
76 76
 #' ## than 70 years old, based on filtering on sample metadata attribute 
... ...
@@ -83,7 +83,7 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
83 83
 #' ## as a GMQL dataset named "join_data"
84 84
 #' 
85 85
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
86
-#' join_data <- read_dataset(test_path2) 
86
+#' join_data <- read_GMQL(test_path2) 
87 87
 #' 
88 88
 #' ## This statement creates a new dataset called 'jun_tf' by selecting those 
89 89
 #' ## samples and their regions from the existing 'data' dataset such that:
... ...
@@ -94,10 +94,10 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
94 94
 #' ## attribute equally called cell has in at least one sample 
95 95
 #' ## of the 'join_data' dataset.
96 96
 #' ## For each sample satisfying previous conditions, only its regions that 
97
-#' ## have a region attribute called pValue with the associated value 
97
+#' ## have a region attribute called 'pvalue' with the associated value 
98 98
 #' ## less than 0.01 are conserved in output
99 99
 #' 
100
-#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
100
+#' jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, 
101 101
 #' semijoin(join_data, TRUE, conds("cell")))
102 102
 #' 
103 103
 #' 
... ...
@@ -147,16 +147,8 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
147 147
 #' common with the same attributes defined in one sample of '.data'
148 148
 #' FALSE => semijoin condition is evaluated accordingly.
149 149
 #' 
150
-#' @param groupBy list of evalation functions to define evaluation on metadata:
151
-#' \itemize{
152
-#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
153
-#' if they both end with \emph{value} and, if they have further prefixes,
154
-#' the two prefix sequence are identical.}
155
-#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
156
-#' as \emph{value} match; no further prefixes are allowed.}
157
-#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
158
-#' if both end with \emph{value}.}
159
-#' }
150
+#' @param groupBy \code{\link{condition_evaluation}} function to support 
151
+#' methods with groupBy or JoinBy input paramter
160 152
 #' 
161 153
 #' @examples
162 154
 #' 
... ...
@@ -169,8 +161,8 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
169 161
 #' init_gmql()
170 162
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
171 163
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
172
-#' data <- read_dataset(test_path)
173
-#' join_data <-  read_dataset(test_path2)
164
+#' data <- read_GMQL(test_path)
165
+#' join_data <-  read_GMQL(test_path2)
174 166
 #' 
175 167
 #' # It creates a new dataset called 'jun_tf' by selecting those samples and 
176 168
 #' # their regions from the existing 'data' dataset such that:
... ...
@@ -184,7 +176,7 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
184 176
 #' # have a region attribute called pValue with the associated value 
185 177
 #' # less than 0.01 are conserved in output
186 178
 #' 
187
-#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
179
+#' jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, 
188 180
 #' semijoin(join_data, TRUE, conds("cell")))
189 181
 #' 
190 182
 #' @return semijoin condition as list
... ...
@@ -37,8 +37,8 @@
37 37
 #' init_gmql()
38 38
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
39 39
 #' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
40
-#' data1 <- read_dataset(test_path)
41
-#' data2 <- read_dataset(test_path2)
40
+#' data1 <- read_GMQL(test_path)
41
+#' data2 <- read_GMQL(test_path2)
42 42
 #' 
43 43
 #' ## This statement creates a dataset called 'full' which contains all samples 
44 44
 #' ## from the datasets 'data1' and 'data2'
... ...
@@ -1,6 +1,6 @@
1
-#' @importFrom rJava .jpackage
2
-#' @importFrom rJava .jinit
3
-
1
+#' @importFrom rJava .jpackage .jinit
2
+#' @import RGMQLScalaLib
3
+#' 
4 4
 .onLoad <- function(libname, pkgname) {
5 5
     .jpackage(pkgname, lib.loc = libname)
6 6
     # tools::vignetteEngine("knitr", pattern = "[.]Rmd$", package = "knitr")
... ...
@@ -16,7 +16,6 @@
16 16
 }
17 17
 
18 18
 
19
-#' @importFrom utils download.file
20 19
 #' @importFrom rJava .jinit .jaddClassPath
21 20
 #' 
22 21
 initGMQLscalaAPI <- function(libLoc, mem = "12G") {
... ...
@@ -28,7 +28,7 @@
28 28
 #' 
29 29
 #' init_gmql()
30 30
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
31
-#' data = read_dataset(test_path)
31
+#' data = read_GMQL(test_path)
32 32
 #' 
33 33
 #' ## It orders the samples according to the Region_Count metadata attribute 
34 34
 #' ## and takes the two samples that have the lowest count. 
... ...
@@ -25,7 +25,7 @@ if(getRversion() >= "3.1.0")
25 25
 #' @param password string password used during signup
26 26
 #'
27 27
 #' @details
28
-#' If both username and password are NULL you will be logged as guest.
28
+#' If both username and password are missing you will be logged as guest.
29 29
 #' After login you will receive an authentication token.
30 30
 #' As token remains vaild on server (until the next login / registration) 
31 31
 #' a user can safely use a token for a previous session as a convenience;
... ...
@@ -202,7 +202,7 @@ show_queries_list <- function(url)
202 202
 #' @param url string url of server: It must contain the server address 
203 203
 #' and base url; service name is added automatically
204 204
 #' @param queryName string name of query
205
-#' @param query string text of GMQL query
205
+#' @param queryTxt string text of GMQL query
206 206
 #'
207 207
 #' @return None
208 208
 #'
... ...
@@ -6,6 +6,8 @@
6 6
 \alias{AGGREGATES-Object}
7 7
 \alias{COUNT}
8 8
 \alias{AGGREGATES-Object}
9
+\alias{COUNTSAMP}
10
+\alias{AGGREGATES-Object}
9 11
 \alias{MIN}
10 12
 \alias{AGGREGATES-Object}
11 13
 \alias{MAX}
... ...
@@ -31,6 +33,8 @@ SUM(value)
31 33
 
32 34
 COUNT()
33 35
 
36
+COUNTSAMP()
37
+
34 38
 MIN(value)
35 39
 
36 40
 MAX(value)
... ...
@@ -67,6 +71,8 @@ to be used in GMQL functions that require aggregate on value.
67 71
 function sum, performing all the type conversions needed  }
68 72
 \item{COUNT: It prepares input parameter to be passed to the library 
69 73
 function count, performing all the type conversions needed }
74
+\item{COUNTSAMP: It prepares input parameter to be passed to the library 
75
+function third quartile, performing all the type conversions needed }
70 76
 \item{MIN: It prepares input parameter to be passed to the library 
71 77
 function minimum, performing all the type conversions needed  }
72 78
 \item{MAX: It prepares input parameter to be passed to the library 
... ...
@@ -101,7 +107,7 @@ function third quartile, performing all the type conversions needed }
101 107
 
102 108
 init_gmql()
103 109
 test_path <- system.file("example", "DATASET", package = "RGMQL")
104
-exp = read_dataset(test_path)
110
+exp = read_GMQL(test_path)
105 111
 
106 112
 ## This statement copies all samples of exp dataset into res dataset, and 
107 113
 ## then calculates new metadata attribute for each of them: 
... ...
@@ -126,8 +132,7 @@ res = extend(exp, max_score = MAX("score"))
126 132
 ## attribute the average signal of the overlapping regions; 
127 133
 ## the result has one sample for each input cell.
128 134
 
129
-res = cover(exp, 2, 3, groupBy = list(DF("cell")), 
130
-avg_signal = AVG("signal") )
135
+res = cover(exp, 2, 3, groupBy = conds("cell"), avg_signal = AVG("signal"))
131 136
 
132 137
 ## This statement copies all samples of DATA dataset into OUT dataset, 
133 138
 ## and then for each of them it adds another metadata attribute, allScores, 
... ...
@@ -6,7 +6,7 @@
6 6
 \alias{aggregate,GMQLDataset-method}
7 7
 \title{Method aggregate}
8 8
 \usage{
9
-\S4method{aggregate}{GMQLDataset}(x, groupBy = NULL)
9
+\S4method{aggregate}{GMQLDataset}(x, groupBy = conds())
10 10
 }
11 11
 \arguments{
12 12
 \item{x}{GMQLDataset class object}
... ...
@@ -40,7 +40,7 @@ not present in the grouping metadata parameter are disregarded.
40 40
 
41 41
 init_gmql()
42 42
 test_path <- system.file("example","DATASET",package = "RGMQL")
43
-exp = read_dataset(test_path)
43
+exp = read_GMQL(test_path)
44 44
 
45 45
 ## This statement creates a dataset called merged which contains one 
46 46
 ## sample for each antibody_target and cell value found within the metadata 
... ...
@@ -49,6 +49,6 @@ exp = read_dataset(test_path)
49 49
 ## antibody_target and cell metadata
50 50
 ## attributes.
51 51
 
52
-merged = aggregate(exp, condition_evaluation(c("antibody_target","cell")))
52
+merged = aggregate(exp, conds(c("antibody_target","cell")))
53 53
 
54 54
 }
... ...
@@ -7,8 +7,8 @@
7 7
 \title{Method arrange}
8 8
 \usage{
9 9
 \S4method{arrange}{GMQLDataset}(.data, metadata_ordering = NULL,
10
-  regions_ordering = NULL, fetch_opt = NULL, num_fetch = 0L,
11
-  reg_fetch_opt = NULL, reg_num_fetch = 0L)
10
+  regions_ordering = NULL, fetch_opt = "", num_fetch = 0L,
11
+  reg_fetch_opt = "", reg_num_fetch = 0L)
12 12
 }
13 13
 \arguments{
14 14
 \item{.data}{GMQLDataset class object}
... ...
@@ -25,8 +25,8 @@ The functions available are: \code{\link{ASC}}, \code{\link{DESC}}.}
25 25
 first k samples; it can assume the values:
26 26
 \itemize{
27 27
 \item{mtop: it fetches the first k samples}
28
-\item{mtopg: it fetches the percentage of samples.}
29
-\item{mtopp: it fetches the first k samples in each group.}
28
+\item{mtopp: it fetches the first k percentage of samples.}
29
+\item{mtopg: it fetches the first k samples in each group.}
30 30
 
31 31
 }
32 32
 if NULL, \emph{num_fetch} is not considered}
... ...
@@ -39,8 +39,8 @@ s}
39 39
 first k regions; it can assume the values:
40 40
 \itemize{
41 41
 \item{rtop: it fetches the first k regions.}
42
-\item{rtopg: it fetches the first k percentage of regions.}
43
-\item{rtopp: it fetches the first k regions in each group.}
42
+\item{rtopp: it fetches the first k percentage of regions.}
43
+\item{rtopg: it fetches the first k regions in each group.}
44 44
 }
45 45
 if NULL, \emph{reg_num_fetch} is not considered}
46 46
 
... ...
@@ -73,7 +73,7 @@ added to either metadata, or regions, or both of them as specified in inputs
73 73
 
74 74
 init_gmql()
75 75
 test_path <- system.file("example", "DATASET", package = "RGMQL")
76
-data = read_dataset(test_path)
76
+data = read_GMQL(test_path)
77 77
 
78 78
 ## The following statement orders the samples according to the Region_Count 
79 79
 ## metadata attribute and takes the two samples that have the highest count. 
... ...
@@ -42,7 +42,7 @@ folder path
42 42
 
43 43
 init_gmql()
44 44
 test_path <- system.file("example", "DATASET", package = "RGMQL")
45
-data = read_dataset(test_path)
45
+data = read_GMQL(test_path)
46 46
 
47 47
 ## The following statement materialize the dataset, previoulsy read, at 
48 48
 ## th specific destination path into local folder "ds1" opportunely created
... ...
@@ -33,8 +33,3 @@ attribute name
33 33
 \description{
34 34
 This function is used to support joinBy and/or groupBy function parameter.
35 35
 }
36
-\examples{
37
-
38
-"where is my example?"
39
-
40
-}
... ...
@@ -35,7 +35,7 @@ considered.}
35 35
 
36 36
 init_gmql()
37 37
 test_path <- system.file("example", "DATASET", package = "RGMQL")
38
-exp = read_dataset(test_path)
38
+exp = read_GMQL(test_path)
39 39
 
40 40
 ## The following statement produces an output dataset with a single 
41 41
 ## output sample. The COVER operation considers all areas defined by 
... ...
@@ -9,7 +9,7 @@
9 9
 \usage{
10 10
 cover(.data, ...)
11 11
 
12
-\S4method{cover}{GMQLDataset}(.data, min_acc, max_acc, groupBy = NULL,
12
+\S4method{cover}{GMQLDataset}(.data, min_acc, max_acc, groupBy = conds(),
13 13
   variation = "cover", ...)
14 14
 }
15 15
 \arguments{
... ...
@@ -54,8 +54,8 @@ considering any amount of overlapping regions.
54 54
 ALL() / K, with N and K integer values  }
55 55
 }}
56 56
 
57
-\item{groupBy}{\code{\link{condition_evaluation}} function to support 
58
-methods with groupBy or JoinBy input paramter}
57
+\item{groupBy}{\code{\link{conds}} function to support methods with 
58
+groupBy or JoinBy input parameter}
59 59
 
60 60
 \item{variation}{string identifying the cover GMQL operator variation.
61 61
 The admissible strings are:
... ...
@@ -114,7 +114,7 @@ are disregarded.
114 114
 
115 115
 init_gmql()
116 116
 test_path <- system.file("example","DATASET",package = "RGMQL")
117
-exp = read_dataset(test_path)
117
+exp = read_GMQL(test_path)
118 118
   
119 119
 ## the following statement produces an output dataset with a single output 
120 120
 ## sample. The COVER operation considers all areas defined by a minimum 
... ...
@@ -131,7 +131,6 @@ res = cover(exp, 2, ANY())
131 131
 ## regions the minimum pvalue of the overlapping regions (min_pvalue) 
132 132
 ## and their Jaccard indexes (JaccardIntersect and JaccardResult).
133 133
 
134
-res = cover(exp, 2, 3, groupBy = condition_evaluation(c("cell")), 
135
-min_pValue = MIN("pvalue"))
134
+res = cover(exp, 2, 3, groupBy = conds("cell"), min_pValue = MIN("pvalue"))
136 135
 
137 136
 }
... ...
@@ -89,8 +89,8 @@ directions of the genome.}
89 89
 init_gmql()
90 90
 test_path <- system.file("example", "DATASET", package = "RGMQL")
91 91
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
92
-TSS = read_dataset(test_path)
93
-HM = read_dataset(test_path2)
92
+TSS = read_GMQL(test_path)
93
+HM = read_GMQL(test_path2)
94 94
 
95 95
 ## Given a dataset HM and one called TSS with a sample including 
96 96
 ## Transcription Start Site annotations, it searches for those regions of HM 
... ...
@@ -100,7 +100,7 @@ HM = read_dataset(test_path2)
100 100
 ## obtained from the same provider (joinby clause).
101 101
 
102 102
 join_data = merge(TSS, HM, 
103
-genometric_predicate = list(MD(1), DL(1200)), DF("provider"), 
103
+genometric_predicate = list(MD(1), DL(1200)), conds("provider"), 
104 104
 region_output = "RIGHT")
105 105
 
106 106
 ## Given a dataset 'HM' and one called 'TSS' with a sample including 
... ...
@@ -111,7 +111,7 @@ region_output = "RIGHT")
111 111
 ## from the same provider (joinby clause).
112 112
 
113 113
 join_data = merge(TSS, HM, 
114
-genometric_predicate = list(MD(1), DGE(12000), DOWN()), 
115
-DF("provider"), region_output = "RIGHT")
114
+genometric_predicate = list(MD(1), DGE(12000), DOWN()), conds("provider"), 
115
+region_output = "RIGHT")
116 116
 
117 117
 }
... ...
@@ -22,7 +22,7 @@ The function works only after invoking at least one collect
22 22
 
23 23
 init_gmql()
24 24
 test_path <- system.file("example","DATASET",package = "RGMQL")
25
-data = read_dataset(test_path)
25
+data = read_GMQL(test_path)
26 26
 
27 27
 ## The following statement materialize the dataset, previoulsy read, at 
28 28
 ## th specific destination path into local folder "ds1" opportunely created
... ...
@@ -54,7 +54,7 @@ Aggregate functions are applied sample by sample.
54 54
 
55 55
 init_gmql()
56 56
 test_path <- system.file("example", "DATASET", package = "RGMQL")
57
-data <- read_dataset(test_path)
57
+data <- read_GMQL(test_path)
58 58
 
59 59
 ## This statement counts the regions in each sample and stores their number 
60 60
 ## as value of the new metadata attribute RegionCount of the sample.
... ...
@@ -50,7 +50,7 @@ no sample is extracted.
50 50
 
51 51
 init_gmql()
52 52
 test_path <- system.file("example", "DATASET", package = "RGMQL")
53
-data <- read_dataset(test_path) 
53
+data <- read_GMQL(test_path) 
54 54
 
55 55
 ## This statement selects from input data samples of patients younger 
56 56
 ## than 70 years old, based on filtering on sample metadata attribute 
... ...
@@ -63,7 +63,7 @@ filter_data <- filter(data, patient_age < 70)
63 63
 ## as a GMQL dataset named "join_data"
64 64
 
65 65
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
66
-join_data <- read_dataset(test_path2) 
66
+join_data <- read_GMQL(test_path2) 
67 67
 
68 68
 ## This statement creates a new dataset called 'jun_tf' by selecting those 
69 69
 ## samples and their regions from the existing 'data' dataset such that:
... ...
@@ -74,11 +74,11 @@ join_data <- read_dataset(test_path2)
74 74
 ## attribute equally called cell has in at least one sample 
75 75
 ## of the 'join_data' dataset.
76 76
 ## For each sample satisfying previous conditions, only its regions that 
77
-## have a region attribute called pValue with the associated value 
77
+## have a region attribute called 'pvalue' with the associated value 
78 78
 ## less than 0.01 are conserved in output
79 79
 
80
-jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
81
-semijoin(join_data, TRUE, list(DF("cell"))))
80
+jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, 
81
+semijoin(join_data, TRUE, conds("cell")))
82 82
 
83 83
 
84 84
 }
... ...
@@ -6,27 +6,19 @@
6 6
 \alias{group_by,GMQLDataset-method}
7 7
 \title{Method group_by}
8 8
 \usage{
9
-\S4method{group_by}{GMQLDataset}(.data, groupBy_meta = NULL,
10
-  groupBy_regions = NULL, region_aggregates = NULL,
9
+\S4method{group_by}{GMQLDataset}(.data, groupBy_meta = conds(),
10
+  groupBy_regions = c(""), region_aggregates = NULL,
11 11
   meta_aggregates = NULL)
12 12
 }
13 13
 \arguments{
14 14
 \item{.data}{GMQLDataset object}
15 15
 
16
-\item{groupBy_meta}{it define condition evaluation on metadata.
17
-\itemize{
18
-\item{\code{\link{FN}}: Fullname evaluation, two attributes match 
19
-if they both end with value and, if they have a further prefixes,
20
-the two prefix sequence are identical}
21
-\item{\code{\link{EX}}: Exact evaluation, only attributes exactly 
22
-as value will match; no further prefixes are allowed. }
23
-\item{\code{\link{DF}}: Default evaluation, the two attributes match 
24
-if both end with value.}
25
-}}
16
+\item{groupBy_meta}{\code{\link{conds}} function to support methods with 
17
+groupBy or JoinBy input parameter}
26 18
 
27 19
 \item{groupBy_regions}{vector of string made up by schema field attribute}
28 20
 
29
-\item{region_aggregates}{It accept a series of aggregate function on 
21
+\item{region_aggregates}{It accept a list of aggregate function on 
30 22
 region attribute. 
31 23
 All the element in the form \emph{key} = \emph{aggregate}.
32 24
 The \emph{aggregate} is an object of class AGGREGATES
... ...
@@ -45,7 +37,7 @@ attributes. Two style are allowed:
45 37
 }
46 38
 "mixed style" is not allowed}
47 39
 
48
-\item{meta_aggregates}{It accept a series of aggregate function on 
40
+\item{meta_aggregates}{It accept a list of aggregate function on 
49 41
 metadata attribute.
50 42
 All the element in the form \emph{key} = \emph{aggregate}.
51 43
 The \emph{aggregate} is an object of class AGGREGATES
... ...
@@ -73,4 +65,55 @@ Wrapper to GMQL GROUP operator
73 65
 }
74 66
 \examples{
75 67
 
68
+## This statement initializes and runs the GMQL server for local execution 
69
+## and creation of results on disk. Then, with system.file() it defines 
70
+## the path to the folder "DATASET" in the subdirectory "example"
71
+## of the package "RGMQL" and opens such file as a GMQL dataset named "exp" 
72
+## using customParser
73
+
74
+init_gmql()
75
+test_path <- system.file("example","DATASET",package = "RGMQL")
76
+exp = read_GMQL(test_path)
77
+
78
+## This GMQL statement groups samples of the input 'exp' dataset according to 
79
+## their value of the metadata attribute 'tumor_type' and computes the 
80
+## maximum value that the metadata attribute size takes inside the samples 
81
+## belonging to each group. The samples in the output GROUPS_T dataset 
82
+## have a new _group metadata attribute which indicates which group they 
83
+## belong to, based on the grouping on the metadata attribute tumor_type. 
84
+## In addition, they present the new metadata aggregate attribute MaxSize. 
85
+## Note that the samples without metadata attribute tumor_type are assigned 
86
+## to a single group with _group value equal 0
87
+
88
+GROUPS_T = group_by(exp, conds("tumor_type"), 
89
+meta_aggregates = list(max_size = MAX("size")))
90
+
91
+## This GMQL statement takes as input dataset the same input dataset as 
92
+## the previous example. Yet, it calculates new _group values based on the 
93
+## grouping attribute 'cell', and adds the metadata aggregate attribute 
94
+## 'n_samp', which counts the number of samples belonging to the respective 
95
+## group. It has the following output GROUPS_C dataset samples 
96
+## (note that now no sample has metadata attribute _group with value equal 0 
97
+## since all input samples include the metadata attribute cell, 
98
+## with different values, on which the new grouping is based)
99
+
100
+GROUPS_C = group_by(exp, conds("cell"),
101
+meta_aggregates = list(n_samp AS COUNTSAMP()))
102
+
103
+## This GMQL statement groups the regions of each 'exp' dataset sample by 
104
+## region coordinates chr, left, right, strand  (these are implicitly 
105
+## considered) and the additional region attribute score (which is explicitly 
106
+## specified), and keeps only one region for each group. 
107
+## In the output GROUPS dataset schema, the new region attributes 
108
+## avg_pvalue and max_qvalue are added, respectively computed as the 
109
+## average of the values taken by the pvalue and the maximum of the values 
110
+## taken by the qvalue region attributes in the regions grouped together, 
111
+## and the computed value is assigned to each region of each output sample. 
112
+## Note that the region attributes which are not coordinates or score are 
113
+## discarded.
114
+
115
+GROUPS = group_by(exp, group_reg = "score", 
116
+region_aggregates = list(avg_pvalue = AVG("pvalue"), 
117
+max_qvalue = MAX("qvalue")))
118
+
76 119
 }
... ...
@@ -23,7 +23,7 @@ and password, or as guest, using the proper GMQL web service available
23 23
 on a remote server
24 24
 }
25 25
 \details{
26
-If both username and password are NULL you will be logged as guest.
26
+If both username and password are missing you will be logged as guest.
27 27
 After login you will receive an authentication token.
28 28
 As token remains vaild on server (until the next login / registration) 
29 29
 a user can safely use a token for a previous session as a convenience;
... ...
@@ -10,7 +10,7 @@
10 10
 \usage{
11 11
 map(x, y, ...)
12 12
 
13
-\S4method{map}{GMQLDataset}(x, y, ..., joinBy = NULL, count_name = NULL)
13
+\S4method{map}{GMQLDataset}(x, y, ..., joinBy = conds(), count_name = "")
14 14
 }
15 15
 \arguments{
16 16
 \item{x}{GMQLDataset class object}
... ...
@@ -34,16 +34,8 @@ attributes. Two styles are allowed:
34 34
 }
35 35
 "mixed style" is not allowed}
36 36
 
37
-\item{joinBy}{list of evalation functions to define evaluation on metadata:
38
-\itemize{
39
-\item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
40
-if they both end with \emph{value} and, if they have further prefixes,
41
-the two prefix sequence are identical.}
42
-\item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
43
-as \emph{value} match; no further prefixes are allowed.}
44
-\item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
45
-if both end with \emph{value}.}
46
-}}
37
+\item{joinBy}{\code{\link{conds}} function to support methods with 
38
+groupBy or JoinBy input parameter}
47 39
 
48 40
 \item{count_name}{string defining the metadata count name; if it is 
49 41
 not specifying the name is "count_left_right"}
... ...
@@ -85,8 +77,8 @@ present with equal values in both M1 and  M2
85 77
 init_gmql()
86 78
 test_path <- system.file("example", "DATASET", package = "RGMQL")
87 79
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
88
-exp = read_dataset(test_path)
89
-ref = read_dataset(test_path2)
80
+exp = read_GMQL(test_path)
81
+ref = read_GMQL(test_path2)
90 82
 
91 83
 # It counts the number of regions in each sample from exp that overlap with 
92 84
 # a ref region, and for each ref region it computes the minimum score 
... ...
@@ -97,7 +89,6 @@ ref = read_dataset(test_path2)
97 89
 # but with a different value from the one(s) of ref sample(s), 
98 90
 # are disregarded.
99 91
 
100
-out = map(ref, exp, minScore = MIN("score"), 
101
-joinBy = list(DF("cell_tissue")))
92
+out = map(ref, exp, minScore = MIN("score"), joinBy = conds("cell_tissue"))
102 93
 
103 94
 }
... ...
@@ -7,7 +7,7 @@
7 7
 \title{Method merge}
8 8
 \usage{
9 9
 \S4method{merge}{GMQLDataset,GMQLDataset}(x, y, genometric_predicate = NULL,
10
-  region_output = "CAT", joinBy = NULL, reg_attr = NULL)
10
+  region_output = "CAT", joinBy = conds(), reg_attr = c(""))
11 11
 }
12 12
 \arguments{
13 13
 \item{x}{GMQLDataset class object}
... ...
@@ -35,17 +35,17 @@ that satisfy the genometric predicate, (i.e. the output regionis defined as
35 35
 having left (right) coordinates equal to the minimum (maximum) of the 
36 36
 corresponding coordinate values in the 'x' and 'y' regions satisfying 
37 37
 the genometric predicate)}
38
-\item{LEFT_DISTINCT: It outputs the duplicate elimination of "x" output 
38
+\item{LEFT_DIST: It outputs the duplicate elimination of "x" output 
39 39
 regions with the same values, regardless the "y" paired region and its 
40 40
 values. In this case, the output regions attributes and their values are 
41 41
 all those of "x", and the output metadata are equal to the "x" metadata, 
42 42
 without additional prefixes}
43
-\item{RIGHT_DISTINCT: It outputs the duplicate elimination of "y" output 
43
+\item{RIGHT_DIST: It outputs the duplicate elimination of "y" output 
44 44
 regions with the same values, regardless the "x" paired region and its 
45 45
 values. In this case, the output regions attributes and their values are 
46 46
 all those of "y", and the output metadata are equal to the "y" metadata, 
47 47
 without additional prefixes}
48
-\item{BOTH: outputs the same regions as LEFT, but it adds in the output 
48
+\item{BOTH: It outputs the same regions as LEFT, but it adds in the output 
49 49
 region attributes the coordinates of the "y" dataset region that, 
50 50
 together with the output "x" dataset region, satisfies the equi predicate 
51 51
 and the genometric predicate}
... ...
@@ -85,8 +85,8 @@ respectively.
85 85
 init_gmql()
86 86
 test_path <- system.file("example", "DATASET", package = "RGMQL")
87 87
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
88
-TSS = read_dataset(test_path)
89
-HM = read_dataset(test_path2)
88
+TSS = read_GMQL(test_path)
89
+HM = read_GMQL(test_path2)
90 90
 
91 91
 ## Given a dataset 'HM' and one called 'TSS' with a sample including 
92 92
 ## Transcription Start Site annotations, it searches for those regions of HM 
... ...
@@ -95,10 +95,8 @@ HM = read_dataset(test_path2)
95 95
 ## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained 
96 96
 ## from the same provider (joinby clause).
97 97
 
98
-
99
-join_data = merge(TSS, HM, 
100
-genometric_predicate = list(MD(1), DLE(120000)), DF("provider"), 
101
-region_output = "RIGHT")
98
+join_data = merge(TSS, HM, genometric_predicate = list(MD(1), DLE(120000)), 
99
+conds("provider"), region_output = "RIGHT")
102 100
 
103 101
 
104 102
 }
... ...
@@ -47,7 +47,7 @@ null, performing all the type conversions needed}
47 47
 
48 48
 init_gmql()
49 49
 test_path <- system.file("example", "DATASET", package = "RGMQL")
50
-exp = read_dataset(test_path)
50
+exp = read_GMQL(test_path)
51 51
 
52 52
 ## This statement allows to select, in all input sample, all those regions 
53 53
 ## for which the region attribute score has a value which is greater 
... ...
@@ -38,7 +38,7 @@ that require ordering on value; is used only in arrange method
38 38
 
39 39
 init_gmql()
40 40
 test_path <- system.file("example", "DATASET", package = "RGMQL")
41
-data = read_dataset(test_path)
41
+data = read_GMQL(test_path)
42 42
 
43 43
 ## It orders the samples according to the Region_Count metadata attribute 
44 44
 ## and takes the two samples that have the lowest count. 
... ...
@@ -1,15 +1,15 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/gmql_read.R
3
-\name{read_dataset}
4
-\alias{read_dataset}
5
-\alias{read_dataset}
6
-\alias{read}
3
+\name{read_GMQL}
4
+\alias{read_GMQL}
5
+\alias{read_GMQL}
6
+\alias{read_GRangesList}
7 7
 \title{Function read}
8 8
 \usage{
9
-read_dataset(dataset, parser = "CustomParser", is_local = TRUE,
9
+read_GMQL(dataset, parser = "CustomParser", is_local = TRUE,
10 10
   is_GMQL = TRUE)
11 11
 
12
-read(samples)
12
+read_GRangesList(samples)
13 13
 }
14 14
 \arguments{
15 15
 \item{dataset}{folder path for GMQL dataset or dataset name on repository}
... ...
@@ -65,16 +65,18 @@ generated.
65 65
 
66 66
 init_gmql()
67 67
 test_path <- system.file("example", "DATASET", package = "RGMQL")
68
-data = read_dataset(test_path)
68
+data = read_GMQL(test_path)
69 69
 
70 70
 ## This statement opens such folder as a GMQL dataset named "data" using 
71 71
 ## "NarrowPeakParser" 
72
-dataPeak = read_dataset(test_path,"NarrowPeakParser")
72
+dataPeak = read_GMQL(test_path,"NarrowPeakParser")
73 73
 
74 74
 ## This statement reads a remote public dataset stored into GMQL system 
75 75
 ## repository. For a public dataset in a (remote) GMQL repository the 
76 76
 ## prefix "public." is needed before dataset name
77 77
 
78
-data1 = read_dataset("public.Example_Dataset1",is_local = FALSE)
78
+remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/"
79
+login_gmql(remote_url)
80
+data1 = read_GMQL("public.Example_Dataset1",is_local = FALSE)
79 81
 
80 82
 }
... ...
@@ -15,9 +15,9 @@ and base url; service name is added automatically}
15 15
 
16 16
 \item{queryName}{string name of query}
17 17
 
18
-\item{filePath}{string local file path of txt file containing a GMQL query}
18
+\item{queryTxt}{string text of GMQL query}
19 19
 
20
-\item{query}{string text of GMQL query}
20
+\item{filePath}{string local file path of txt file containing a GMQL query}
21 21
 }
22 22
 \value{
23 23
 None
... ...
@@ -73,7 +73,7 @@ those in the input dataset. It allows to:
73 73
 
74 74
 init_gmql()
75 75
 test_path <- system.file("example", "DATASET", package = "RGMQL")
76
-data = read_dataset(test_path)
76
+data = read_GMQL(test_path)
77 77
 
78 78
 ## It creates a new dataset called CTCF_NORM_SCORE by preserving all 
79 79
 ## region attributes apart from score, and creating a new region attribute 
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{semijoin}
5 5
 \title{Semijoin condtion}
6 6
 \usage{
7
-semijoin(.data, not_in = FALSE, groupBy = NULL)
7
+semijoin(.data, not_in = FALSE, groupBy)
8 8
 }
9 9
 \arguments{
10 10
 \item{.data}{GMQLDataset class object}
... ...
@@ -16,16 +16,8 @@ in groupBy and these attributes of 'data' have at least one value in
16 16
 common with the same attributes defined in one sample of '.data'
17 17
 FALSE => semijoin condition is evaluated accordingly.}
18 18
 
19
-\item{groupBy}{list of evalation functions to define evaluation on metadata:
20
-\itemize{
21
-\item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
22
-if they both end with \emph{value} and, if they have further prefixes,
23
-the two prefix sequence are identical.}
24
-\item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
25
-as \emph{value} match; no further prefixes are allowed.}
26
-\item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
27
-if both end with \emph{value}.}
28
-}}
19
+\item{groupBy}{\code{\link{condition_evaluation}} function to support 
20
+methods with groupBy or JoinBy input paramter}
29 21
 }
30 22
 \value{
31 23
 semijoin condition as list
... ...
@@ -45,8 +37,8 @@ semijoin conditions on metadata
45 37
 init_gmql()
46 38
 test_path <- system.file("example", "DATASET", package = "RGMQL")
47 39
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
48
-data <- read_dataset(test_path)
49
-join_data <-  read_dataset(test_path2)
40
+data <- read_GMQL(test_path)
41
+join_data <-  read_GMQL(test_path2)
50 42
 
51 43
 # It creates a new dataset called 'jun_tf' by selecting those samples and 
52 44
 # their regions from the existing 'data' dataset such that:
... ...
@@ -60,7 +52,7 @@ join_data <-  read_dataset(test_path2)
60 52
 # have a region attribute called pValue with the associated value 
61 53
 # less than 0.01 are conserved in output
62 54
 
63
-jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
64
-semijoin(join_data, TRUE, list(DF("cell"))))
55
+jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, 
56
+semijoin(join_data, TRUE, conds("cell")))
65 57
 
66 58
 }
... ...
@@ -6,7 +6,7 @@
6 6
 \alias{setdiff,GMQLDataset,GMQLDataset-method}
7 7
 \title{Method setdiff}
8 8
 \usage{
9
-\S4method{setdiff}{GMQLDataset,GMQLDataset}(x, y, joinBy = NULL,
9
+\S4method{setdiff}{GMQLDataset,GMQLDataset}(x, y, joinBy = conds(),
10 10
   is_exact = FALSE)
11 11
 }
12 12
 \arguments{
... ...
@@ -14,8 +14,8 @@
14 14
 
15 15
 \item{y}{GMQLDataset class object}
16 16
 
17
-\item{joinBy}{\code{\link{condition_evaluation}} function to support 
18
-methods with groupBy or JoinBy input paramter}
17
+\item{joinBy}{\code{\link{conds}} function to support methods with 
18
+groupBy or JoinBy input paramter}
19 19
 
20 20
 \item{is_exact}{single logical value: TRUE means that the region difference 
21 21
 is executed only on regions in left_input_data with exactly the same 
... ...
@@ -51,8 +51,8 @@ are considered when performing the difference.
51 51
 init_gmql()
52 52
 test_path <- system.file("example", "DATASET", package = "RGMQL")
53 53
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
54
-data1 = read_dataset(test_path)
55
-data2 = read_dataset(test_path2)
54
+data1 = read_GMQL(test_path)
55
+data2 = read_GMQL(test_path2)
56 56
 
57 57
 ## This GMQL statement returns all the regions in the first dataset 
58 58
 ## that do not overlap any region in the second dataset.
... ...
@@ -65,6 +65,6 @@ out = setdiff(data1, data2)
65 65
 ## do not overlap any region in s2; 
66 66
 ## metadata of the result are the same as the metadata of s1.
67 67
 
68
-out_t = setdiff(data1, data2, condition_evaluation(c("cell")))
68
+out_t = setdiff(data1, data2, conds("cell"))
69 69
 
70 70
 }
... ...
@@ -44,9 +44,9 @@ all those datasets are materialized as folders.
44 44
 
45 45
 init_gmql()
46 46
 test_path <- system.file("example", "DATASET", package = "RGMQL")
47
-rd = read_dataset(test_path)
47
+rd = read_GMQL(test_path)
48 48
 
49
-aggr = aggregate(rd, list(DF("antibody_target", "cell_karyotype")))
49
+aggr = aggregate(rd, conds(c("antibody_target", "cell_karyotype")))
50 50
 taken <- take(aggr, rows = 45)
51 51
 
52 52
 }
... ...
@@ -46,8 +46,8 @@ w.r.t. the merged schema are set to null.}
46 46
 init_gmql()
47 47
 test_path <- system.file("example", "DATASET", package = "RGMQL")
48 48
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
49
-data1 <- read_dataset(test_path)
50
-data2 <- read_dataset(test_path2)
49
+data1 <- read_GMQL(test_path)
50
+data2 <- read_GMQL(test_path2)
51 51
 
52 52
 ## This statement creates a dataset called 'full' which contains all samples 
53 53
 ## from the datasets 'data1' and 'data2'
... ...
@@ -247,17 +247,17 @@ As data are already in the user computer, we simply execute:
247 247
 
248 248
 ```{r, read GMQL dataset}
249 249
 gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL")
250
-data_out = read_dataset(gmql_dataset_path)
250
+data_out = read_GMQL(gmql_dataset_path)
251 251
 ```
252 252
 In this case we are reading a GMQL dataset specified by the path of 
253 253
 its folder "EXON" within the subdirectory "example" of the package "RGMQL".
254
-It does not matter what kind of format the data are, *read_dataset()* reads 
254
+It does not matter what kind of format the data are, *read_GMQL()* reads 
255 255
 many standard tab-delimited text formats without the need of specifying any  
256 256
 additional input parameter.
257 257
 
258 258
 2. GRangesList:\newline
259 259
 For better integration in the R environment and with other R packages, 
260
-we provide the *read()* function to read directly from R memory/environment 
260
+we provide the *read_GRangesList()* function to read directly from R memory/environment
261 261
 using GRangesList as input.
262 262
 
263 263
 ```{r, read GRangesList}
... ...
@@ -270,12 +270,12 @@ gr2 <- GRanges(seqnames = c("chr1", "chr1"),
270 270
 	score = 3:4, GC = c(0.3, 0.5))
271 271
 
272 272
 grl <- GRangesList("txA" = gr1, "txB" = gr2)
273
-data_out <- read(grl)
273
+data_out <- read_GRangesList(grl)
274 274
 ```
275 275
 In this example we show how versatile the RGMQL package is.
276 276
 As specified above, we can directly read a list of GRanges previously created 
277 277
 starting from two GRanges.
278
-Both *read()* and *read_dataset()* functions return a result object, 
278
+Both *read_GRangesList()* and *read_GMQL()* functions return a result object, 
279 279
 in this case *data_out*: an instance of GMQLDataset class used as input 
280 280
 for executing the subsequent GMQL operation.
281 281
 
... ...
@@ -306,8 +306,8 @@ mut_path <- system.file("example", "MUT", package = "RGMQL")
306 306
 # sample with exon regions, and MUT folder as a GMQL dataset named "mut_ds" 
307 307
 # containing multiple samples with mutation regions
308 308
 
309
-exon_ds <- read_dataset(exon_path)
310
-mut_ds <- read_dataset(mut_path)
309
+exon_ds <- read_GMQL(exon_path)
310
+mut_ds <- read_GMQL(mut_path)
311 311
 
312 312
 # Filter out mut_ds based on predicate 
313 313
 
... ...
@@ -484,8 +484,8 @@ After initialization, we can start building our query:
484 484
 ## Read the remote dataset HG19_TCGA_dnaseq 
485 485
 ## Read the remote dataset HG19_BED_ANNOTATION 
486 486
 
487
-TCGA_dnaseq <- read_dataset("public.HG19_TCGA_dnaseq", is_local = FALSE)
488
-HG19_bed_ann <- read_dataset("public.HG19_BED_ANNOTATION", is_local = FALSE)
487
+TCGA_dnaseq <- read_GMQL("public.HG19_TCGA_dnaseq", is_local = FALSE)
488
+HG19_bed_ann <- read_GMQL("public.HG19_BED_ANNOTATION", is_local = FALSE)
489 489
 
490 490
 ## Filter out TCGA_dnaseq based on predicate 
491 491
 
... ...
@@ -535,7 +535,7 @@ remote_processing(TRUE)
535 535
 ```
536 536
 An user can switch processing mode until the first *collect()* has been performed.
537 537
 
538
-This kind of processing comes from the fact that the *read()* function can 
538
+This kind of processing comes from the fact that the *read_GMQL()* function can 
539 539
 accept either a local dataset or a remote repository dataset, 
540 540
 even in the same query as in the following example:
541 541
 ```{r, mixed query}
... ...
@@ -549,11 +549,11 @@ mut_path <- system.file("example", "MUT", package = "RGMQL")
549 549
 # Read MUT folder as a GMQL dataset named "mut_ds" containing a single 
550 550
 # sample with mutation regions
551 551
 
552
-mut_ds <- read_dataset(mut_path, is_local = TRUE)
552
+mut_ds <- read_GMQL(mut_path, is_local = TRUE)
553 553
 
554 554
 # Read the remote dataset HG19_BED_ANNOTATION 
555 555
 
556
-HG19_bed_ann <- read_dataset("public.HG19_BED_ANNOTATION", is_local = FALSE)
556
+HG19_bed_ann <- read_GMQL("public.HG19_BED_ANNOTATION", is_local = FALSE)
557 557
 
558 558
 # Filter out based on predicate
559 559
 
... ...
@@ -592,7 +592,7 @@ collect(exon_res)
592 592
 execute()
593 593
 ```
594 594
 
595
-As we can see, the two *read()* functions above read from different sources: 
595
+As we can see, the two *read_GMQL()* functions above read from different sources: 
596 596
 *mut_ds* from local dataset, *HG19_bed_ann* from remote repository.
597 597
 
598 598
 If we set remote processing to false (*remote_processing(FALSE)*),