... | ... |
@@ -7,6 +7,7 @@ export(AVG) |
7 | 7 |
export(BAG) |
8 | 8 |
export(BAGD) |
9 | 9 |
export(COUNT) |
10 |
+export(COUNTSAMP) |
|
10 | 11 |
export(DESC) |
11 | 12 |
export(DG) |
12 | 13 |
export(DGE) |
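Note: COUNTSAMP is a newly exported aggregate. A minimal usage sketch, assuming it takes no arguments (like COUNT()) and counts the samples falling in each group when used with group_by():

    init_gmql()
    test_path <- system.file("example", "DATASET", package = "RGMQL")
    exp <- read_GMQL(test_path)
    groups <- group_by(exp, conds("cell"),
                       meta_aggregates = list(n_samp = COUNTSAMP()))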
... | ... |
@@ -39,8 +40,8 @@ export(import_gmql) |
39 | 40 |
export(init_gmql) |
40 | 41 |
export(login_gmql) |
41 | 42 |
export(logout_gmql) |
42 |
-export(read) |
|
43 |
-export(read_dataset) |
|
43 |
+export(read_GMQL) |
|
44 |
+export(read_GRangesList) |
|
44 | 45 |
export(remote_processing) |
45 | 46 |
export(run_query) |
46 | 47 |
export(run_query_fromfile) |
... | ... |
@@ -73,6 +74,7 @@ exportMethods(setdiff) |
73 | 74 |
exportMethods(take) |
74 | 75 |
exportMethods(union) |
75 | 76 |
import(GenomicRanges) |
77 |
+import(RGMQLScalaLib) |
|
76 | 78 |
import(httr) |
77 | 79 |
import(xml2) |
78 | 80 |
importClassesFrom(GenomicRanges,GRangesList) |
... | ... |
@@ -96,6 +98,7 @@ importFrom(methods,is) |
96 | 98 |
importFrom(methods,isClass) |
97 | 99 |
importFrom(methods,new) |
98 | 100 |
importFrom(plyr,revalue) |
101 |
+importFrom(rJava,.jaddClassPath) |
|
99 | 102 |
importFrom(rJava,.jarray) |
100 | 103 |
importFrom(rJava,.jevalArray) |
101 | 104 |
importFrom(rJava,.jinit) |
... | ... |
@@ -112,7 +112,7 @@ take_value.META_AGGREGATES <- function(obj){ |
112 | 112 |
#' |
113 | 113 |
#' init_gmql() |
114 | 114 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
115 |
-#' exp = read_dataset(test_path) |
|
115 |
+#' exp = read_GMQL(test_path) |
|
116 | 116 |
#' |
117 | 117 |
#' ## This statement copies all samples of exp dataset into res dataset, and |
118 | 118 |
#' ## then calculates new metadata attribute for each of them: |
... | ... |
@@ -137,8 +137,7 @@ take_value.META_AGGREGATES <- function(obj){ |
137 | 137 |
#' ## attribute the average signal of the overlapping regions; |
138 | 138 |
#' ## the result has one sample for each input cell. |
139 | 139 |
#' |
140 |
-#' res = cover(exp, 2, 3, groupBy = list(DF("cell")), |
|
141 |
-#' avg_signal = AVG("signal") ) |
|
140 |
+#' res = cover(exp, 2, 3, groupBy = conds("cell"), avg_signal = AVG("signal")) |
|
142 | 141 |
#' |
143 | 142 |
#' ## This statement copies all samples of DATA dataset into OUT dataset, |
144 | 143 |
#' ## and then for each of them it adds another metadata attribute, allScores, |
... | ... |
@@ -42,7 +42,7 @@ print.PARAMETER <- function(obj){ |
42 | 42 |
#' |
43 | 43 |
#' init_gmql() |
44 | 44 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
45 |
-#' exp = read_dataset(test_path) |
|
45 |
+#' exp = read_GMQL(test_path) |
|
46 | 46 |
#' |
47 | 47 |
#' ## The following statement produces an output dataset with a single |
48 | 48 |
#' ## output sample. The COVER operation considers all areas defined by |
... | ... |
@@ -85,8 +85,8 @@ check.DISTAL <- function(value) |
85 | 85 |
#' init_gmql() |
86 | 86 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
87 | 87 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
88 |
-#' TSS = read_dataset(test_path) |
|
89 |
-#' HM = read_dataset(test_path2) |
|
88 |
+#' TSS = read_GMQL(test_path) |
|
89 |
+#' HM = read_GMQL(test_path2) |
|
90 | 90 |
#' |
91 | 91 |
#' ## Given a dataset HM and one called TSS with a sample including |
92 | 92 |
#' ## Transcription Start Site annotations, it searches for those regions of HM |
... | ... |
@@ -96,7 +96,7 @@ check.DISTAL <- function(value) |
96 | 96 |
#' ## obtained from the same provider (joinby clause). |
97 | 97 |
#' |
98 | 98 |
#' join_data = merge(TSS, HM, |
99 |
-#' genometric_predicate = list(MD(1), DL(1200)), DF("provider"), |
|
99 |
+#' genometric_predicate = list(MD(1), DL(1200)), conds("provider"), |
|
100 | 100 |
#' region_output = "RIGHT") |
101 | 101 |
#' |
102 | 102 |
#' ## Given a dataset 'HM' and one called 'TSS' with a sample including |
... | ... |
@@ -107,8 +107,8 @@ check.DISTAL <- function(value) |
107 | 107 |
#' ## from the same provider (joinby clause). |
108 | 108 |
#' |
109 | 109 |
#' join_data = merge(TSS, HM, |
110 |
-#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), |
|
111 |
-#' DF("provider"), region_output = "RIGHT") |
|
110 |
+#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), conds("provider"), |
|
111 |
+#' region_output = "RIGHT") |
|
112 | 112 |
#' |
113 | 113 |
#' @name DISTAL-Object |
114 | 114 |
#' @aliases DL |
... | ... |
@@ -63,7 +63,7 @@ as.character.OPERATOR <- function(obj) { |
63 | 63 |
#' |
64 | 64 |
#' init_gmql() |
65 | 65 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
66 |
-#' exp = read_dataset(test_path) |
|
66 |
+#' exp = read_GMQL(test_path) |
|
67 | 67 |
#' |
68 | 68 |
#' ## This statement allows to select, in all input sample, all those regions |
69 | 69 |
#' ## for which the region attribute score has a value which is greater |
... | ... |
@@ -98,7 +98,7 @@ |
98 | 98 |
#' |
99 | 99 |
#' init_gmql() |
100 | 100 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
101 |
-#' exp = read_dataset(test_path) |
|
101 |
+#' exp = read_GMQL(test_path) |
|
102 | 102 |
#' |
103 | 103 |
#' ## the following statement produces an output dataset with a single output |
104 | 104 |
#' ## sample. The COVER operation considers all areas defined by a minimum |
... | ... |
@@ -123,7 +123,7 @@ |
123 | 123 |
#' @aliases cover-method |
124 | 124 |
#' @export |
125 | 125 |
setMethod("cover", "GMQLDataset", |
126 |
- function(.data, min_acc, max_acc, groupBy = NULL, |
|
126 |
+ function(.data, min_acc, max_acc, groupBy = conds(), |
|
127 | 127 |
variation = "cover", ...) |
128 | 128 |
{ |
129 | 129 |
val <- value(.data) |
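Note: the groupBy default moves from NULL to an empty conds() call. A hedged sketch of the two call styles after this change, reusing the 'exp' dataset from the roxygen example above:

    res   <- cover(exp, 2, 3)                           # empty conds(): no grouping
    res_g <- cover(exp, 2, 3, groupBy = conds("cell"))  # one output sample per cell value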
... | ... |
@@ -40,8 +40,8 @@ |
40 | 40 |
#' init_gmql() |
41 | 41 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
42 | 42 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
43 |
-#' data1 = read_dataset(test_path) |
|
44 |
-#' data2 = read_dataset(test_path2) |
|
43 |
+#' data1 = read_GMQL(test_path) |
|
44 |
+#' data2 = read_GMQL(test_path2) |
|
45 | 45 |
#' |
46 | 46 |
#' ## This GMQL statement returns all the regions in the first dataset |
47 | 47 |
#' ## that do not overlap any region in the second dataset. |
... | ... |
@@ -61,7 +61,7 @@ |
61 | 61 |
#' @aliases setdiff-method |
62 | 62 |
#' @export |
63 | 63 |
setMethod("setdiff", c("GMQLDataset","GMQLDataset"), |
64 |
- function(x, y, joinBy = NULL, is_exact = FALSE) |
|
64 |
+ function(x, y, joinBy = conds(), is_exact = FALSE) |
|
65 | 65 |
{ |
66 | 66 |
ptr_data_x = value(x) |
67 | 67 |
ptr_data_y = value(y) |
... | ... |
@@ -38,7 +38,7 @@ |
38 | 38 |
#' |
39 | 39 |
#' init_gmql() |
40 | 40 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
41 |
-#' data <- read_dataset(test_path) |
|
41 |
+#' data <- read_GMQL(test_path) |
|
42 | 42 |
#' |
43 | 43 |
#' ## This statement counts the regions in each sample and stores their number |
44 | 44 |
#' ## as value of the new metadata attribute RegionCount of the sample. |
... | ... |
@@ -1,5 +1,5 @@ |
1 |
-group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, |
|
2 |
- groupBy_regions = NULL, region_aggregates = NULL, meta_aggregates = NULL) |
|
1 |
+group_by.GMQLDateset <- function(.data, groupBy_meta = conds(), |
|
2 |
+ groupBy_regions = c(""), region_aggregates = NULL, meta_aggregates = NULL) |
|
3 | 3 |
{ |
4 | 4 |
ptr_data = value(.data) |
5 | 5 |
gmql_group(ptr_data, groupBy_meta, groupBy_regions, region_aggregates, |
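Note: the defaults change from NULL to conds() for metadata conditions and c("") for region grouping. A minimal sketch, assuming both defaults are treated downstream as "no condition":

    g1 <- group_by(exp)                                   # defaults only, no grouping keys
    g2 <- group_by(exp, groupBy_meta = conds("tumor_type"),
                   meta_aggregates = list(max_size = MAX("size")))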
... | ... |
@@ -18,7 +18,7 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, |
18 | 18 |
#' groupBy or JoinBy input parameter |
19 | 19 |
#' |
20 | 20 |
#' @param groupBy_regions vector of string made up by schema field attribute |
21 |
-#' @param region_aggregates It accept a series of aggregate function on |
|
21 |
+#' @param region_aggregates It accepts a list of aggregate functions on |
|
22 | 22 |
#' region attribute. |
23 | 23 |
#' All the element in the form \emph{key} = \emph{aggregate}. |
24 | 24 |
#' The \emph{aggregate} is an object of class AGGREGATES |
... | ... |
@@ -36,7 +36,7 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, |
36 | 36 |
#' \item list of values: e.g. SUM("pvalue") |
37 | 37 |
#' } |
38 | 38 |
#' "mixed style" is not allowed |
39 |
-#' @param meta_aggregates It accept a series of aggregate function on |
|
39 |
+#' @param meta_aggregates It accepts a list of aggregate functions on |
|
40 | 40 |
#' metadata attribute. |
41 | 41 |
#' All the element in the form \emph{key} = \emph{aggregate}. |
42 | 42 |
#' The \emph{aggregate} is an object of class AGGREGATES |
... | ... |
@@ -61,6 +61,57 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, |
61 | 61 |
#' |
62 | 62 |
#' @examples |
63 | 63 |
#' |
64 |
+#' ## This statement initializes and runs the GMQL server for local execution |
|
65 |
+#' ## and creation of results on disk. Then, with system.file() it defines |
|
66 |
+#' ## the path to the folder "DATASET" in the subdirectory "example" |
|
67 |
+#' ## of the package "RGMQL" and opens such file as a GMQL dataset named "exp" |
|
68 |
+#' ## using customParser |
|
69 |
+#' |
|
70 |
+#' init_gmql() |
|
71 |
+#' test_path <- system.file("example","DATASET",package = "RGMQL") |
|
72 |
+#' exp = read_GMQL(test_path) |
|
73 |
+#' |
|
74 |
+#' ## This GMQL statement groups samples of the input 'exp' dataset according to |
|
75 |
+#' ## their value of the metadata attribute 'tumor_type' and computes the |
|
76 |
+#' ## maximum value that the metadata attribute size takes inside the samples |
|
77 |
+#' ## belonging to each group. The samples in the output GROUPS_T dataset |
|
78 |
+#' ## have a new _group metadata attribute which indicates which group they |
|
79 |
+#' ## belong to, based on the grouping on the metadata attribute tumor_type. |
|
80 |
+#' ## In addition, they present the new metadata aggregate attribute MaxSize. |
|
81 |
+#' ## Note that the samples without metadata attribute tumor_type are assigned |
|
82 |
+#' ## to a single group with _group value equal 0 |
|
83 |
+#' |
|
84 |
+#' GROUPS_T = group_by(exp, conds("tumor_type"), |
|
85 |
+#' meta_aggregates = list(max_size = MAX("size"))) |
|
86 |
+#' |
|
87 |
+#' ## This GMQL statement takes as input dataset the same input dataset as |
|
88 |
+#' ## the previous example. Yet, it calculates new _group values based on the |
|
89 |
+#' ## grouping attribute 'cell', and adds the metadata aggregate attribute |
|
90 |
+#' ## 'n_samp', which counts the number of samples belonging to the respective |
|
91 |
+#' ## group. It has the following output GROUPS_C dataset samples |
|
92 |
+#' ## (note that now no sample has metadata attribute _group with value equal 0 |
|
93 |
+#' ## since all input samples include the metadata attribute cell, |
|
94 |
+#' ## with different values, on which the new grouping is based) |
|
95 |
+#' |
|
96 |
+#' GROUPS_C = group_by(exp, conds("cell"), |
|
97 |
+#' meta_aggregates = list(n_samp AS COUNTSAMP())) |
|
98 |
+#' |
|
99 |
+#' ## This GMQL statement groups the regions of each 'exp' dataset sample by |
|
100 |
+#' ## region coordinates chr, left, right, strand (these are implicitly |
|
101 |
+#' ## considered) and the additional region attribute score (which is explicitly |
|
102 |
+#' ## specified), and keeps only one region for each group. |
|
103 |
+#' ## In the output GROUPS dataset schema, the new region attributes |
|
104 |
+#' ## avg_pvalue and max_qvalue are added, respectively computed as the |
|
105 |
+#' ## average of the values taken by the pvalue and the maximum of the values |
|
106 |
+#' ## taken by the qvalue region attributes in the regions grouped together, |
|
107 |
+#' ## and the computed value is assigned to each region of each output sample. |
|
108 |
+#' ## Note that the region attributes which are not coordinates or score are |
|
109 |
+#' ## discarded. |
|
110 |
+#' |
|
111 |
+#' GROUPS = group_by(exp, groupBy_regions = "score", |
|
112 |
+#' region_aggregates = list(avg_pvalue = AVG("pvalue"), |
|
113 |
+#' max_qvalue = MAX("qvalue"))) |
|
114 |
+#' |
|
64 | 115 |
#' @name group_by |
65 | 116 |
#' @rdname group_by |
66 | 117 |
#' @aliases group_by,GMQLDataset-method |
... | ... |
@@ -73,8 +73,8 @@ |
73 | 73 |
#' init_gmql() |
74 | 74 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
75 | 75 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
76 |
-#' TSS = read_dataset(test_path) |
|
77 |
-#' HM = read_dataset(test_path2) |
|
76 |
+#' TSS = read_GMQL(test_path) |
|
77 |
+#' HM = read_GMQL(test_path2) |
|
78 | 78 |
#' |
79 | 79 |
#' ## Given a dataset 'HM' and one called 'TSS' with a sample including |
80 | 80 |
#' ## Transcription Start Site annotations, it searches for those regions of HM |
... | ... |
@@ -83,10 +83,8 @@ |
83 | 83 |
#' ## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
84 | 84 |
#' ## from the same provider (joinby clause). |
85 | 85 |
#' |
86 |
-#' |
|
87 |
-#' join_data = merge(TSS, HM, |
|
88 |
-#' genometric_predicate = list(MD(1), DLE(120000)), DF("provider"), |
|
89 |
-#' region_output = "RIGHT") |
|
86 |
+#' join_data = merge(TSS, HM, genometric_predicate = list(MD(1), DLE(120000)), |
|
87 |
+#' conds("provider"), region_output = "RIGHT") |
|
90 | 88 |
#' |
91 | 89 |
#' |
92 | 90 |
#' @name merge |
... | ... |
@@ -95,7 +93,7 @@ |
95 | 93 |
#' @export |
96 | 94 |
setMethod("merge", c("GMQLDataset","GMQLDataset"), |
97 | 95 |
function(x, y, genometric_predicate = NULL, |
98 |
- region_output = "CAT", joinBy = NULL, reg_attr = NULL) |
|
96 |
+ region_output = "CAT", joinBy = conds(), reg_attr = c("")) |
|
99 | 97 |
{ |
100 | 98 |
ptr_data_x <- value(x) |
101 | 99 |
ptr_data_y <- value(y) |
... | ... |
@@ -150,8 +148,8 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy, |
150 | 148 |
|
151 | 149 |
if(!length(reg_attributes)) |
152 | 150 |
reg_attributes <- .jnull("java/lang/String") |
153 |
- |
|
154 |
- reg_attributes <- .jarray(reg_attributes) |
|
151 |
+ else |
|
152 |
+ reg_attributes <- .jarray(reg_attributes, dispatch = TRUE) |
|
155 | 153 |
} |
156 | 154 |
else |
157 | 155 |
reg_attributes <- .jnull("java/lang/String") |
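Note: dispatch = TRUE asks rJava to wrap the created Java array in its most specific reference class rather than a generic reference. A small sketch of the two calls (assumes a JVM started with .jinit()):

    library(rJava)
    .jinit()
    plain <- .jarray(c("antibody_target", "cell"))                   # generic array reference
    typed <- .jarray(c("antibody_target", "cell"), dispatch = TRUE)  # dispatched reference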
... | ... |
@@ -58,8 +58,8 @@ |
58 | 58 |
#' init_gmql() |
59 | 59 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
60 | 60 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
61 |
-#' exp = read_dataset(test_path) |
|
62 |
-#' ref = read_dataset(test_path2) |
|
61 |
+#' exp = read_GMQL(test_path) |
|
62 |
+#' ref = read_GMQL(test_path2) |
|
63 | 63 |
#' |
64 | 64 |
#' # It counts the number of regions in each sample from exp that overlap with |
65 | 65 |
#' # a ref region, and for each ref region it computes the minimum score |
... | ... |
@@ -77,7 +77,7 @@ |
77 | 77 |
#' @aliases map-method |
78 | 78 |
#' @export |
79 | 79 |
setMethod("map", "GMQLDataset", |
80 |
- function(x, y, ..., joinBy = NULL, count_name = "") |
|
80 |
+ function(x, y, ..., joinBy = conds(), count_name = "") |
|
81 | 81 |
{ |
82 | 82 |
left_data <- value(x) |
83 | 83 |
right_data <- value(y) |
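Note: joinBy now defaults to conds() and count_name to "". A hedged usage sketch, reusing 'exp' and 'ref' from the roxygen example above; when count_name is left empty, the counting attribute falls back to the default name documented for this method:

    out  <- map(ref, exp, joinBy = conds("cell_tissue"))
    out2 <- map(ref, exp, joinBy = conds("cell_tissue"), count_name = "n_overlap")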
... | ... |
@@ -16,7 +16,7 @@ |
16 | 16 |
#' |
17 | 17 |
#' init_gmql() |
18 | 18 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
19 |
-#' data = read_dataset(test_path) |
|
19 |
+#' data = read_GMQL(test_path) |
|
20 | 20 |
#' |
21 | 21 |
#' ## The following statement materialize the dataset, previoulsy read, at |
22 | 22 |
#' ## th specific destination path into local folder "ds1" opportunely created |
... | ... |
@@ -116,7 +116,7 @@ collect.GMQLDataset <- function(x, dir_out = getwd(), name = "ds1") |
116 | 116 |
#' |
117 | 117 |
#' init_gmql() |
118 | 118 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
119 |
-#' data = read_dataset(test_path) |
|
119 |
+#' data = read_GMQL(test_path) |
|
120 | 120 |
#' |
121 | 121 |
#' ## The following statement materialize the dataset, previoulsy read, at |
122 | 122 |
#' ## th specific destination path into local folder "ds1" opportunely created |
... | ... |
@@ -188,9 +188,9 @@ gmql_materialize <- function(input_data, dir_out, name) |
188 | 188 |
#' |
189 | 189 |
#' init_gmql() |
190 | 190 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
191 |
-#' rd = read_dataset(test_path) |
|
191 |
+#' rd = read_GMQL(test_path) |
|
192 | 192 |
#' |
193 |
-#' aggr = aggregate(rd, list(DF("antibody_target", "cell_karyotype"))) |
|
193 |
+#' aggr = aggregate(rd, conds(c("antibody_target", "cell_karyotype"))) |
|
194 | 194 |
#' taken <- take(aggr, rows = 45) |
195 | 195 |
#' |
196 | 196 |
#' @name take |
... | ... |
@@ -31,7 +31,7 @@ |
31 | 31 |
#' |
32 | 32 |
#' init_gmql() |
33 | 33 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
34 |
-#' exp = read_dataset(test_path) |
|
34 |
+#' exp = read_GMQL(test_path) |
|
35 | 35 |
#' |
36 | 36 |
#' ## This statement creates a dataset called merged which contains one |
37 | 37 |
#' ## sample for each antibody_target and cell value found within the metadata |
... | ... |
@@ -49,7 +49,7 @@ |
49 | 49 |
#' @export |
50 | 50 |
#' |
51 | 51 |
setMethod("aggregate", "GMQLDataset", |
52 |
- function(x, groupBy = NULL) |
|
52 |
+ function(x, groupBy = conds()) |
|
53 | 53 |
{ |
54 | 54 |
ptr_data = value(x) |
55 | 55 |
gmql_merge(ptr_data, groupBy) |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
arrange.GMQLDataset <- function(.data, metadata_ordering = NULL, |
2 |
- regions_ordering = NULL, fetch_opt = NULL, num_fetch = 0L, |
|
3 |
- reg_fetch_opt = NULL, reg_num_fetch = 0L) |
|
2 |
+ regions_ordering = NULL, fetch_opt = "", num_fetch = 0L, |
|
3 |
+ reg_fetch_opt = "", reg_num_fetch = 0L) |
|
4 | 4 |
{ |
5 | 5 |
ptr_data <- value(.data) |
6 | 6 |
gmql_order(ptr_data, metadata_ordering, regions_ordering, |
... | ... |
@@ -70,7 +70,7 @@ arrange.GMQLDataset <- function(.data, metadata_ordering = NULL, |
70 | 70 |
#' |
71 | 71 |
#' init_gmql() |
72 | 72 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
73 |
-#' data = read_dataset(test_path) |
|
73 |
+#' data = read_GMQL(test_path) |
|
74 | 74 |
#' |
75 | 75 |
#' ## The following statement orders the samples according to the Region_Count |
76 | 76 |
#' ## metadata attribute and takes the two samples that have the highest count. |
... | ... |
@@ -88,7 +88,7 @@ setMethod("arrange", "GMQLDataset", arrange.GMQLDataset) |
88 | 88 |
gmql_order <- function(input_data, metadata_ordering, regions_ordering, |
89 | 89 |
fetch_opt, num_fetch, reg_fetch_opt, reg_num_fetch) |
90 | 90 |
{ |
91 |
- if(!is.null(fetch_opt)) |
|
91 |
+ if(!is.null(fetch_opt) && !identical(fetch_opt,"")) |
|
92 | 92 |
fetch_opt <- .check_option(fetch_opt) |
93 | 93 |
else |
94 | 94 |
fetch_opt <- .jnull("java/lang/String") |
... | ... |
@@ -103,7 +103,7 @@ gmql_order <- function(input_data, metadata_ordering, regions_ordering, |
103 | 103 |
else |
104 | 104 |
reg_num_fetch <- 0L |
105 | 105 |
|
106 |
- if(!is.null(reg_fetch_opt)) |
|
106 |
+ if(!is.null(reg_fetch_opt) && !identical(reg_fetch_opt,"")) |
|
107 | 107 |
reg_fetch_opt <- .check_option(reg_fetch_opt) |
108 | 108 |
else |
109 | 109 |
reg_fetch_opt <- .jnull("java/lang/String") |
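Note: the empty-string defaults are now treated like the former NULLs (no fetch option applied). A minimal arrange() sketch that does set one, reusing the 'data' dataset from the roxygen example above:

    asc <- arrange(data, list(ASC("Region_Count")), fetch_opt = "mtop", num_fetch = 2L)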
... | ... |
@@ -85,7 +85,7 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
85 | 85 |
#' |
86 | 86 |
#' init_gmql() |
87 | 87 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
88 |
-#' data = read_dataset(test_path) |
|
88 |
+#' data = read_GMQL(test_path) |
|
89 | 89 |
#' |
90 | 90 |
#' ## It creates a new dataset called CTCF_NORM_SCORE by preserving all |
91 | 91 |
#' ## region attributes apart from score, and creating a new region attribute |
... | ... |
@@ -49,24 +49,26 @@ |
49 | 49 |
#' |
50 | 50 |
#' init_gmql() |
51 | 51 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
52 |
-#' data = read_dataset(test_path) |
|
52 |
+#' data = read_GMQL(test_path) |
|
53 | 53 |
#' |
54 | 54 |
#' ## This statement opens such folder as a GMQL dataset named "data" using |
55 | 55 |
#' ## "NarrowPeakParser" |
56 |
-#' dataPeak = read_dataset(test_path,"NarrowPeakParser") |
|
56 |
+#' dataPeak = read_GMQL(test_path,"NarrowPeakParser") |
|
57 | 57 |
#' |
58 | 58 |
#' ## This statement reads a remote public dataset stored into GMQL system |
59 | 59 |
#' ## repository. For a public dataset in a (remote) GMQL repository the |
60 | 60 |
#' ## prefix "public." is needed before dataset name |
61 | 61 |
#' |
62 |
-#' data1 = read_dataset("public.Example_Dataset1",is_local = FALSE) |
|
62 |
+#' remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
|
63 |
+#' login_gmql(remote_url) |
|
64 |
+#' data1 = read_GMQL("public.Example_Dataset1",is_local = FALSE) |
|
63 | 65 |
#' |
64 |
-#' @name read_dataset |
|
66 |
+#' @name read_GMQL |
|
65 | 67 |
#' @rdname read-function |
66 | 68 |
#' @export |
67 | 69 |
#' |
68 |
-read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
|
69 |
- is_GMQL=TRUE) |
|
70 |
+read_GMQL <- function(dataset, parser = "CustomParser", is_local = TRUE, |
|
71 |
+ is_GMQL = TRUE) |
|
70 | 72 |
{ |
71 | 73 |
.check_input(dataset) |
72 | 74 |
.check_logical(is_local) |
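Note: remote reads now log in first. A sketch of the full round trip; the closing logout_gmql() call is an assumption based on the export list above, not part of this example:

    remote_url <- "http://genomic.deib.polimi.it/gmql-rest-r/"
    login_gmql(remote_url)                   # guest session
    data1 <- read_GMQL("public.Example_Dataset1", is_local = FALSE)
    logout_gmql(remote_url)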
... | ... |
@@ -128,11 +130,11 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
128 | 130 |
#' |
129 | 131 |
#' @param samples GRangesList |
130 | 132 |
#' |
131 |
-#' @name read_dataset |
|
133 |
+#' @name read_GMQL |
|
132 | 134 |
#' @rdname read-function |
133 | 135 |
#' @export |
134 | 136 |
#' |
135 |
-read <- function(samples) |
|
137 |
+read_GRangesList <- function(samples) |
|
136 | 138 |
{ |
137 | 139 |
if(!is(samples,"GRangesList")) |
138 | 140 |
stop("only GrangesList") |
... | ... |
@@ -70,7 +70,7 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
70 | 70 |
#' |
71 | 71 |
#' init_gmql() |
72 | 72 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
73 |
-#' data <- read_dataset(test_path) |
|
73 |
+#' data <- read_GMQL(test_path) |
|
74 | 74 |
#' |
75 | 75 |
#' ## This statement selects from input data samples of patients younger |
76 | 76 |
#' ## than 70 years old, based on filtering on sample metadata attribute |
... | ... |
@@ -83,7 +83,7 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
83 | 83 |
#' ## as a GMQL dataset named "join_data" |
84 | 84 |
#' |
85 | 85 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
86 |
-#' join_data <- read_dataset(test_path2) |
|
86 |
+#' join_data <- read_GMQL(test_path2) |
|
87 | 87 |
#' |
88 | 88 |
#' ## This statement creates a new dataset called 'jun_tf' by selecting those |
89 | 89 |
#' ## samples and their regions from the existing 'data' dataset such that: |
... | ... |
@@ -94,10 +94,10 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
94 | 94 |
#' ## attribute equally called cell has in at least one sample |
95 | 95 |
#' ## of the 'join_data' dataset. |
96 | 96 |
#' ## For each sample satisfying previous conditions, only its regions that |
97 |
-#' ## have a region attribute called pValue with the associated value |
|
97 |
+#' ## have a region attribute called 'pvalue' with the associated value |
|
98 | 98 |
#' ## less than 0.01 are conserved in output |
99 | 99 |
#' |
100 |
-#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
|
100 |
+#' jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, |
|
101 | 101 |
#' semijoin(join_data, TRUE, conds("cell"))) |
102 | 102 |
#' |
103 | 103 |
#' |
... | ... |
@@ -147,16 +147,8 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
147 | 147 |
#' common with the same attributes defined in one sample of '.data' |
148 | 148 |
#' FALSE => semijoin condition is evaluated accordingly. |
149 | 149 |
#' |
150 |
-#' @param groupBy list of evalation functions to define evaluation on metadata: |
|
151 |
-#' \itemize{ |
|
152 |
-#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
153 |
-#' if they both end with \emph{value} and, if they have further prefixes, |
|
154 |
-#' the two prefix sequence are identical.} |
|
155 |
-#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
156 |
-#' as \emph{value} match; no further prefixes are allowed.} |
|
157 |
-#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
158 |
-#' if both end with \emph{value}.} |
|
159 |
-#' } |
|
150 |
+#' @param groupBy \code{\link{condition_evaluation}} function to support |
|
151 |
+#' methods with groupBy or JoinBy input parameter |
|
160 | 152 |
#' |
161 | 153 |
#' @examples |
162 | 154 |
#' |
... | ... |
@@ -169,8 +161,8 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
169 | 161 |
#' init_gmql() |
170 | 162 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
171 | 163 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
172 |
-#' data <- read_dataset(test_path) |
|
173 |
-#' join_data <- read_dataset(test_path2) |
|
164 |
+#' data <- read_GMQL(test_path) |
|
165 |
+#' join_data <- read_GMQL(test_path2) |
|
174 | 166 |
#' |
175 | 167 |
#' # It creates a new dataset called 'jun_tf' by selecting those samples and |
176 | 168 |
#' # their regions from the existing 'data' dataset such that: |
... | ... |
@@ -184,7 +176,7 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
184 | 176 |
#' # have a region attribute called pValue with the associated value |
185 | 177 |
#' # less than 0.01 are conserved in output |
186 | 178 |
#' |
187 |
-#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
|
179 |
+#' jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, |
|
188 | 180 |
#' semijoin(join_data, TRUE, conds("cell"))) |
189 | 181 |
#' |
190 | 182 |
#' @return semijoin condition as list |
... | ... |
@@ -37,8 +37,8 @@ |
37 | 37 |
#' init_gmql() |
38 | 38 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
39 | 39 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
40 |
-#' data1 <- read_dataset(test_path) |
|
41 |
-#' data2 <- read_dataset(test_path2) |
|
40 |
+#' data1 <- read_GMQL(test_path) |
|
41 |
+#' data2 <- read_GMQL(test_path2) |
|
42 | 42 |
#' |
43 | 43 |
#' ## This statement creates a dataset called 'full' which contains all samples |
44 | 44 |
#' ## from the datasets 'data1' and 'data2' |
... | ... |
@@ -1,6 +1,6 @@ |
1 |
-#' @importFrom rJava .jpackage |
|
2 |
-#' @importFrom rJava .jinit |
|
3 |
- |
|
1 |
+#' @importFrom rJava .jpackage .jinit |
|
2 |
+#' @import RGMQLScalaLib |
|
3 |
+#' |
|
4 | 4 |
.onLoad <- function(libname, pkgname) { |
5 | 5 |
.jpackage(pkgname, lib.loc = libname) |
6 | 6 |
# tools::vignetteEngine("knitr", pattern = "[.]Rmd$", package = "knitr") |
... | ... |
@@ -16,7 +16,6 @@ |
16 | 16 |
} |
17 | 17 |
|
18 | 18 |
|
19 |
-#' @importFrom utils download.file |
|
20 | 19 |
#' @importFrom rJava .jinit .jaddClassPath |
21 | 20 |
#' |
22 | 21 |
initGMQLscalaAPI <- function(libLoc, mem = "12G") { |
... | ... |
@@ -28,7 +28,7 @@ |
28 | 28 |
#' |
29 | 29 |
#' init_gmql() |
30 | 30 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
31 |
-#' data = read_dataset(test_path) |
|
31 |
+#' data = read_GMQL(test_path) |
|
32 | 32 |
#' |
33 | 33 |
#' ## It orders the samples according to the Region_Count metadata attribute |
34 | 34 |
#' ## and takes the two samples that have the lowest count. |
... | ... |
@@ -25,7 +25,7 @@ if(getRversion() >= "3.1.0") |
25 | 25 |
#' @param password string password used during signup |
26 | 26 |
#' |
27 | 27 |
#' @details |
28 |
-#' If both username and password are NULL you will be logged as guest. |
|
28 |
+#' If both username and password are missing you will be logged in as a guest. |
|
29 | 29 |
#' After login you will receive an authentication token. |
30 | 30 |
#' As token remains vaild on server (until the next login / registration) |
31 | 31 |
#' a user can safely use a token for a previous session as a convenience; |
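Note: a brief sketch of the two login modes described here; the username/password argument names follow the parameters documented in this block:

    remote_url <- "http://genomic.deib.polimi.it/gmql-rest-r/"
    login_gmql(remote_url)                                                 # guest login
    login_gmql(remote_url, username = "myuser", password = "mypassword")  # registered user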
... | ... |
@@ -202,7 +202,7 @@ show_queries_list <- function(url) |
202 | 202 |
#' @param url string url of server: It must contain the server address |
203 | 203 |
#' and base url; service name is added automatically |
204 | 204 |
#' @param queryName string name of query |
205 |
-#' @param query string text of GMQL query |
|
205 |
+#' @param queryTxt string text of GMQL query |
|
206 | 206 |
#' |
207 | 207 |
#' @return None |
208 | 208 |
#' |
... | ... |
@@ -6,6 +6,8 @@ |
6 | 6 |
\alias{AGGREGATES-Object} |
7 | 7 |
\alias{COUNT} |
8 | 8 |
\alias{AGGREGATES-Object} |
9 |
+\alias{COUNTSAMP} |
|
10 |
+\alias{AGGREGATES-Object} |
|
9 | 11 |
\alias{MIN} |
10 | 12 |
\alias{AGGREGATES-Object} |
11 | 13 |
\alias{MAX} |
... | ... |
@@ -31,6 +33,8 @@ SUM(value) |
31 | 33 |
|
32 | 34 |
COUNT() |
33 | 35 |
|
36 |
+COUNTSAMP() |
|
37 |
+ |
|
34 | 38 |
MIN(value) |
35 | 39 |
|
36 | 40 |
MAX(value) |
... | ... |
@@ -67,6 +71,8 @@ to be used in GMQL functions that require aggregate on value. |
67 | 71 |
function sum, performing all the type conversions needed } |
68 | 72 |
\item{COUNT: It prepares input parameter to be passed to the library |
69 | 73 |
function count, performing all the type conversions needed } |
74 |
+\item{COUNTSAMP: It prepares input parameter to be passed to the library |
|
75 |
+function counting the number of samples, performing all the type conversions needed } |
|
70 | 76 |
\item{MIN: It prepares input parameter to be passed to the library |
71 | 77 |
function minimum, performing all the type conversions needed } |
72 | 78 |
\item{MAX: It prepares input parameter to be passed to the library |
... | ... |
@@ -101,7 +107,7 @@ function third quartile, performing all the type conversions needed } |
101 | 107 |
|
102 | 108 |
init_gmql() |
103 | 109 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
104 |
-exp = read_dataset(test_path) |
|
110 |
+exp = read_GMQL(test_path) |
|
105 | 111 |
|
106 | 112 |
## This statement copies all samples of exp dataset into res dataset, and |
107 | 113 |
## then calculates new metadata attribute for each of them: |
... | ... |
@@ -126,8 +132,7 @@ res = extend(exp, max_score = MAX("score")) |
126 | 132 |
## attribute the average signal of the overlapping regions; |
127 | 133 |
## the result has one sample for each input cell. |
128 | 134 |
|
129 |
-res = cover(exp, 2, 3, groupBy = list(DF("cell")), |
|
130 |
-avg_signal = AVG("signal") ) |
|
135 |
+res = cover(exp, 2, 3, groupBy = conds("cell"), avg_signal = AVG("signal")) |
|
131 | 136 |
|
132 | 137 |
## This statement copies all samples of DATA dataset into OUT dataset, |
133 | 138 |
## and then for each of them it adds another metadata attribute, allScores, |
... | ... |
@@ -6,7 +6,7 @@ |
6 | 6 |
\alias{aggregate,GMQLDataset-method} |
7 | 7 |
\title{Method aggregate} |
8 | 8 |
\usage{ |
9 |
-\S4method{aggregate}{GMQLDataset}(x, groupBy = NULL) |
|
9 |
+\S4method{aggregate}{GMQLDataset}(x, groupBy = conds()) |
|
10 | 10 |
} |
11 | 11 |
\arguments{ |
12 | 12 |
\item{x}{GMQLDataset class object} |
... | ... |
@@ -40,7 +40,7 @@ not present in the grouping metadata parameter are disregarded. |
40 | 40 |
|
41 | 41 |
init_gmql() |
42 | 42 |
test_path <- system.file("example","DATASET",package = "RGMQL") |
43 |
-exp = read_dataset(test_path) |
|
43 |
+exp = read_GMQL(test_path) |
|
44 | 44 |
|
45 | 45 |
## This statement creates a dataset called merged which contains one |
46 | 46 |
## sample for each antibody_target and cell value found within the metadata |
... | ... |
@@ -49,6 +49,6 @@ exp = read_dataset(test_path) |
49 | 49 |
## antibody_target and cell metadata |
50 | 50 |
## attributes. |
51 | 51 |
|
52 |
-merged = aggregate(exp, condition_evaluation(c("antibody_target","cell"))) |
|
52 |
+merged = aggregate(exp, conds(c("antibody_target","cell"))) |
|
53 | 53 |
|
54 | 54 |
} |
... | ... |
@@ -7,8 +7,8 @@ |
7 | 7 |
\title{Method arrange} |
8 | 8 |
\usage{ |
9 | 9 |
\S4method{arrange}{GMQLDataset}(.data, metadata_ordering = NULL, |
10 |
- regions_ordering = NULL, fetch_opt = NULL, num_fetch = 0L, |
|
11 |
- reg_fetch_opt = NULL, reg_num_fetch = 0L) |
|
10 |
+ regions_ordering = NULL, fetch_opt = "", num_fetch = 0L, |
|
11 |
+ reg_fetch_opt = "", reg_num_fetch = 0L) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{.data}{GMQLDataset class object} |
... | ... |
@@ -25,8 +25,8 @@ The functions available are: \code{\link{ASC}}, \code{\link{DESC}}.} |
25 | 25 |
first k samples; it can assume the values: |
26 | 26 |
\itemize{ |
27 | 27 |
\item{mtop: it fetches the first k samples} |
28 |
-\item{mtopg: it fetches the percentage of samples.} |
|
29 |
-\item{mtopp: it fetches the first k samples in each group.} |
|
28 |
+\item{mtopp: it fetches the first k percentage of samples.} |
|
29 |
+\item{mtopg: it fetches the first k samples in each group.} |
|
30 | 30 |
|
31 | 31 |
} |
32 | 32 |
if NULL, \emph{num_fetch} is not considered} |
... | ... |
@@ -39,8 +39,8 @@ s} |
39 | 39 |
first k regions; it can assume the values: |
40 | 40 |
\itemize{ |
41 | 41 |
\item{rtop: it fetches the first k regions.} |
42 |
-\item{rtopg: it fetches the first k percentage of regions.} |
|
43 |
-\item{rtopp: it fetches the first k regions in each group.} |
|
42 |
+\item{rtopp: it fetches the first k percentage of regions.} |
|
43 |
+\item{rtopg: it fetches the first k regions in each group.} |
|
44 | 44 |
} |
45 | 45 |
if NULL, \emph{reg_num_fetch} is not considered} |
46 | 46 |
|
... | ... |
@@ -73,7 +73,7 @@ added to either metadata, or regions, or both of them as specified in inputs |
73 | 73 |
|
74 | 74 |
init_gmql() |
75 | 75 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
76 |
-data = read_dataset(test_path) |
|
76 |
+data = read_GMQL(test_path) |
|
77 | 77 |
|
78 | 78 |
## The following statement orders the samples according to the Region_Count |
79 | 79 |
## metadata attribute and takes the two samples that have the highest count. |
... | ... |
@@ -42,7 +42,7 @@ folder path |
42 | 42 |
|
43 | 43 |
init_gmql() |
44 | 44 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
45 |
-data = read_dataset(test_path) |
|
45 |
+data = read_GMQL(test_path) |
|
46 | 46 |
|
47 | 47 |
## The following statement materialize the dataset, previoulsy read, at |
48 | 48 |
## th specific destination path into local folder "ds1" opportunely created |
... | ... |
@@ -35,7 +35,7 @@ considered.} |
35 | 35 |
|
36 | 36 |
init_gmql() |
37 | 37 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
38 |
-exp = read_dataset(test_path) |
|
38 |
+exp = read_GMQL(test_path) |
|
39 | 39 |
|
40 | 40 |
## The following statement produces an output dataset with a single |
41 | 41 |
## output sample. The COVER operation considers all areas defined by |
... | ... |
@@ -9,7 +9,7 @@ |
9 | 9 |
\usage{ |
10 | 10 |
cover(.data, ...) |
11 | 11 |
|
12 |
-\S4method{cover}{GMQLDataset}(.data, min_acc, max_acc, groupBy = NULL, |
|
12 |
+\S4method{cover}{GMQLDataset}(.data, min_acc, max_acc, groupBy = conds(), |
|
13 | 13 |
variation = "cover", ...) |
14 | 14 |
} |
15 | 15 |
\arguments{ |
... | ... |
@@ -54,8 +54,8 @@ considering any amount of overlapping regions. |
54 | 54 |
ALL() / K, with N and K integer values } |
55 | 55 |
}} |
56 | 56 |
|
57 |
-\item{groupBy}{\code{\link{condition_evaluation}} function to support |
|
58 |
-methods with groupBy or JoinBy input paramter} |
|
57 |
+\item{groupBy}{\code{\link{conds}} function to support methods with |
|
58 |
+groupBy or JoinBy input parameter} |
|
59 | 59 |
|
60 | 60 |
\item{variation}{string identifying the cover GMQL operator variation. |
61 | 61 |
The admissible strings are: |
... | ... |
@@ -114,7 +114,7 @@ are disregarded. |
114 | 114 |
|
115 | 115 |
init_gmql() |
116 | 116 |
test_path <- system.file("example","DATASET",package = "RGMQL") |
117 |
-exp = read_dataset(test_path) |
|
117 |
+exp = read_GMQL(test_path) |
|
118 | 118 |
|
119 | 119 |
## the following statement produces an output dataset with a single output |
120 | 120 |
## sample. The COVER operation considers all areas defined by a minimum |
... | ... |
@@ -131,7 +131,6 @@ res = cover(exp, 2, ANY()) |
131 | 131 |
## regions the minimum pvalue of the overlapping regions (min_pvalue) |
132 | 132 |
## and their Jaccard indexes (JaccardIntersect and JaccardResult). |
133 | 133 |
|
134 |
-res = cover(exp, 2, 3, groupBy = condition_evaluation(c("cell")), |
|
135 |
-min_pValue = MIN("pvalue")) |
|
134 |
+res = cover(exp, 2, 3, groupBy = conds("cell"), min_pValue = MIN("pvalue")) |
|
136 | 135 |
|
137 | 136 |
} |
... | ... |
@@ -89,8 +89,8 @@ directions of the genome.} |
89 | 89 |
init_gmql() |
90 | 90 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
91 | 91 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
92 |
-TSS = read_dataset(test_path) |
|
93 |
-HM = read_dataset(test_path2) |
|
92 |
+TSS = read_GMQL(test_path) |
|
93 |
+HM = read_GMQL(test_path2) |
|
94 | 94 |
|
95 | 95 |
## Given a dataset HM and one called TSS with a sample including |
96 | 96 |
## Transcription Start Site annotations, it searches for those regions of HM |
... | ... |
@@ -100,7 +100,7 @@ HM = read_dataset(test_path2) |
100 | 100 |
## obtained from the same provider (joinby clause). |
101 | 101 |
|
102 | 102 |
join_data = merge(TSS, HM, |
103 |
-genometric_predicate = list(MD(1), DL(1200)), DF("provider"), |
|
103 |
+genometric_predicate = list(MD(1), DL(1200)), conds("provider"), |
|
104 | 104 |
region_output = "RIGHT") |
105 | 105 |
|
106 | 106 |
## Given a dataset 'HM' and one called 'TSS' with a sample including |
... | ... |
@@ -111,7 +111,7 @@ region_output = "RIGHT") |
111 | 111 |
## from the same provider (joinby clause). |
112 | 112 |
|
113 | 113 |
join_data = merge(TSS, HM, |
114 |
-genometric_predicate = list(MD(1), DGE(12000), DOWN()), |
|
115 |
-DF("provider"), region_output = "RIGHT") |
|
114 |
+genometric_predicate = list(MD(1), DGE(12000), DOWN()), conds("provider"), |
|
115 |
+region_output = "RIGHT") |
|
116 | 116 |
|
117 | 117 |
} |
... | ... |
@@ -22,7 +22,7 @@ The function works only after invoking at least one collect |
22 | 22 |
|
23 | 23 |
init_gmql() |
24 | 24 |
test_path <- system.file("example","DATASET",package = "RGMQL") |
25 |
-data = read_dataset(test_path) |
|
25 |
+data = read_GMQL(test_path) |
|
26 | 26 |
|
27 | 27 |
## The following statement materialize the dataset, previoulsy read, at |
28 | 28 |
## th specific destination path into local folder "ds1" opportunely created |
... | ... |
@@ -54,7 +54,7 @@ Aggregate functions are applied sample by sample. |
54 | 54 |
|
55 | 55 |
init_gmql() |
56 | 56 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
57 |
-data <- read_dataset(test_path) |
|
57 |
+data <- read_GMQL(test_path) |
|
58 | 58 |
|
59 | 59 |
## This statement counts the regions in each sample and stores their number |
60 | 60 |
## as value of the new metadata attribute RegionCount of the sample. |
... | ... |
@@ -50,7 +50,7 @@ no sample is extracted. |
50 | 50 |
|
51 | 51 |
init_gmql() |
52 | 52 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
53 |
-data <- read_dataset(test_path) |
|
53 |
+data <- read_GMQL(test_path) |
|
54 | 54 |
|
55 | 55 |
## This statement selects from input data samples of patients younger |
56 | 56 |
## than 70 years old, based on filtering on sample metadata attribute |
... | ... |
@@ -63,7 +63,7 @@ filter_data <- filter(data, patient_age < 70) |
63 | 63 |
## as a GMQL dataset named "join_data" |
64 | 64 |
|
65 | 65 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
66 |
-join_data <- read_dataset(test_path2) |
|
66 |
+join_data <- read_GMQL(test_path2) |
|
67 | 67 |
|
68 | 68 |
## This statement creates a new dataset called 'jun_tf' by selecting those |
69 | 69 |
## samples and their regions from the existing 'data' dataset such that: |
... | ... |
@@ -74,11 +74,11 @@ join_data <- read_dataset(test_path2) |
74 | 74 |
## attribute equally called cell has in at least one sample |
75 | 75 |
## of the 'join_data' dataset. |
76 | 76 |
## For each sample satisfying previous conditions, only its regions that |
77 |
-## have a region attribute called pValue with the associated value |
|
77 |
+## have a region attribute called 'pvalue' with the associated value |
|
78 | 78 |
## less than 0.01 are conserved in output |
79 | 79 |
|
80 |
-jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
|
81 |
-semijoin(join_data, TRUE, list(DF("cell")))) |
|
80 |
+jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, |
|
81 |
+semijoin(join_data, TRUE, conds("cell"))) |
|
82 | 82 |
|
83 | 83 |
|
84 | 84 |
} |
... | ... |
@@ -6,27 +6,19 @@ |
6 | 6 |
\alias{group_by,GMQLDataset-method} |
7 | 7 |
\title{Method group_by} |
8 | 8 |
\usage{ |
9 |
-\S4method{group_by}{GMQLDataset}(.data, groupBy_meta = NULL, |
|
10 |
- groupBy_regions = NULL, region_aggregates = NULL, |
|
9 |
+\S4method{group_by}{GMQLDataset}(.data, groupBy_meta = conds(), |
|
10 |
+ groupBy_regions = c(""), region_aggregates = NULL, |
|
11 | 11 |
meta_aggregates = NULL) |
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{.data}{GMQLDataset object} |
15 | 15 |
|
16 |
-\item{groupBy_meta}{it define condition evaluation on metadata. |
|
17 |
-\itemize{ |
|
18 |
-\item{\code{\link{FN}}: Fullname evaluation, two attributes match |
|
19 |
-if they both end with value and, if they have a further prefixes, |
|
20 |
-the two prefix sequence are identical} |
|
21 |
-\item{\code{\link{EX}}: Exact evaluation, only attributes exactly |
|
22 |
-as value will match; no further prefixes are allowed. } |
|
23 |
-\item{\code{\link{DF}}: Default evaluation, the two attributes match |
|
24 |
-if both end with value.} |
|
25 |
-}} |
|
16 |
+\item{groupBy_meta}{\code{\link{conds}} function to support methods with |
|
17 |
+groupBy or JoinBy input parameter} |
|
26 | 18 |
|
27 | 19 |
\item{groupBy_regions}{vector of string made up by schema field attribute} |
28 | 20 |
|
29 |
-\item{region_aggregates}{It accept a series of aggregate function on |
|
21 |
+\item{region_aggregates}{It accepts a list of aggregate functions on |
|
30 | 22 |
region attribute. |
31 | 23 |
All the element in the form \emph{key} = \emph{aggregate}. |
32 | 24 |
The \emph{aggregate} is an object of class AGGREGATES |
... | ... |
@@ -45,7 +37,7 @@ attributes. Two style are allowed: |
45 | 37 |
} |
46 | 38 |
"mixed style" is not allowed} |
47 | 39 |
|
48 |
-\item{meta_aggregates}{It accept a series of aggregate function on |
|
40 |
+\item{meta_aggregates}{It accepts a list of aggregate functions on |
|
49 | 41 |
metadata attribute. |
50 | 42 |
All the element in the form \emph{key} = \emph{aggregate}. |
51 | 43 |
The \emph{aggregate} is an object of class AGGREGATES |
... | ... |
@@ -73,4 +65,55 @@ Wrapper to GMQL GROUP operator |
73 | 65 |
} |
74 | 66 |
\examples{ |
75 | 67 |
|
68 |
+## This statement initializes and runs the GMQL server for local execution |
|
69 |
+## and creation of results on disk. Then, with system.file() it defines |
|
70 |
+## the path to the folder "DATASET" in the subdirectory "example" |
|
71 |
+## of the package "RGMQL" and opens such file as a GMQL dataset named "exp" |
|
72 |
+## using customParser |
|
73 |
+ |
|
74 |
+init_gmql() |
|
75 |
+test_path <- system.file("example","DATASET",package = "RGMQL") |
|
76 |
+exp = read_GMQL(test_path) |
|
77 |
+ |
|
78 |
+## This GMQL statement groups samples of the input 'exp' dataset according to |
|
79 |
+## their value of the metadata attribute 'tumor_type' and computes the |
|
80 |
+## maximum value that the metadata attribute size takes inside the samples |
|
81 |
+## belonging to each group. The samples in the output GROUPS_T dataset |
|
82 |
+## have a new _group metadata attribute which indicates which group they |
|
83 |
+## belong to, based on the grouping on the metadata attribute tumor_type. |
|
84 |
+## In addition, they present the new metadata aggregate attribute MaxSize. |
|
85 |
+## Note that the samples without metadata attribute tumor_type are assigned |
|
86 |
+## to a single group with _group value equal 0 |
|
87 |
+ |
|
88 |
+GROUPS_T = group_by(exp, conds("tumor_type"), |
|
89 |
+meta_aggregates = list(max_size = MAX("size"))) |
|
90 |
+ |
|
91 |
+## This GMQL statement takes as input dataset the same input dataset as |
|
92 |
+## the previous example. Yet, it calculates new _group values based on the |
|
93 |
+## grouping attribute 'cell', and adds the metadata aggregate attribute |
|
94 |
+## 'n_samp', which counts the number of samples belonging to the respective |
|
95 |
+## group. It has the following output GROUPS_C dataset samples |
|
96 |
+## (note that now no sample has metadata attribute _group with value equal 0 |
|
97 |
+## since all input samples include the metadata attribute cell, |
|
98 |
+## with different values, on which the new grouping is based) |
|
99 |
+ |
|
100 |
+GROUPS_C = group_by(exp, conds("cell"), |
|
101 |
+meta_aggregates = list(n_samp = COUNTSAMP())) |
|
102 |
+ |
|
103 |
+## This GMQL statement groups the regions of each 'exp' dataset sample by |
|
104 |
+## region coordinates chr, left, right, strand (these are implicitly |
|
105 |
+## considered) and the additional region attribute score (which is explicitly |
|
106 |
+## specified), and keeps only one region for each group. |
|
107 |
+## In the output GROUPS dataset schema, the new region attributes |
|
108 |
+## avg_pvalue and max_qvalue are added, respectively computed as the |
|
109 |
+## average of the values taken by the pvalue and the maximum of the values |
|
110 |
+## taken by the qvalue region attributes in the regions grouped together, |
|
111 |
+## and the computed value is assigned to each region of each output sample. |
|
112 |
+## Note that the region attributes which are not coordinates or score are |
|
113 |
+## discarded. |
|
114 |
+ |
|
115 |
+GROUPS = group_by(exp, groupBy_regions = "score", |
|
116 |
+region_aggregates = list(avg_pvalue = AVG("pvalue"), |
|
117 |
+max_qvalue = MAX("qvalue"))) |
|
118 |
+ |
|
76 | 119 |
} |
... | ... |
@@ -23,7 +23,7 @@ and password, or as guest, using the proper GMQL web service available |
23 | 23 |
on a remote server |
24 | 24 |
} |
25 | 25 |
\details{ |
26 |
-If both username and password are NULL you will be logged as guest. |
|
26 |
+If both username and password are missing you will be logged in as a guest. |
|
27 | 27 |
After login you will receive an authentication token. |
28 | 28 |
As token remains vaild on server (until the next login / registration) |
29 | 29 |
a user can safely use a token for a previous session as a convenience; |
... | ... |
@@ -10,7 +10,7 @@ |
10 | 10 |
\usage{ |
11 | 11 |
map(x, y, ...) |
12 | 12 |
|
13 |
-\S4method{map}{GMQLDataset}(x, y, ..., joinBy = NULL, count_name = NULL) |
|
13 |
+\S4method{map}{GMQLDataset}(x, y, ..., joinBy = conds(), count_name = "") |
|
14 | 14 |
} |
15 | 15 |
\arguments{ |
16 | 16 |
\item{x}{GMQLDataset class object} |
... | ... |
@@ -34,16 +34,8 @@ attributes. Two styles are allowed: |
34 | 34 |
} |
35 | 35 |
"mixed style" is not allowed} |
36 | 36 |
|
37 |
-\item{joinBy}{list of evalation functions to define evaluation on metadata: |
|
38 |
-\itemize{ |
|
39 |
-\item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
40 |
-if they both end with \emph{value} and, if they have further prefixes, |
|
41 |
-the two prefix sequence are identical.} |
|
42 |
-\item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
43 |
-as \emph{value} match; no further prefixes are allowed.} |
|
44 |
-\item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
45 |
-if both end with \emph{value}.} |
|
46 |
-}} |
|
37 |
+\item{joinBy}{\code{\link{conds}} function to support methods with |
|
38 |
+groupBy or JoinBy input parameter} |
|
47 | 39 |
|
48 | 40 |
\item{count_name}{string defining the metadata count name; if it is |
49 | 41 |
not specifying the name is "count_left_right"} |
... | ... |
@@ -85,8 +77,8 @@ present with equal values in both M1 and M2 |
85 | 77 |
init_gmql() |
86 | 78 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
87 | 79 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
88 |
-exp = read_dataset(test_path) |
|
89 |
-ref = read_dataset(test_path2) |
|
80 |
+exp = read_GMQL(test_path) |
|
81 |
+ref = read_GMQL(test_path2) |
|
90 | 82 |
|
91 | 83 |
# It counts the number of regions in each sample from exp that overlap with |
92 | 84 |
# a ref region, and for each ref region it computes the minimum score |
... | ... |
@@ -97,7 +89,6 @@ ref = read_dataset(test_path2) |
97 | 89 |
# but with a different value from the one(s) of ref sample(s), |
98 | 90 |
# are disregarded. |
99 | 91 |
|
100 |
-out = map(ref, exp, minScore = MIN("score"), |
|
101 |
-joinBy = list(DF("cell_tissue"))) |
|
92 |
+out = map(ref, exp, minScore = MIN("score"), joinBy = conds("cell_tissue")) |
|
102 | 93 |
|
103 | 94 |
} |
... | ... |
@@ -7,7 +7,7 @@ |
7 | 7 |
\title{Method merge} |
8 | 8 |
\usage{ |
9 | 9 |
\S4method{merge}{GMQLDataset,GMQLDataset}(x, y, genometric_predicate = NULL, |
10 |
- region_output = "CAT", joinBy = NULL, reg_attr = NULL) |
|
10 |
+ region_output = "CAT", joinBy = conds(), reg_attr = c("")) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 | 13 |
\item{x}{GMQLDataset class object} |
... | ... |
@@ -35,17 +35,17 @@ that satisfy the genometric predicate, (i.e. the output regionis defined as |
35 | 35 |
having left (right) coordinates equal to the minimum (maximum) of the |
36 | 36 |
corresponding coordinate values in the 'x' and 'y' regions satisfying |
37 | 37 |
the genometric predicate)} |
38 |
-\item{LEFT_DISTINCT: It outputs the duplicate elimination of "x" output |
|
38 |
+\item{LEFT_DIST: It outputs the duplicate elimination of "x" output |
|
39 | 39 |
regions with the same values, regardless the "y" paired region and its |
40 | 40 |
values. In this case, the output regions attributes and their values are |
41 | 41 |
all those of "x", and the output metadata are equal to the "x" metadata, |
42 | 42 |
without additional prefixes} |
43 |
-\item{RIGHT_DISTINCT: It outputs the duplicate elimination of "y" output |
|
43 |
+\item{RIGHT_DIST: It outputs the duplicate elimination of "y" output |
|
44 | 44 |
regions with the same values, regardless the "x" paired region and its |
45 | 45 |
values. In this case, the output regions attributes and their values are |
46 | 46 |
all those of "y", and the output metadata are equal to the "y" metadata, |
47 | 47 |
without additional prefixes} |
48 |
-\item{BOTH: outputs the same regions as LEFT, but it adds in the output |
|
48 |
+\item{BOTH: It outputs the same regions as LEFT, but it adds in the output |
|
49 | 49 |
region attributes the coordinates of the "y" dataset region that, |
50 | 50 |
together with the output "x" dataset region, satisfies the equi predicate |
51 | 51 |
and the genometric predicate} |
... | ... |
@@ -85,8 +85,8 @@ respectively. |
85 | 85 |
init_gmql() |
86 | 86 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
87 | 87 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
88 |
-TSS = read_dataset(test_path) |
|
89 |
-HM = read_dataset(test_path2) |
|
88 |
+TSS = read_GMQL(test_path) |
|
89 |
+HM = read_GMQL(test_path2) |
|
90 | 90 |
|
91 | 91 |
## Given a dataset 'HM' and one called 'TSS' with a sample including |
92 | 92 |
## Transcription Start Site annotations, it searches for those regions of HM |
... | ... |
@@ -95,10 +95,8 @@ HM = read_dataset(test_path2) |
95 | 95 |
## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
96 | 96 |
## from the same provider (joinby clause). |
97 | 97 |
|
98 |
- |
|
99 |
-join_data = merge(TSS, HM, |
|
100 |
-genometric_predicate = list(MD(1), DLE(120000)), DF("provider"), |
|
101 |
-region_output = "RIGHT") |
|
98 |
+join_data = merge(TSS, HM, genometric_predicate = list(MD(1), DLE(120000)), |
|
99 |
+conds("provider"), region_output = "RIGHT") |
|
102 | 100 |
|
103 | 101 |
|
104 | 102 |
} |
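Note: given the *_DISTINCT to *_DIST renaming above, a hedged variant of this example using the duplicate-eliminating output; it assumes "LEFT_DIST" is the accepted string for region_output:

    join_left = merge(TSS, HM, genometric_predicate = list(MD(1), DLE(120000)),
                      joinBy = conds("provider"), region_output = "LEFT_DIST")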
... | ... |
@@ -47,7 +47,7 @@ null, performing all the type conversions needed} |
47 | 47 |
|
48 | 48 |
init_gmql() |
49 | 49 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
50 |
-exp = read_dataset(test_path) |
|
50 |
+exp = read_GMQL(test_path) |
|
51 | 51 |
|
52 | 52 |
## This statement allows to select, in all input sample, all those regions |
53 | 53 |
## for which the region attribute score has a value which is greater |
... | ... |
@@ -38,7 +38,7 @@ that require ordering on value; is used only in arrange method |
38 | 38 |
|
39 | 39 |
init_gmql() |
40 | 40 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
41 |
-data = read_dataset(test_path) |
|
41 |
+data = read_GMQL(test_path) |
|
42 | 42 |
|
43 | 43 |
## It orders the samples according to the Region_Count metadata attribute |
44 | 44 |
## and takes the two samples that have the lowest count. |
... | ... |
@@ -1,15 +1,15 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/gmql_read.R |
3 |
-\name{read_dataset} |
|
4 |
-\alias{read_dataset} |
|
5 |
-\alias{read_dataset} |
|
6 |
-\alias{read} |
|
3 |
+\name{read_GMQL} |
|
4 |
+\alias{read_GMQL} |
|
5 |
+\alias{read_GMQL} |
|
6 |
+\alias{read_GRangesList} |
|
7 | 7 |
\title{Function read} |
8 | 8 |
\usage{ |
9 |
-read_dataset(dataset, parser = "CustomParser", is_local = TRUE, |
|
9 |
+read_GMQL(dataset, parser = "CustomParser", is_local = TRUE, |
|
10 | 10 |
is_GMQL = TRUE) |
11 | 11 |
|
12 |
-read(samples) |
|
12 |
+read_GRangesList(samples) |
|
13 | 13 |
} |
14 | 14 |
\arguments{ |
15 | 15 |
\item{dataset}{folder path for GMQL dataset or dataset name on repository} |
... | ... |
@@ -65,16 +65,18 @@ generated. |
65 | 65 |
|
66 | 66 |
init_gmql() |
67 | 67 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
68 |
-data = read_dataset(test_path) |
|
68 |
+data = read_GMQL(test_path) |
|
69 | 69 |
|
70 | 70 |
## This statement opens such folder as a GMQL dataset named "data" using |
71 | 71 |
## "NarrowPeakParser" |
72 |
-dataPeak = read_dataset(test_path,"NarrowPeakParser") |
|
72 |
+dataPeak = read_GMQL(test_path,"NarrowPeakParser") |
|
73 | 73 |
|
74 | 74 |
## This statement reads a remote public dataset stored into GMQL system |
75 | 75 |
## repository. For a public dataset in a (remote) GMQL repository the |
76 | 76 |
## prefix "public." is needed before dataset name |
77 | 77 |
|
78 |
-data1 = read_dataset("public.Example_Dataset1",is_local = FALSE) |
|
78 |
+remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
|
79 |
+login_gmql(remote_url) |
|
80 |
+data1 = read_GMQL("public.Example_Dataset1",is_local = FALSE) |
|
79 | 81 |
|
80 | 82 |
} |
... | ... |
@@ -15,9 +15,9 @@ and base url; service name is added automatically} |
15 | 15 |
|
16 | 16 |
\item{queryName}{string name of query} |
17 | 17 |
|
18 |
-\item{filePath}{string local file path of txt file containing a GMQL query} |
|
18 |
+\item{queryTxt}{string text of GMQL query} |
|
19 | 19 |
|
20 |
-\item{query}{string text of GMQL query} |
|
20 |
+\item{filePath}{string local file path of txt file containing a GMQL query} |
|
21 | 21 |
} |
22 | 22 |
\value{ |
23 | 23 |
None |
... | ... |
@@ -73,7 +73,7 @@ those in the input dataset. It allows to: |
73 | 73 |
|
74 | 74 |
init_gmql() |
75 | 75 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
76 |
-data = read_dataset(test_path) |
|
76 |
+data = read_GMQL(test_path) |
|
77 | 77 |
|
78 | 78 |
## It creates a new dataset called CTCF_NORM_SCORE by preserving all |
79 | 79 |
## region attributes apart from score, and creating a new region attribute |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
\alias{semijoin} |
5 | 5 |
\title{Semijoin condtion} |
6 | 6 |
\usage{ |
7 |
-semijoin(.data, not_in = FALSE, groupBy = NULL) |
|
7 |
+semijoin(.data, not_in = FALSE, groupBy) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{.data}{GMQLDataset class object} |
... | ... |
@@ -16,16 +16,8 @@ in groupBy and these attributes of 'data' have at least one value in |
16 | 16 |
common with the same attributes defined in one sample of '.data' |
17 | 17 |
FALSE => semijoin condition is evaluated accordingly.} |
18 | 18 |
|
19 |
-\item{groupBy}{list of evalation functions to define evaluation on metadata: |
|
20 |
-\itemize{ |
|
21 |
-\item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
22 |
-if they both end with \emph{value} and, if they have further prefixes, |
|
23 |
-the two prefix sequence are identical.} |
|
24 |
-\item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
25 |
-as \emph{value} match; no further prefixes are allowed.} |
|
26 |
-\item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
27 |
-if both end with \emph{value}.} |
|
28 |
-}} |
|
19 |
+\item{groupBy}{\code{\link{condition_evaluation}} function to support |
|
20 |
+methods with groupBy or JoinBy input parameter} |
|
29 | 21 |
} |
30 | 22 |
\value{ |
31 | 23 |
semijoin condition as list |
... | ... |
@@ -45,8 +37,8 @@ semijoin conditions on metadata |
45 | 37 |
init_gmql() |
46 | 38 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
47 | 39 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
48 |
-data <- read_dataset(test_path) |
|
49 |
-join_data <- read_dataset(test_path2) |
|
40 |
+data <- read_GMQL(test_path) |
|
41 |
+join_data <- read_GMQL(test_path2) |
|
50 | 42 |
|
51 | 43 |
# It creates a new dataset called 'jun_tf' by selecting those samples and |
52 | 44 |
# their regions from the existing 'data' dataset such that: |
... | ... |
@@ -60,7 +52,7 @@ join_data <- read_dataset(test_path2) |
60 | 52 |
# have a region attribute called pvalue with the associated value |
61 | 53 |
# less than 0.01 are conserved in output |
62 | 54 |
|
63 |
-jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
|
64 |
-semijoin(join_data, TRUE, list(DF("cell")))) |
|
55 |
+jun_tf <- filter(data, antibody_target == "JUN", pvalue < 0.01, |
|
56 |
+semijoin(join_data, TRUE, conds("cell"))) |
|
65 | 57 |
|
66 | 58 |
} |
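As a complementary sketch (assuming the same `data` and `join_data` objects built above), the non-negated form of the condition simply leaves `not_in` at its `FALSE` default:

```r
## Plain (non-negated) semijoin on the 'cell' metadata attribute:
## samples of 'data' are kept according to the semijoin against 'join_data'
jun_tf_plain <- filter(data, antibody_target == "JUN", pvalue < 0.01,
                       semijoin(join_data, groupBy = conds("cell")))
```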
... | ... |
@@ -6,7 +6,7 @@ |
6 | 6 |
\alias{setdiff,GMQLDataset,GMQLDataset-method} |
7 | 7 |
\title{Method setdiff} |
8 | 8 |
\usage{ |
9 |
-\S4method{setdiff}{GMQLDataset,GMQLDataset}(x, y, joinBy = NULL, |
|
9 |
+\S4method{setdiff}{GMQLDataset,GMQLDataset}(x, y, joinBy = conds(), |
|
10 | 10 |
is_exact = FALSE) |
11 | 11 |
} |
12 | 12 |
\arguments{ |
... | ... |
@@ -14,8 +14,8 @@ |
14 | 14 |
|
15 | 15 |
\item{y}{GMQLDataset class object} |
16 | 16 |
|
17 |
-\item{joinBy}{\code{\link{condition_evaluation}} function to support |
|
18 |
-methods with groupBy or JoinBy input paramter} |
|
17 |
+\item{joinBy}{\code{\link{conds}} function to support methods with |
|
18 |
+groupBy or JoinBy input parameter} |
|
19 | 19 |
|
20 | 20 |
\item{is_exact}{single logical value: TRUE means that the region difference |
21 | 21 |
is executed only on regions in left_input_data with exactly the same |
... | ... |
@@ -51,8 +51,8 @@ are considered when performing the difference. |
51 | 51 |
init_gmql() |
52 | 52 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
53 | 53 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
54 |
-data1 = read_dataset(test_path) |
|
55 |
-data2 = read_dataset(test_path2) |
|
54 |
+data1 = read_GMQL(test_path) |
|
55 |
+data2 = read_GMQL(test_path2) |
|
56 | 56 |
|
57 | 57 |
## This GMQL statement returns all the regions in the first dataset |
58 | 58 |
## that do not overlap any region in the second dataset. |
... | ... |
@@ -65,6 +65,6 @@ out = setdiff(data1, data2) |
65 | 65 |
## do not overlap any region in s2; |
66 | 66 |
## metadata of the result are the same as the metadata of s1. |
67 | 67 |
|
68 |
-out_t = setdiff(data1, data2, condition_evaluation(c("cell"))) |
|
68 |
+out_t = setdiff(data1, data2, conds("cell")) |
|
69 | 69 |
|
70 | 70 |
} |
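The `is_exact` flag documented above combines with the same join condition; a minimal sketch reusing `data1` and `data2` from the example, in which only regions of `data2` with exactly matching coordinates count as overlapping:

```r
## Same difference as before, but a region of data1 is removed only when
## data2 contains a region with exactly the same coordinates (is_exact = TRUE)
out_exact = setdiff(data1, data2, joinBy = conds("cell"), is_exact = TRUE)
```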
... | ... |
@@ -44,9 +44,9 @@ all those datasets are materialized as folders. |
44 | 44 |
|
45 | 45 |
init_gmql() |
46 | 46 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
47 |
-rd = read_dataset(test_path) |
|
47 |
+rd = read_GMQL(test_path) |
|
48 | 48 |
|
49 |
-aggr = aggregate(rd, list(DF("antibody_target", "cell_karyotype"))) |
|
49 |
+aggr = aggregate(rd, conds(c("antibody_target", "cell_karyotype"))) |
|
50 | 50 |
taken <- take(aggr, rows = 45) |
51 | 51 |
|
52 | 52 |
} |
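A short follow-up sketch, under the assumption that `take()` materializes its result in R memory as a GRangesList-like object, so that standard list accessors apply; the calls below are illustrative only:

```r
## Inspect the in-memory result returned by take()
length(taken)   # number of samples returned
names(taken)    # sample identifiers
taken[[1]]      # regions of the first returned sample
```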
... | ... |
@@ -46,8 +46,8 @@ w.r.t. the merged schema are set to null.} |
46 | 46 |
init_gmql() |
47 | 47 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
48 | 48 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
49 |
-data1 <- read_dataset(test_path) |
|
50 |
-data2 <- read_dataset(test_path2) |
|
49 |
+data1 <- read_GMQL(test_path) |
|
50 |
+data2 <- read_GMQL(test_path2) |
|
51 | 51 |
|
52 | 52 |
## This statement creates a dataset called 'full' which contains all samples |
53 | 53 |
## from the datasets 'data1' and 'data2' |
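The hunk ends before the statement itself; for reference, a minimal sketch of how such a union is typically written with the S4 `union` method for two GMQLDataset objects (the exact line in the example file may differ):

```r
## Union of all samples of 'data1' and 'data2' into one dataset; region
## attributes missing with respect to the merged schema are set to null
full = union(data1, data2)
```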
... | ... |
@@ -247,17 +247,17 @@ As data are already in the user computer, we simply execute: |
247 | 247 |
|
248 | 248 |
```{r, read GMQL dataset} |
249 | 249 |
gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL") |
250 |
-data_out = read_dataset(gmql_dataset_path) |
|
250 |
+data_out = read_GMQL(gmql_dataset_path) |
|
251 | 251 |
``` |
252 | 252 |
In this case we are reading a GMQL dataset specified by the path of |
253 | 253 |
its folder "EXON" within the subdirectory "example" of the package "RGMQL". |
254 |
-It does not matter what kind of format the data are, *read_dataset()* reads |
|
254 |
+It does not matter in which format the data are; *read_GMQL()* reads |
|
255 | 255 |
many standard tab-delimited text formats without the need of specifying any |
256 | 256 |
additional input parameter. |
257 | 257 |
|
258 | 258 |
2. GRangesList:\newline |
259 | 259 |
For better integration in the R environment and with other R packages, |
260 |
-we provide the *read()* function to read directly from R memory/environment |
|
260 |
+we provide the *read_GRangesList()* function to read directly from R memory/environment |
|
261 | 261 |
using GRangesList as input. |
262 | 262 |
|
263 | 263 |
```{r, read GRangesList} |
... | ... |
@@ -270,12 +270,12 @@ gr2 <- GRanges(seqnames = c("chr1", "chr1"), |
270 | 270 |
score = 3:4, GC = c(0.3, 0.5)) |
271 | 271 |
|
272 | 272 |
grl <- GRangesList("txA" = gr1, "txB" = gr2) |
273 |
-data_out <- read(grl) |
|
273 |
+data_out <- read_GRangesList(grl) |
|
274 | 274 |
``` |
275 | 275 |
In this example we show how versatile the RGMQL package is. |
276 | 276 |
As specified above, we can directly read a GRangesList previously created |
277 | 277 |
from two GRanges objects. |
278 |
-Both *read()* and *read_dataset()* functions return a result object, |
|
278 |
+Both *read_GRangesList()* and *read_GMQL()* functions return a result object, |
|
279 | 279 |
in this case *data_out*: an instance of the GMQLDataset class used as input |
280 | 280 |
for executing the subsequent GMQL operation. |
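A minimal sketch of what "subsequent GMQL operation" means in practice, assuming the `data_out` object created above and the `collect()`/`execute()` pattern used later in this vignette (the output location is left at its default):

```r
## data_out is a GMQLDataset: it can be fed into any GMQL operation,
## for instance materialized and executed
collect(data_out)
execute()
```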
281 | 281 |
|
... | ... |
@@ -306,8 +306,8 @@ mut_path <- system.file("example", "MUT", package = "RGMQL") |
306 | 306 |
# sample with exon regions, and MUT folder as a GMQL dataset named "mut_ds" |
307 | 307 |
# containing multiple samples with mutation regions |
308 | 308 |
|
309 |
-exon_ds <- read_dataset(exon_path) |
|
310 |
-mut_ds <- read_dataset(mut_path) |
|
309 |
+exon_ds <- read_GMQL(exon_path) |
|
310 |
+mut_ds <- read_GMQL(mut_path) |
|
311 | 311 |
|
312 | 312 |
# Filter out mut_ds based on predicate |
313 | 313 |
|
... | ... |
@@ -484,8 +484,8 @@ After initialization, we can start building our query: |
484 | 484 |
## Read the remote dataset HG19_TCGA_dnaseq |
485 | 485 |
## Read the remote dataset HG19_BED_ANNOTATION |
486 | 486 |
|
487 |
-TCGA_dnaseq <- read_dataset("public.HG19_TCGA_dnaseq", is_local = FALSE) |
|
488 |
-HG19_bed_ann <- read_dataset("public.HG19_BED_ANNOTATION", is_local = FALSE) |
|
487 |
+TCGA_dnaseq <- read_GMQL("public.HG19_TCGA_dnaseq", is_local = FALSE) |
|
488 |
+HG19_bed_ann <- read_GMQL("public.HG19_BED_ANNOTATION", is_local = FALSE) |
|
489 | 489 |
|
490 | 490 |
## Filter out TCGA_dnaseq based on predicate |
491 | 491 |
|
... | ... |
@@ -535,7 +535,7 @@ remote_processing(TRUE) |
535 | 535 |
``` |
536 | 536 |
A user can switch the processing mode until the first *collect()* has been performed. |
537 | 537 |
|
538 |
-This kind of processing comes from the fact that the *read()* function can |
|
538 |
+This kind of processing comes from the fact that the *read_GMQL()* function can |
|
539 | 539 |
accept either a local dataset or a remote repository dataset, |
540 | 540 |
even in the same query as in the following example: |
541 | 541 |
```{r, mixed query} |
... | ... |
@@ -549,11 +549,11 @@ mut_path <- system.file("example", "MUT", package = "RGMQL") |
549 | 549 |
# Read MUT folder as a GMQL dataset named "mut_ds" containing a single |
550 | 550 |
# sample with mutation regions |
551 | 551 |
|
552 |
-mut_ds <- read_dataset(mut_path, is_local = TRUE) |
|
552 |
+mut_ds <- read_GMQL(mut_path, is_local = TRUE) |
|
553 | 553 |
|
554 | 554 |
# Read the remote dataset HG19_BED_ANNOTATION |
555 | 555 |
|
556 |
-HG19_bed_ann <- read_dataset("public.HG19_BED_ANNOTATION", is_local = FALSE) |
|
556 |
+HG19_bed_ann <- read_GMQL("public.HG19_BED_ANNOTATION", is_local = FALSE) |
|
557 | 557 |
|
558 | 558 |
# Filter out based on predicate |
559 | 559 |
|
... | ... |
@@ -592,7 +592,7 @@ collect(exon_res) |
592 | 592 |
execute() |
593 | 593 |
``` |
594 | 594 |
|
595 |
-As we can see, the two *read()* functions above read from different sources: |
|
595 |
+As we can see, the two *read_GMQL()* calls above read from different sources: |
|
596 | 596 |
*mut_ds* from local dataset, *HG19_bed_ann* from remote repository. |
597 | 597 |
|
598 | 598 |
If we set remote processing to false (*remote_processing(FALSE)*), |