... | ... |
@@ -96,21 +96,21 @@ export_gmql <- function(samples, dir_out, is_gtf) |
96 | 96 |
|
97 | 97 |
files_sub_dir <- paste0(dir_out,"/files") |
98 | 98 |
dir.create(files_sub_dir) |
99 |
- c = .counter() |
|
99 |
+ cnt = .counter() |
|
100 | 100 |
#col_names <- .get_schema_names(samples) |
101 | 101 |
if(to_GTF) |
102 | 102 |
{ |
103 | 103 |
#write region |
104 | 104 |
lapply(samples,function(x,dir){ |
105 |
- sample_name = paste0(dir,"/S_",c(),".gtf") |
|
105 |
+ sample_name = paste0(dir,"/S_",cnt(),".gtf") |
|
106 | 106 |
g <- rtracklayer::export(x,sample_name,format = "gtf") |
107 | 107 |
},files_sub_dir) |
108 |
- c = .counter(0) |
|
108 |
+ cnt = .counter(0) |
|
109 | 109 |
meta <- metadata(samples) |
110 | 110 |
|
111 | 111 |
#write metadata |
112 | 112 |
lapply(meta,function(x,dir){ |
113 |
- sample_name = paste0(dir,"/S_",c(),".gtf") |
|
113 |
+ sample_name = paste0(dir,"/S_",cnt(),".gtf") |
|
114 | 114 |
.write_metadata(x,sample_name) |
115 | 115 |
},files_sub_dir) |
116 | 116 |
} |
... | ... |
@@ -118,18 +118,18 @@ export_gmql <- function(samples, dir_out, is_gtf) |
118 | 118 |
{ |
119 | 119 |
#write region |
120 | 120 |
lapply(samples,function(x,dir){ |
121 |
- sample_name = paste0(dir,"/S_",c(),".gdm") |
|
121 |
+ sample_name = paste0(dir,"/S_",cnt(),".gdm") |
|
122 | 122 |
region_frame <- data.frame(x) |
123 | 123 |
write.table(region_frame,sample_name,col.names = FALSE, |
124 | 124 |
row.names = FALSE, sep = '\t',quote = FALSE) |
125 | 125 |
},files_sub_dir) |
126 | 126 |
|
127 |
- c = .counter(0) |
|
127 |
+ cnt = .counter(0) |
|
128 | 128 |
meta <- metadata(samples) |
129 | 129 |
|
130 | 130 |
#write metadata |
131 | 131 |
lapply(meta,function(x,dir){ |
132 |
- sample_name = paste0(dir,"/S_",c(),".gdm") |
|
132 |
+ sample_name = paste0(dir,"/S_",cnt(),".gdm") |
|
133 | 133 |
.write_metadata(x,sample_name) |
134 | 134 |
},files_sub_dir) |
135 | 135 |
} |
... | ... |
@@ -3,7 +3,7 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = conds(), |
3 | 3 |
{ |
4 | 4 |
ptr_data = value(.data) |
5 | 5 |
gmql_group(ptr_data, groupBy_meta, groupBy_regions, region_aggregates, |
6 |
- meta_aggregates) |
|
6 |
+ meta_aggregates) |
|
7 | 7 |
} |
8 | 8 |
|
9 | 9 |
#' Method group_by |
... | ... |
@@ -71,8 +71,8 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = conds(), |
71 | 71 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
72 | 72 |
#' exp = read_GMQL(test_path) |
73 | 73 |
#' |
74 |
-#' ## This GMQL statement groups samples of the input 'exp' dataset according to |
|
75 |
-#' ## their value of the metadata attribute 'tumor_type' and computes the |
|
74 |
+#' ## This GMQL statement groups samples of the input 'exp' dataset according |
|
75 |
+#' ## to their value of the metadata attribute 'tumor_type' and computes the |
|
76 | 76 |
#' ## maximum value that the metadata attribute size takes inside the samples |
77 | 77 |
#' ## belonging to each group. The samples in the output GROUPS_T dataset |
78 | 78 |
#' ## have a new _group metadata attribute which indicates which group they |
... | ... |
@@ -89,17 +89,17 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = conds(), |
89 | 89 |
#' ## grouping attribute 'cell', and adds the metadata aggregate attribute |
90 | 90 |
#' ## 'n_samp', which counts the number of samples belonging to the respective |
91 | 91 |
#' ## group. It has the following output GROUPS_C dataset samples |
92 |
-#' ## (note that now no sample has metadata attribute _group with value equal 0 |
|
93 |
-#' ## since all input samples include the metadata attribute cell, |
|
92 |
+#' ## (note that now no sample has metadata attribute _group with value |
|
93 |
+#' ## equal 0 since all input samples include the metadata attribute cell, |
|
94 | 94 |
#' ## with different values, on which the new grouping is based) |
95 | 95 |
#' |
96 | 96 |
#' GROUPS_C = group_by(exp, conds("cell"), |
97 |
-#' meta_aggregates = list(n_samp AS COUNTSAMP())) |
|
97 |
+#' meta_aggregates = list(n_samp = COUNTSAMP())) |
|
98 | 98 |
#' |
99 | 99 |
#' ## This GMQL statement groups the regions of each 'exp' dataset sample by |
100 | 100 |
#' ## region coordinates chr, left, right, strand (these are implicitly |
101 |
-#' ## considered) and the additional region attribute score (which is explicitly |
|
102 |
-#' ## specified), and keeps only one region for each group. |
|
101 |
+#' ## considered) and the additional region attribute score (which is |
|
102 |
+#' ## explicitly specified), and keeps only one region for each group. |
|
103 | 103 |
#' ## In the output GROUPS dataset schema, the new region attributes |
104 | 104 |
#' ## avg_pvalue and max_qvalue are added, respectively computed as the |
105 | 105 |
#' ## average of the values taken by the pvalue and the maximum of the values |
... | ... |
@@ -108,7 +108,7 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = conds(), |
108 | 108 |
#' ## Note that the region attributes which are not coordinates or score are |
109 | 109 |
#' ## discarded. |
110 | 110 |
#' |
111 |
-#' GROUPS = group_by(exp, group_reg = "score", |
|
111 |
+#' GROUPS = group_by(exp, groupBy_regions = "score", |
|
112 | 112 |
#' region_aggregates = list(avg_pvalue = AVG("pvalue"), |
113 | 113 |
#' max_qvalue = MAX("qvalue"))) |
114 | 114 |
#' |
... | ... |
@@ -149,8 +149,8 @@ gmql_group <- function(input_data, group_meta, group_reg, region_aggregates, |
149 | 149 |
|
150 | 150 |
if(!length(group_reg)) |
151 | 151 |
group_reg <- .jnull("java/lang/String") |
152 |
- |
|
153 |
- group_reg <- .jarray(group_reg) |
|
152 |
+ else |
|
153 |
+ group_reg <- .jarray(group_reg,dispatch = TRUE) |
|
154 | 154 |
} |
155 | 155 |
else |
156 | 156 |
group_reg <- .jnull("java/lang/String") |
... | ... |
@@ -69,8 +69,8 @@ init_gmql <- function(output_format = "GTF", remote_processing = FALSE, |
69 | 69 |
#' |
70 | 70 |
#' @examples |
71 | 71 |
#' |
72 |
-#' ## These statements initializes GMQL with local processing with sample files |
|
73 |
-#' ## output format as tab delimited and then stop it |
|
72 |
+#' ## These statements initialize GMQL with local processing with sample |
|
73 |
+#' ## files output format as tab delimited and then stop it |
|
74 | 74 |
#' |
75 | 75 |
#' init_gmql("tab", FALSE) |
76 | 76 |
#' |
... | ... |
@@ -102,8 +102,9 @@ stop_gmql <- function() |
102 | 102 |
#' |
103 | 103 |
#' @examples |
104 | 104 |
#' |
105 |
-#' ## These statements initializes GMQL with local processing with sample files |
|
106 |
-#' ## output format as tab delimited and then change processing mode to remote |
|
105 |
+#' ## These statements initialize GMQL with local processing with sample |
|
106 |
+#' ## files output format as tab delimited and then change processing mode |
|
107 |
+#' ## to remote |
|
107 | 108 |
#' |
108 | 109 |
#' init_gmql("tab", remote_processing = FALSE) |
109 | 110 |
#' |
... | ... |
@@ -100,18 +100,6 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
100 | 100 |
#' regions_update = list(new_score = (score / 1000.0) + 100), |
101 | 101 |
#' regions = c("score"), all_but_reg = TRUE) |
102 | 102 |
#' |
103 |
-#' |
|
104 |
-#' ## It produces an output dataset that contains the same samples |
|
105 |
-#' ## as the input dataset. |
|
106 |
-#' ## Each output sample only contains, as region attributes, |
|
107 |
-#' ## the four basic coordinates (chr, left, right, strand) and the specified |
|
108 |
-#' ## region attributes 'variant_classification' and 'variant_type', |
|
109 |
-#' ## and as metadata attributes only the specified ones, |
|
110 |
-#' ## i.e. manually_curated_tissue_status and manually_curated_tumor_tag. |
|
111 |
-#' |
|
112 |
-#' DS_out = select(data, regions = c("variant_classification", |
|
113 |
-#' "variant_type"), metadata = c("manually_curated_tissue_status", |
|
114 |
-#' "manually_curated_tumor_tag")) |
|
115 | 103 |
#' |
116 | 104 |
#' |
117 | 105 |
#' @name select |
... | ... |
@@ -61,7 +61,7 @@ |
61 | 61 |
#' |
62 | 62 |
#' remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
63 | 63 |
#' login_gmql(remote_url) |
64 |
-#' data1 = read_GMQL("public.Example_Dataset1",is_local = FALSE) |
|
64 |
+#' data1 = read_GMQL("public.Example_Dataset_1",is_local = FALSE) |
|
65 | 65 |
#' |
66 | 66 |
#' @name read_GMQL |
67 | 67 |
#' @rdname read-function |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
#' @importFrom rJava .jpackage .jinit |
2 |
-#' @import RGMQLScalaLib |
|
2 |
+#' @import RGMQLlib |
|
3 | 3 |
#' |
4 | 4 |
.onLoad <- function(libname, pkgname) { |
5 | 5 |
.jpackage(pkgname, lib.loc = libname) |
... | ... |
@@ -24,7 +24,7 @@ initGMQLscalaAPI <- function(libLoc, mem = "12G") { |
24 | 24 |
# Starting the java engine |
25 | 25 |
.jinit(force.init = TRUE) |
26 | 26 |
if (missing(libLoc)) { |
27 |
- libLoc = system.file("extdata", "java", package = "RGMQLScalaLib") |
|
27 |
+ libLoc = system.file("extdata", "java", package = "RGMQLlib") |
|
28 | 28 |
} |
29 | 29 |
|
30 | 30 |
path = Sys.glob(paste0(libLoc, "/*.jar")) |
... | ... |
@@ -664,7 +664,7 @@ show_datasets_list <- function(url) |
664 | 664 |
#' |
665 | 665 |
#' ## It shows all samples present in public dataset 'Example_Dataset_1' |
666 | 666 |
#' |
667 |
-#' list <- show_samples_list(remote_url, "public.Example_Dataset1") |
|
667 |
+#' list <- show_samples_list(remote_url, "public.Example_Dataset_1") |
|
668 | 668 |
#' |
669 | 669 |
#' @name show_samples_list |
670 | 670 |
#' @rdname show_samples_list |
... | ... |
@@ -712,7 +712,7 @@ show_samples_list <- function(url,datasetName) |
712 | 712 |
#' |
713 | 713 |
#' ## show schema of public dataset 'Example_Dataset_1' |
714 | 714 |
#' |
715 |
-#' list <- show_schema(remote_url, "public.Example_Dataset1") |
|
715 |
+#' list <- show_schema(remote_url, "public.Example_Dataset_1") |
|
716 | 716 |
#' |
717 | 717 |
#' @name show_schema |
718 | 718 |
#' @rdname show_schema |
... | ... |
@@ -937,12 +937,12 @@ delete_dataset <- function(url,datasetName) |
937 | 937 |
#' |
938 | 938 |
#' remote_url = "http://genomic.deib.polimi.it/gmql-rest-r" |
939 | 939 |
#' login_gmql(remote_url) |
940 |
-#' download_dataset(remote_url, "public.Example_Dataset1", path = getwd()) |
|
940 |
+#' download_dataset(remote_url, "public.Example_Dataset_1", path = getwd()) |
|
941 | 941 |
#' |
942 | 942 |
#' ## Create GRangesList from public dataset Example_Dataset_1 got |
943 | 943 |
#' ## from repository |
944 | 944 |
#' |
945 |
-#' download_as_GRangesList(remote_url, "public.Example_Dataset1") |
|
945 |
+#' download_as_GRangesList(remote_url, "public.Example_Dataset_1") |
|
946 | 946 |
#' } |
947 | 947 |
#' |
948 | 948 |
#' @name download_dataset |
... | ... |
@@ -1031,7 +1031,7 @@ download_as_GRangesList <- function(url,datasetName) |
1031 | 1031 |
#' ## This statement retrieves metadata for sample 'S_00000' from public |
1032 | 1032 |
#' ## dataset 'Example_Dataset_1' |
1033 | 1033 |
#' |
1034 |
-#' sample_metadata(remote_url, "public.Example_Dataset1", "S_00000") |
|
1034 |
+#' sample_metadata(remote_url, "public.Example_Dataset_1", "S_00000") |
|
1035 | 1035 |
#' |
1036 | 1036 |
#' |
1037 | 1037 |
#' @name sample_metadata |
... | ... |
@@ -1095,7 +1095,7 @@ sample_metadata <- function(url, datasetName,sampleName) |
1095 | 1095 |
#' ## This statement retrieves regions data for sample "S_00000" from public |
1096 | 1096 |
#' ## dataset "Example_Dataset_1" |
1097 | 1097 |
#' |
1098 |
-#' sample_region(remote_url, "public.Example_Dataset1", "S_00000") |
|
1098 |
+#' sample_region(remote_url, "public.Example_Dataset_1", "S_00000") |
|
1099 | 1099 |
#' |
1100 | 1100 |
#' } |
1101 | 1101 |
#' |
... | ... |
@@ -40,12 +40,12 @@ If error occurs, a specific error is printed |
40 | 40 |
|
41 | 41 |
remote_url = "http://genomic.deib.polimi.it/gmql-rest-r" |
42 | 42 |
login_gmql(remote_url) |
43 |
-download_dataset(remote_url, "public.Example_Dataset1", path = getwd()) |
|
43 |
+download_dataset(remote_url, "public.Example_Dataset_1", path = getwd()) |
|
44 | 44 |
|
45 | 45 |
## Create GRangesList from public dataset Example_Dataset_1 got |
46 | 46 |
## from repository |
47 | 47 |
|
48 |
-download_as_GRangesList(remote_url, "public.Example_Dataset1") |
|
48 |
+download_as_GRangesList(remote_url, "public.Example_Dataset_1") |
|
49 | 49 |
} |
50 | 50 |
|
51 | 51 |
} |
... | ... |
@@ -75,8 +75,8 @@ init_gmql() |
75 | 75 |
test_path <- system.file("example","DATASET",package = "RGMQL") |
76 | 76 |
exp = read_GMQL(test_path) |
77 | 77 |
|
78 |
-## This GMQL statement groups samples of the input 'exp' dataset according to |
|
79 |
-## their value of the metadata attribute 'tumor_type' and computes the |
|
78 |
+## This GMQL statement groups samples of the input 'exp' dataset according |
|
79 |
+## to their value of the metadata attribute 'tumor_type' and computes the |
|
80 | 80 |
## maximum value that the metadata attribute size takes inside the samples |
81 | 81 |
## belonging to each group. The samples in the output GROUPS_T dataset |
82 | 82 |
## have a new _group metadata attribute which indicates which group they |
... | ... |
@@ -93,17 +93,17 @@ meta_aggregates = list(max_size = MAX("size"))) |
93 | 93 |
## grouping attribute 'cell', and adds the metadata aggregate attribute |
94 | 94 |
## 'n_samp', which counts the number of samples belonging to the respective |
95 | 95 |
## group. It has the following output GROUPS_C dataset samples |
96 |
-## (note that now no sample has metadata attribute _group with value equal 0 |
|
97 |
-## since all input samples include the metadata attribute cell, |
|
96 |
+## (note that now no sample has metadata attribute _group with value |
|
97 |
+## equal 0 since all input samples include the metadata attribute cell, |
|
98 | 98 |
## with different values, on which the new grouping is based) |
99 | 99 |
|
100 | 100 |
GROUPS_C = group_by(exp, conds("cell"), |
101 |
-meta_aggregates = list(n_samp AS COUNTSAMP())) |
|
101 |
+meta_aggregates = list(n_samp = COUNTSAMP())) |
|
102 | 102 |
|
103 | 103 |
## This GMQL statement groups the regions of each 'exp' dataset sample by |
104 | 104 |
## region coordinates chr, left, right, strand (these are implicitly |
105 |
-## considered) and the additional region attribute score (which is explicitly |
|
106 |
-## specified), and keeps only one region for each group. |
|
105 |
+## considered) and the additional region attribute score (which is |
|
106 |
+## explicitly specified), and keeps only one region for each group. |
|
107 | 107 |
## In the output GROUPS dataset schema, the new region attributes |
108 | 108 |
## avg_pvalue and max_qvalue are added, respectively computed as the |
109 | 109 |
## average of the values taken by the pvalue and the maximum of the values |
... | ... |
@@ -112,7 +112,7 @@ meta_aggregates = list(n_samp AS COUNTSAMP())) |
112 | 112 |
## Note that the region attributes which are not coordinates or score are |
113 | 113 |
## discarded. |
114 | 114 |
|
115 |
-GROUPS = group_by(exp, group_reg = "score", |
|
115 |
+GROUPS = group_by(exp, groupBy_regions = "score", |
|
116 | 116 |
region_aggregates = list(avg_pvalue = AVG("pvalue"), |
117 | 117 |
max_qvalue = MAX("qvalue"))) |
118 | 118 |
|
... | ... |
@@ -77,6 +77,6 @@ dataPeak = read_GMQL(test_path,"NarrowPeakParser") |
77 | 77 |
|
78 | 78 |
remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
79 | 79 |
login_gmql(remote_url) |
80 |
-data1 = read_GMQL("public.Example_Dataset1",is_local = FALSE) |
|
80 |
+data1 = read_GMQL("public.Example_Dataset_1",is_local = FALSE) |
|
81 | 81 |
|
82 | 82 |
} |
... | ... |
@@ -22,8 +22,9 @@ After invoking collect() it is not possible to switch the processing mode. |
22 | 22 |
} |
23 | 23 |
\examples{ |
24 | 24 |
|
25 |
-## These statements initializes GMQL with local processing with sample files |
|
26 |
-## output format as tab delimited and then change processing mode to remote |
|
25 |
+## These statements initialize GMQL with local processing with sample |
|
26 |
+## files output format as tab delimited and then change processing mode |
|
27 |
+## to remote |
|
27 | 28 |
|
28 | 29 |
init_gmql("tab", remote_processing = FALSE) |
29 | 30 |
|
... | ... |
@@ -33,7 +33,7 @@ login_gmql(remote_url) |
33 | 33 |
## This statement retrieves metadata for sample 'S_00000' from public |
34 | 34 |
## dataset 'Example_Dataset_1' |
35 | 35 |
|
36 |
-sample_metadata(remote_url, "public.Example_Dataset1", "S_00000") |
|
36 |
+sample_metadata(remote_url, "public.Example_Dataset_1", "S_00000") |
|
37 | 37 |
|
38 | 38 |
|
39 | 39 |
} |
... | ... |
@@ -39,7 +39,7 @@ login_gmql(remote_url) |
39 | 39 |
## This statement retrieves regions data for sample "S_00000" from public |
40 | 40 |
## dataset "Example_Dataset_1" |
41 | 41 |
|
42 |
-sample_region(remote_url, "public.Example_Dataset1", "S_00000") |
|
42 |
+sample_region(remote_url, "public.Example_Dataset_1", "S_00000") |
|
43 | 43 |
|
44 | 44 |
} |
45 | 45 |
|
... | ... |
@@ -89,17 +89,5 @@ regions_update = list(new_score = (score / 1000.0) + 100), |
89 | 89 |
regions = c("score"), all_but_reg = TRUE) |
90 | 90 |
|
91 | 91 |
|
92 |
-## It produces an output dataset that contains the same samples |
|
93 |
-## as the input dataset. |
|
94 |
-## Each output sample only contains, as region attributes, |
|
95 |
-## the four basic coordinates (chr, left, right, strand) and the specified |
|
96 |
-## region attributes 'variant_classification' and 'variant_type', |
|
97 |
-## and as metadata attributes only the specified ones, |
|
98 |
-## i.e. manually_curated_tissue_status and manually_curated_tumor_tag. |
|
99 |
- |
|
100 |
-DS_out = select(data, regions = c("variant_classification", |
|
101 |
-"variant_type"), metadata = c("manually_curated_tissue_status", |
|
102 |
-"manually_curated_tumor_tag")) |
|
103 |
- |
|
104 | 92 |
|
105 | 93 |
} |
... | ... |
@@ -14,8 +14,8 @@ Stop GMQL server |
14 | 14 |
} |
15 | 15 |
\examples{ |
16 | 16 |
|
17 |
-## These statements initializes GMQL with local processing with sample files |
|
18 |
-## output format as tab delimited and then stop it |
|
17 |
+## These statements initialize GMQL with local processing with sample |
|
18 |
+## files output format as tab delimited and then stop it |
|
19 | 19 |
|
20 | 20 |
init_gmql("tab", FALSE) |
21 | 21 |
|