... | ... |
@@ -63,7 +63,9 @@ Imports: |
63 | 63 |
methods, |
64 | 64 |
S4Vectors, |
65 | 65 |
dplyr, |
66 |
- stats |
|
66 |
+ stats, |
|
67 |
+ glue, |
|
68 |
+ BiocGenerics |
|
67 | 69 |
Depends: |
68 | 70 |
R(<= 3.4.2) |
69 | 71 |
VignetteBuilder: knitr |
... | ... |
@@ -92,6 +94,7 @@ Collate: |
92 | 94 |
'gmql_cover.R' |
93 | 95 |
'gmql_difference.R' |
94 | 96 |
'gmql_extend.R' |
97 |
+ 'gmql_group.R' |
|
95 | 98 |
'gmql_join.R' |
96 | 99 |
'gmql_map.R' |
97 | 100 |
'gmql_materialize.R' |
... | ... |
@@ -67,6 +67,7 @@ exportMethods(collect) |
67 | 67 |
exportMethods(cover) |
68 | 68 |
exportMethods(extend) |
69 | 69 |
exportMethods(filter) |
70 |
+exportMethods(group_by) |
|
70 | 71 |
exportMethods(map) |
71 | 72 |
exportMethods(merge) |
72 | 73 |
exportMethods(select) |
... | ... |
@@ -78,10 +79,16 @@ import(httr) |
78 | 79 |
import(xml2) |
79 | 80 |
importClassesFrom(GenomicRanges,GRangesList) |
80 | 81 |
importClassesFrom(S4Vectors,DataTable) |
82 |
+importFrom(BiocGenerics,setdiff) |
|
83 |
+importFrom(BiocGenerics,union) |
|
81 | 84 |
importFrom(GenomicRanges,makeGRangesFromDataFrame) |
85 |
+importFrom(S4Vectors,aggregate) |
|
86 |
+importFrom(S4Vectors,merge) |
|
82 | 87 |
importFrom(S4Vectors,metadata) |
83 | 88 |
importFrom(data.table,fread) |
84 | 89 |
importFrom(dplyr,bind_cols) |
90 |
+importFrom(dplyr,filter) |
|
91 |
+importFrom(glue,glue) |
|
85 | 92 |
importFrom(methods,is) |
86 | 93 |
importFrom(methods,isClass) |
87 | 94 |
importFrom(methods,new) |
... | ... |
@@ -1,15 +1,3 @@ |
1 |
-#' Method aggregate |
|
2 |
-#' |
|
3 |
-#' Wrapper to GMQL merge function |
|
4 |
-#' |
|
5 |
-#' @name aggregate |
|
6 |
-#' @rdname aggregate-GMQLDataset-method |
|
7 |
-#' @aliases aggregate |
|
8 |
-#' |
|
9 |
-setGeneric("aggregate", function(x, ...) |
|
10 |
- standardGeneric("aggregate")) |
|
11 |
- |
|
12 |
- |
|
13 | 1 |
#' Method filter |
14 | 2 |
#' |
15 | 3 |
#' Wrapper to GMQL select function |
... | ... |
@@ -71,7 +59,6 @@ setGeneric("take", function(data, ...) standardGeneric("take")) |
71 | 59 |
#' @aliases extend GMQLDataset-method |
72 | 60 |
setGeneric("extend", function(.data, ...) standardGeneric("extend")) |
73 | 61 |
|
74 |
- |
|
75 | 62 |
#' Method select |
76 | 63 |
#' |
77 | 64 |
#' Wrapper to GMQL project function |
... | ... |
@@ -81,6 +68,7 @@ setGeneric("extend", function(.data, ...) standardGeneric("extend")) |
81 | 68 |
#' @aliases select |
82 | 69 |
setGeneric("select", function(.data, ...) standardGeneric("select")) |
83 | 70 |
|
71 |
+ |
|
84 | 72 |
#' Method arrange |
85 | 73 |
#' |
86 | 74 |
#' Wrapper to GMQL order function |
... | ... |
@@ -94,3 +82,12 @@ setGeneric("arrange", function(.data, metadata_ordering = NULL, |
94 | 82 |
reg_fetch_opt = NULL, reg_num_fetch = 0, ...) |
95 | 83 |
standardGeneric("arrange")) |
96 | 84 |
|
85 |
+#' Method group_by |
|
86 |
+#' |
|
87 |
+#' Wrapper to GMQL group function |
|
88 |
+#' |
|
89 |
+#' @name group_by |
|
90 |
+#' @rdname group_by-GMQLDataset-method |
|
91 |
+#' @aliases group_by |
|
92 |
+setGeneric("group_by", function(.data, ...) standardGeneric("group_by")) |
|
93 |
+ |
... | ... |
@@ -1,9 +1,9 @@ |
1 |
-#' FILTER AND EXTRACT |
|
1 |
+#' FILTER AND EXTRACT |
|
2 | 2 |
#' |
3 |
-#' This function let user to create a new GRangeslist with fixed information: |
|
4 |
-#' seqnames,ranges ans strand and a variable part made up by the regions |
|
3 |
+#' This function let user to create a new GRangeslist with fixed information: |
|
4 |
+#' seqnames,ranges ans strand and a variable part made up by the regions |
|
5 | 5 |
#' defined as input. |
6 |
-#' The metadata and metadata_prefix are used to filter the data and choose |
|
6 |
+#' The metadata and metadata_prefix are used to filter the data and choose |
|
7 | 7 |
#' only the samples that match at least one metdatata with its prefix. |
8 | 8 |
#' The input regions are shown for each sample obtained from filtering. |
9 | 9 |
#' |
... | ... |
@@ -12,51 +12,57 @@ |
12 | 12 |
#' @importFrom data.table fread |
13 | 13 |
#' @importFrom rtracklayer import |
14 | 14 |
#' |
15 |
-#' @param data string GMQL dataset folder path or GrangesList |
|
15 |
+#' @param data string GMQL dataset folder path or GrangesList |
|
16 | 16 |
#' object |
17 |
-#' @param metadata vector of character containing names of metadata |
|
17 |
+#' @param metadata vector of character containing names of metadata |
|
18 | 18 |
#' to be searched for in metadata files. |
19 | 19 |
#' data will be extracted if at least one condition is satisfied: |
20 | 20 |
#' this condition will be logically "ANDed" with prefix filtering (see below) |
21 |
-#' if NULL no filtering action occured |
|
21 |
+#' if NULL no filtering action occured |
|
22 | 22 |
#' (i.e every sample will be taken for regions filtering) |
23 |
-#' @param metadata_prefix vector of character that will filter metadata |
|
23 |
+#' @param metadata_prefix vector of character that will filter metadata |
|
24 | 24 |
#' containing rispectively every element of this vector. |
25 | 25 |
#' number of elelment in both vector must match |
26 |
-#' @param regions vector of character that will extract only region |
|
27 |
-#' attribute specified; if NULL no regions will be taken and the output |
|
26 |
+#' @param regions vector of character that will extract only region |
|
27 |
+#' attribute specified; if NULL no regions will be taken and the output |
|
28 | 28 |
#' will be only GRanges made up by the first attribute |
29 | 29 |
#' (seqnames,start,end,strand) |
30 |
+#' @param suffix name for each metadata column of GRanges. by default is the |
|
31 |
+#' "antibody_target". This string is taken from sample metadata file or from |
|
32 |
+#' metadata() associated. If not present, the column name is the name of |
|
33 |
+#' selected regions |
|
30 | 34 |
#' |
31 |
-#' |
|
32 |
-#' @details |
|
33 |
-#' This function works only with datatset or Grangeslist that have the same |
|
35 |
+#' @details |
|
36 |
+#' This function works only with datatset or Grangeslist that have the same |
|
34 | 37 |
#' information about regions attribute (but of course different value) |
35 |
-#' |
|
38 |
+#' In case of Grangeslist data input the function will search for metadata |
|
39 |
+#' into metadata() function associated to Grangeslist. |
|
40 |
+#' |
|
36 | 41 |
#' @return granges with selected regions (if any) in elementMetadata |
37 | 42 |
#' |
38 | 43 |
#' @examples |
39 | 44 |
#' |
40 | 45 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
41 | 46 |
#' filter_and_extract(test_path,regions = c("pvalue", "peak")) |
42 |
-#' |
|
47 |
+#' |
|
43 | 48 |
#' grl = import_gmql(test_path, TRUE) |
44 | 49 |
#' filter_and_extract(grl, regions = c("pvalue", "peak")) |
45 | 50 |
#' |
46 | 51 |
#' |
47 | 52 |
#' @export |
48 | 53 |
#' |
49 |
-filter_and_extract <- function(data, metadata = NULL, |
|
50 |
- metadata_prefix = NULL, regions = NULL) |
|
54 |
+filter_and_extract <- function(data, metadata = NULL, |
|
55 |
+ metadata_prefix = NULL, regions = NULL, |
|
56 |
+ suffix = "antibody_target") |
|
51 | 57 |
{ |
52 | 58 |
if(is(data,"GRangesList")) |
53 |
- .extract_from_GRangesList(data,metadata,metadata_prefix,regions) |
|
59 |
+ .extract_from_GRangesList(data, metadata, metadata_prefix, regions) |
|
54 | 60 |
else |
55 |
- .extract_from_dataset(data,metadata,metadata_prefix,regions) |
|
61 |
+ .extract_from_dataset(data, metadata, metadata_prefix, regions, suffix) |
|
56 | 62 |
} |
57 | 63 |
|
58 |
-.extract_from_dataset <- function(datasetName, metadata = NULL, |
|
59 |
- metadata_prefix = NULL, regions = NULL) |
|
64 |
+.extract_from_dataset <- function(datasetName, metadata, metadata_prefix, |
|
65 |
+ regions, suffix) |
|
60 | 66 |
{ |
61 | 67 |
datasetName <- sub("/*[/]$","",datasetName) |
62 | 68 |
if(basename(datasetName) !="files") |
... | ... |
@@ -64,55 +70,66 @@ filter_and_extract <- function(data, metadata = NULL, |
64 | 70 |
|
65 | 71 |
if(!dir.exists(datasetName)) |
66 | 72 |
stop("Directory does not exists") |
67 |
- |
|
73 |
+ |
|
68 | 74 |
gdm_meta_files <- list.files(datasetName, pattern = "*.gdm.meta$", |
69 | 75 |
full.names = TRUE) |
70 | 76 |
gtf_meta_files <- list.files(datasetName, pattern = "*.gtf.meta$", |
71 | 77 |
full.names = TRUE) |
72 |
- |
|
78 |
+ |
|
73 | 79 |
if(length(gdm_meta_files)==0 && length(gtf_meta_files)==0) |
74 | 80 |
stop("no samples present or no files format supported") |
75 |
- |
|
81 |
+ |
|
76 | 82 |
if(length(gdm_meta_files)>=1 && length(gtf_meta_files)>=1) |
77 | 83 |
stop("GMQL dataset cannot be mixed dataset: no GTF and GDM together") |
78 |
- |
|
84 |
+ |
|
79 | 85 |
vector_field <- .schema_header(datasetName) |
80 |
- |
|
86 |
+ |
|
81 | 87 |
if(length(gdm_meta_files)>0) |
82 | 88 |
{ |
83 |
- samples_file <- .check_metadata_files(metadata,metadata_prefix, |
|
84 |
- gdm_meta_files) |
|
89 |
+ samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix, |
|
90 |
+ gdm_meta_files, suffix) |
|
91 |
+ |
|
92 |
+ samples_file <- sapply(samples_with_suffix, function(x) x$sample) |
|
93 |
+ suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix) |
|
94 |
+ suffixes <- unlist(suffix_vec) |
|
85 | 95 |
samples_to_read <- unlist(samples_file) |
96 |
+ |
|
86 | 97 |
if(length(samples_to_read)>0) |
87 | 98 |
samples_to_read <- gsub(".meta$", "", samples_to_read) |
88 | 99 |
else |
89 | 100 |
samples_to_read <- gsub(".meta$", "", gdm_meta_files) |
90 |
- |
|
91 |
- granges <- .parse_gdm_files(vector_field,samples_to_read,regions) |
|
101 |
+ |
|
102 |
+ granges <- .parse_gdm_files(vector_field,samples_to_read,regions, |
|
103 |
+ suffixes) |
|
92 | 104 |
} |
93 | 105 |
else |
94 | 106 |
{ |
95 |
- samples_file <- .check_metadata_files(metadata,metadata_prefix, |
|
96 |
- gtf_meta_files) |
|
107 |
+ samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix, |
|
108 |
+ gtf_meta_files, suffix) |
|
109 |
+ |
|
110 |
+ samples_file <- sapply(samples_with_suffix, function(x) x$sample) |
|
111 |
+ suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix) |
|
112 |
+ suffixes <- unlist(suffix_vec) |
|
97 | 113 |
samples_to_read <- unlist(samples_file) |
114 |
+ |
|
98 | 115 |
if(length(samples_to_read)>0) |
99 | 116 |
samples_to_read <- gsub(".meta$", "", samples_to_read) |
100 | 117 |
else |
101 | 118 |
samples_to_read <- gsub(".meta$", "", gtf_meta_files) |
102 |
- |
|
103 |
- granges <- .parse_gtf_files(samples_to_read,regions) |
|
119 |
+ |
|
120 |
+ granges <- .parse_gtf_files(samples_to_read, regions, suffixes) |
|
104 | 121 |
} |
105 | 122 |
} |
106 | 123 |
|
107 |
-.extract_from_GRangesList <- function(rangesList, metadata = NULL, |
|
108 |
- metadata_prefix = NULL, regions = NULL) |
|
124 |
+.extract_from_GRangesList <- function(rangesList, metadata, metadata_prefix, |
|
125 |
+ regions) |
|
109 | 126 |
{ |
110 | 127 |
if(!is(rangesList,"GRangesList")) |
111 | 128 |
stop("only GrangesList admitted") |
112 |
- |
|
129 |
+ |
|
113 | 130 |
if(length(rangesList)<=0) |
114 | 131 |
stop("rangesList empty") |
115 |
- |
|
132 |
+ |
|
116 | 133 |
meta_list <- metadata(rangesList) |
117 | 134 |
samples <- .check_metadata_list(metadata, metadata_prefix,meta_list) |
118 | 135 |
if(length(unlist(samples))<=0) |
... | ... |
@@ -156,75 +173,92 @@ filter_and_extract <- function(data, metadata = NULL, |
156 | 173 |
}, meta_list, seq_along(meta_list)) |
157 | 174 |
} |
158 | 175 |
|
159 |
-.check_metadata_files <- function(metadata,metadata_prefix,meta_files) |
|
176 |
+.check_metadata_files <- function(metadata,metadata_prefix,meta_files,col_name) |
|
160 | 177 |
{ |
178 |
+ suffix <- paste0(col_name,"$") |
|
161 | 179 |
vec_meta <- paste0(metadata_prefix,metadata) |
162 | 180 |
meta_list <- lapply(meta_files, function(x){ |
163 | 181 |
list <- .add_metadata(x) |
164 | 182 |
vec_names <- names(list) |
165 | 183 |
a <- sapply(vec_meta, function(y) { |
166 |
- grep(y,vec_names) |
|
184 |
+ grep(y,vec_names) |
|
167 | 185 |
}) |
168 |
- |
|
169 | 186 |
## we would like that manage more index from grep |
170 | 187 |
found <- as.logical(length(unlist(a))) |
188 |
+ index <- grep(suffix,vec_names) |
|
189 |
+ suffix <- unlist(list[index])[1] # ne prendo solo uno |
|
190 |
+ names(suffix) <- NULL |
|
171 | 191 |
#if found retrieve samples that has at least one choosen metadata |
172 |
- if(found){x} |
|
192 |
+ if(found) |
|
193 |
+ list("sample" = x, "suffix" = suffix ) |
|
194 |
+ else |
|
195 |
+ list("sample" = NULL, "suffix" = suffix ) |
|
196 |
+ |
|
173 | 197 |
}) |
174 | 198 |
} |
175 | 199 |
|
176 | 200 |
|
177 |
-.parse_gtf_files <- function(gtf_region_files,regions) |
|
201 |
+.parse_gtf_files <- function(gtf_region_files, regions, suffixes) |
|
178 | 202 |
{ |
179 | 203 |
g1 <- rtracklayer::import(con = gtf_region_files[1], format = "gtf") |
180 | 204 |
elementMetadata(g1) <- NULL |
205 |
+ if(is.null(suffixes)) |
|
206 |
+ suffixes = "" |
|
207 |
+ |
|
181 | 208 |
if(!is.null(regions)) |
182 | 209 |
{ |
183 |
- DF_list <- lapply(gtf_region_files, function(x){ |
|
210 |
+ DF_list <- mapply(function(x,h){ |
|
184 | 211 |
g_x <- rtracklayer::import(con = x, format = "gtf") |
185 | 212 |
meta <- elementMetadata(g_x)[regions] |
213 |
+ if(h!="") |
|
214 |
+ names(meta) <- paste(regions,h,sep = ".") |
|
186 | 215 |
data.frame(meta) |
187 |
- }) |
|
216 |
+ },gtf_region_files, suffixes, SIMPLIFY = FALSE) |
|
188 | 217 |
DF_only_regions <- dplyr::bind_cols(DF_list) |
189 | 218 |
elementMetadata(g1) <- DF_only_regions |
190 | 219 |
} |
191 | 220 |
g1 |
192 | 221 |
} |
193 | 222 |
|
194 |
-.parse_gdm_files <- function(vector_field,gdm_region_files,regions) |
|
223 |
+.parse_gdm_files <- function(vector_field,gdm_region_files,regions,suffixes) |
|
195 | 224 |
{ |
196 | 225 |
#read first sample cause chromosome regions are the same for all samples |
197 | 226 |
df <- data.table::fread(gdm_region_files[1],col.names = vector_field, |
198 |
- header = FALSE,sep = '\t') |
|
227 |
+ header = FALSE,sep = '\t') |
|
199 | 228 |
col_names <- names(df) |
200 |
- df <- df[c("chr","left","right","strand")] |
|
201 |
- |
|
229 |
+ df <- subset(df, TRUE, c("chr","left","right","strand")) |
|
230 |
+ |
|
202 | 231 |
if(!is.null(regions)) |
203 | 232 |
{ |
204 | 233 |
df_list <- lapply(gdm_region_files,function(x,regions,vector_field){ |
205 | 234 |
region_frame <- data.table::fread(x,col.names = vector_field, |
206 |
- header = FALSE,sep = '\t') |
|
235 |
+ header = FALSE,sep = '\t') |
|
207 | 236 |
col_names <- names(region_frame) |
208 | 237 |
#delete column not choosen by input |
209 | 238 |
if(!is.null(regions)) |
210 |
- col_names <- col_names[col_names %in% regions] |
|
211 |
- r <- region_frame[col_names] |
|
239 |
+ col_names <- col_names[col_names %in% regions] |
|
240 |
+ |
|
241 |
+ if(length(col_names)!=0) |
|
242 |
+ r <- subset(region_frame,TRUE,col_names) |
|
212 | 243 |
},regions,vector_field) |
213 | 244 |
|
214 |
- |
|
215 | 245 |
df_only_regions <- dplyr::bind_cols(df_list) |
216 | 246 |
complete_df <- dplyr::bind_cols(df,df_only_regions) |
247 |
+ |
|
248 |
+ region_names <- names(complete_df)[-(1:4)] |
|
249 |
+ region_names <- gsub('[0-9]+', '',region_names) |
|
250 |
+ region_names <- paste(region_names,suffixes,sep = ".") |
|
251 |
+ region_names <- c(names(complete_df)[(1:4)],region_names ) |
|
252 |
+ names(complete_df) <- region_names |
|
217 | 253 |
g <- GenomicRanges::makeGRangesFromDataFrame(complete_df, |
218 |
- keep.extra.columns = TRUE, |
|
219 |
- start.field = "left", |
|
220 |
- end.field = "right") |
|
254 |
+ keep.extra.columns = TRUE, |
|
255 |
+ start.field = "left", |
|
256 |
+ end.field = "right") |
|
221 | 257 |
} |
222 | 258 |
else |
223 |
- g <- GenomicRanges::makeGRangesFromDataFrame(df, |
|
224 |
- keep.extra.columns = TRUE, |
|
225 |
- start.field = "left", |
|
226 |
- end.field = "right") |
|
227 |
- |
|
259 |
+ g <- GenomicRanges::makeGRangesFromDataFrame(df, |
|
260 |
+ keep.extra.columns = TRUE, |
|
261 |
+ start.field = "left", |
|
262 |
+ end.field = "right") |
|
263 |
+ |
|
228 | 264 |
} |
229 |
- |
|
230 |
- |
... | ... |
@@ -87,7 +87,7 @@ check.DISTAL <- function(value) |
87 | 87 |
#' # and HM samples are obtained from the same provider (joinby clause). |
88 | 88 |
#' |
89 | 89 |
#' join_data = merge(TSS, HM, |
90 |
-#' genometric_predicate = list(list(MD(1), DL(1200))), DF("provider"), |
|
90 |
+#' genometric_predicate = list(MD(1), DL(1200)), DF("provider"), |
|
91 | 91 |
#' region_output = "RIGHT") |
92 | 92 |
#' |
93 | 93 |
#' #' # Given a dataset 'hm' and one called 'tss' with a sample including |
... | ... |
@@ -98,7 +98,7 @@ check.DISTAL <- function(value) |
98 | 98 |
#' # from the same provider (joinby clause). |
99 | 99 |
#' |
100 | 100 |
#' join_data = merge(TSS, HM, |
101 |
-#' genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), |
|
101 |
+#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), |
|
102 | 102 |
#' DF("provider"), region_output = "RIGHT") |
103 | 103 |
#' |
104 | 104 |
#' @name DL |
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
schema_name_xml <- list.files(datasetName, pattern = "*.xml$", |
24 | 24 |
full.names = TRUE) |
25 | 25 |
|
26 |
- if(length(schema_name)==0 || length(schema_name_xml) == 0) |
|
26 |
+ if(length(schema_name)==0 && length(schema_name_xml) == 0) |
|
27 | 27 |
stop("schema not present") |
28 | 28 |
|
29 | 29 |
xml_schema <- xml2::read_xml(schema_name) |
... | ... |
@@ -12,9 +12,8 @@ |
12 | 12 |
#' only those samples that have the same value for each attribute |
13 | 13 |
#' are considered when performing the difference. |
14 | 14 |
#' |
15 |
-#' @importFrom rJava J |
|
16 |
-#' @importFrom rJava .jnull |
|
17 |
-#' @importFrom rJava .jarray |
|
15 |
+#' @importFrom rJava J .jnull .jarray |
|
16 |
+#' @importFrom BiocGenerics setdiff |
|
18 | 17 |
#' |
19 | 18 |
#' @param x GMQLDataset class object |
20 | 19 |
#' @param y GMQLDataset class object |
21 | 20 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,79 @@ |
1 |
+#' Method group_by |
|
2 |
+#' |
|
3 |
+#' |
|
4 |
+#' @importFrom rJava J .jarray .jnull |
|
5 |
+#' |
|
6 |
+#' |
|
7 |
+#' @param .data GMQLDataset object |
|
8 |
+#' |
|
9 |
+#' @return GMQLDataset object. It contains the value to use as input |
|
10 |
+#' for the subsequent GMQLDataset method |
|
11 |
+#' |
|
12 |
+#' @examples |
|
13 |
+#' |
|
14 |
+#' |
|
15 |
+#' |
|
16 |
+#' @aliases group_by group_by-method |
|
17 |
+#' @export |
|
18 |
+setMethod("group_by","GMQLDataset", |
|
19 |
+ function(.data, groupBy_meta = NULL, groupBy_regions = NULL, |
|
20 |
+ region_aggregates = NULL, meta_aggregates = NULL) |
|
21 |
+ { |
|
22 |
+ ptr_data = .data@value |
|
23 |
+ gmql_group(ptr_data, groupBy_meta, groupBy_regions, |
|
24 |
+ region_aggregates, meta_aggregates) |
|
25 |
+ }) |
|
26 |
+ |
|
27 |
+gmql_group <- function(input_data, group_meta, group_reg) |
|
28 |
+{ |
|
29 |
+ |
|
30 |
+ if(!is.null(group_meta)) |
|
31 |
+ { |
|
32 |
+ cond <- .join_condition(group_meta) |
|
33 |
+ join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
34 |
+ } |
|
35 |
+ else |
|
36 |
+ join_condition_matrix <- .jnull("java/lang/String") |
|
37 |
+ |
|
38 |
+ if(!is.null(group_reg)) |
|
39 |
+ { |
|
40 |
+ if(!is.character(group_reg)) |
|
41 |
+ stop("metadata: no valid input") |
|
42 |
+ |
|
43 |
+ group_reg <- group_reg[!group_reg %in% ""] |
|
44 |
+ group_reg <- group_reg[!duplicated(group_reg)] |
|
45 |
+ |
|
46 |
+ if(length(group_reg)==0) |
|
47 |
+ group_reg <- .jnull("java/lang/String") |
|
48 |
+ |
|
49 |
+ group_reg <- .jarray(metadata) |
|
50 |
+ } |
|
51 |
+ else |
|
52 |
+ group_reg <- .jnull("java/lang/String") |
|
53 |
+ |
|
54 |
+ if(!is.null(meta_aggregates) && !length(meta_aggregates) == 0) |
|
55 |
+ { |
|
56 |
+ aggr <- .aggregates(meta_aggregates,"AGGREGATES") |
|
57 |
+ metadata_matrix <- .jarray(aggr, dispatch = TRUE) |
|
58 |
+ } |
|
59 |
+ else |
|
60 |
+ metadata_matrix <- .jnull("java/lang/String") |
|
61 |
+ |
|
62 |
+ if(!is.null(region_aggregates) && !length(region_aggregates) == 0) |
|
63 |
+ { |
|
64 |
+ aggr <- .aggregates(region_aggregates,"AGGREGATES") |
|
65 |
+ region_matrix <- .jarray(aggr, dispatch = TRUE) |
|
66 |
+ } |
|
67 |
+ else |
|
68 |
+ region_matrix <- .jnull("java/lang/String") |
|
69 |
+ |
|
70 |
+ WrappeR <- J("it/polimi/genomics/r/Wrapper") |
|
71 |
+ response <- WrappeR$group(join_condition_matrix, metadata_matrix, |
|
72 |
+ groupBy_regions, region_matrix, input_data) |
|
73 |
+ error <- strtoi(response[1]) |
|
74 |
+ data <- response[2] |
|
75 |
+ if(error!=0) |
|
76 |
+ stop(data) |
|
77 |
+ else |
|
78 |
+ GMQLDataset(data) |
|
79 |
+} |
... | ... |
@@ -10,9 +10,8 @@ |
10 | 10 |
#' The output metadata are the union of the input metadata, |
11 | 11 |
#' with their attribute names prefixed with left or right respectively. |
12 | 12 |
#' |
13 |
-#' @importFrom rJava .jnull |
|
14 |
-#' @importFrom rJava J |
|
15 |
-#' @importFrom rJava .jarray |
|
13 |
+#' @importFrom rJava J .jnull .jarray |
|
14 |
+#' @importFrom S4Vectors merge |
|
16 | 15 |
#' |
17 | 16 |
#' @param x GMQLDataset class object |
18 | 17 |
#' @param y GMQLDataset class object |
... | ... |
@@ -106,7 +105,7 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy, |
106 | 105 |
|
107 | 106 |
if(!all(sapply(genometric_predicate, function(x) {is(x,"DISTAL")} ))) |
108 | 107 |
stop("All elements should be DISTAL object") |
109 |
- |
|
108 |
+ |
|
110 | 109 |
genomatrix <- t(sapply(genometric_predicate, function(x) { |
111 | 110 |
new_value = as.character(x) |
112 | 111 |
array <- c(new_value) |
... | ... |
@@ -39,7 +39,7 @@ execute <- function() |
39 | 39 |
{ |
40 | 40 |
url <- WrappeR$get_url() |
41 | 41 |
.download_or_upload() |
42 |
- serialize_query(url,FALSE,data) |
|
42 |
+ res <- serialize_query(url,FALSE,data) |
|
43 | 43 |
} |
44 | 44 |
} |
45 | 45 |
} |
... | ... |
@@ -107,13 +107,18 @@ setMethod("collect", "GMQLDataset", |
107 | 107 |
|
108 | 108 |
gmql_materialize <- function(input_data, dir_out, name) |
109 | 109 |
{ |
110 |
- dir_out <- sub("/*[/]$","",dir_out) |
|
111 |
- |
|
112 |
- res_dir_out <- paste0(dir_out,"/",name) |
|
113 |
- if(!dir.exists(res_dir_out)) |
|
114 |
- dir.create(res_dir_out) |
|
115 |
- |
|
116 | 110 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
111 |
+ remote_proc <- WrappeR$is_remote_processing() |
|
112 |
+ if(!remote_proc) |
|
113 |
+ { |
|
114 |
+ dir_out <- sub("/*[/]$","",dir_out) |
|
115 |
+ res_dir_out <- paste0(dir_out,"/",name) |
|
116 |
+ if(!dir.exists(res_dir_out)) |
|
117 |
+ dir.create(res_dir_out) |
|
118 |
+ } |
|
119 |
+ else |
|
120 |
+ res_dir_out <- dir_out |
|
121 |
+ |
|
117 | 122 |
response <- WrappeR$materialize(input_data, res_dir_out) |
118 | 123 |
error <- strtoi(response[1]) |
119 | 124 |
data <- response[2] |
... | ... |
@@ -10,9 +10,8 @@ |
10 | 10 |
#' Samples whose names are not present in the grouping metadata parameter |
11 | 11 |
#' are disregarded. |
12 | 12 |
#' |
13 |
-#' @importFrom rJava J |
|
14 |
-#' @importFrom rJava .jnull |
|
15 |
-#' @importFrom rJava .jarray |
|
13 |
+#' @importFrom rJava J .jarray .jnull |
|
14 |
+#' @importFrom S4Vectors aggregate |
|
16 | 15 |
#' |
17 | 16 |
#' @param x GMQLDataset class object |
18 | 17 |
#' @param ... Additional arguments for use in specific methods. |
... | ... |
@@ -244,7 +244,7 @@ We provide two metadata for you") |
244 | 244 |
{ |
245 | 245 |
col_names <- plyr::revalue(col_names,c(start = "left", |
246 | 246 |
end = "right", seqnames = "chr")) |
247 |
- schema_matrix <- cbind(toupper(col_types),col_names) |
|
247 |
+ schema_matrix <- cbind(col_names,toupper(col_types)) |
|
248 | 248 |
schema_matrix<- schema_matrix[setdiff(rownames(schema_matrix), |
249 | 249 |
c("group","width")),] |
250 | 250 |
} |
... | ... |
@@ -10,10 +10,10 @@ |
10 | 10 |
#' If no metadata in common between input dataset and semijoin dataset, |
11 | 11 |
#' no sample is extracted. |
12 | 12 |
#' |
13 |
-#' @importFrom rJava J |
|
14 |
-#' @importFrom rJava .jnull |
|
15 |
-#' @importFrom rJava .jarray |
|
13 |
+#' @importFrom rJava J .jnull .jarray |
|
16 | 14 |
#' @importFrom methods isClass |
15 |
+#' @importFrom glue glue |
|
16 |
+#' @importFrom dplyr filter |
|
17 | 17 |
#' |
18 | 18 |
#' @param .data GMQLDataset class object |
19 | 19 |
#' @param m_predicate logical predicate made up by R logical operation |
... | ... |
@@ -69,31 +69,34 @@ |
69 | 69 |
#' @aliases filter filter-method |
70 | 70 |
#' @export |
71 | 71 |
setMethod("filter", "GMQLDataset", |
72 |
- function(.data, m_predicate = NULL, r_predicate = NULL, |
|
73 |
- semijoin = NULL, ...) |
|
72 |
+ function(.data, m_predicate = NULL, r_predicate = NULL, |
|
73 |
+ semijoin = NULL, ...) |
|
74 |
+ { |
|
75 |
+ val <- .data@value |
|
76 |
+ meta_pred <- substitute(m_predicate) |
|
77 |
+ if(!is.null(meta_pred)) |
|
74 | 78 |
{ |
75 |
- val <- .data@value |
|
76 |
- meta_pred <- substitute(m_predicate) |
|
77 |
- if(!is.null(meta_pred)) |
|
78 |
- { |
|
79 |
- predicate <- .trasform(deparse(meta_pred)) |
|
80 |
- predicate <- paste(predicate,collapse = "") |
|
81 |
- } |
|
82 |
- else |
|
83 |
- predicate <- .jnull("java/lang/String") |
|
79 |
+ predicate <- .trasform(deparse(meta_pred)) |
|
80 |
+ predicate <- paste(predicate,collapse = "") |
|
81 |
+ predicate <- as.character(glue::glue(predicate)) |
|
84 | 82 |
|
85 |
- reg_pred <- substitute(r_predicate) |
|
86 |
- if(!is.null(reg_pred)) |
|
87 |
- { |
|
88 |
- region_predicate <- .trasform(deparse(reg_pred)) |
|
89 |
- region_predicate <- paste(region_predicate,collapse = "") |
|
90 |
- } |
|
91 |
- else |
|
92 |
- region_predicate <- .jnull("java/lang/String") |
|
93 |
- |
|
94 |
- gmql_select(val, predicate, region_predicate, semijoin) |
|
95 |
- }) |
|
96 |
- |
|
83 |
+ } |
|
84 |
+ else |
|
85 |
+ predicate <- .jnull("java/lang/String") |
|
86 |
+ |
|
87 |
+ reg_pred <- substitute(r_predicate) |
|
88 |
+ if(!is.null(reg_pred)) |
|
89 |
+ { |
|
90 |
+ region_predicate <- .trasform(deparse(reg_pred)) |
|
91 |
+ region_predicate <- paste(region_predicate,collapse = "") |
|
92 |
+ region_predicate <- as.character(glue::glue(region_predicate)) |
|
93 |
+ |
|
94 |
+ } |
|
95 |
+ else |
|
96 |
+ region_predicate <- .jnull("java/lang/String") |
|
97 |
+ |
|
98 |
+ gmql_select(val, predicate, region_predicate, semijoin) |
|
99 |
+ }) |
|
97 | 100 |
|
98 | 101 |
gmql_select <- function(input_data, predicate, region_predicate, s_join) |
99 | 102 |
{ |
... | ... |
@@ -1052,12 +1052,14 @@ sample_metadata <- function(url, datasetName,sampleName) |
1052 | 1052 |
#' |
1053 | 1053 |
sample_region <- function(url, datasetName,sampleName) |
1054 | 1054 |
{ |
1055 |
+ |
|
1055 | 1056 |
url <- sub("/*[/]$","",url) |
1056 | 1057 |
URL <- paste0(url,"/datasets/",datasetName,"/",sampleName,"/region") |
1057 | 1058 |
h <- c('X-Auth-Token' = authToken, 'Accpet' = 'text/plain') |
1058 | 1059 |
req <- httr::GET(URL, httr::add_headers(h)) |
1059 | 1060 |
content <- httr::content(req, 'parsed',encoding = "UTF-8") |
1060 | 1061 |
|
1062 |
+ |
|
1061 | 1063 |
if(req$status_code !=200) |
1062 | 1064 |
stop(content) |
1063 | 1065 |
else |
... | ... |
@@ -1065,6 +1067,8 @@ sample_region <- function(url, datasetName,sampleName) |
1065 | 1067 |
list <- show_schema(url,datasetName) |
1066 | 1068 |
schema_type <- list$type |
1067 | 1069 |
|
1070 |
+ #df <- read.table(textConnection(content),sep = "\t") |
|
1071 |
+ |
|
1068 | 1072 |
temp <- tempfile("temp") #use temporary files |
1069 | 1073 |
write.table(content,temp,quote = FALSE,sep = '\t',col.names = FALSE, |
1070 | 1074 |
row.names = FALSE) |
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
##gff-version 2 |
2 | 2 |
##source-version rtracklayer 1.36.6 |
3 |
-##date 2017-11-19 |
|
3 |
+##date 2017-12-04 |
|
4 | 4 |
chr1 rtracklayer sequence_feature 11873 12227 0 + . name "NR_046018" |
5 | 5 |
chr1 rtracklayer sequence_feature 12612 12721 0 + . name "NR_046018" |
6 | 6 |
chr1 rtracklayer sequence_feature 917444 917497 0 - . name "NM_001291366" |
... | ... |
@@ -1,14 +1,11 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/AllGenerics.R, R/gmql_merge.R |
|
2 |
+% Please edit documentation in R/gmql_merge.R |
|
3 | 3 |
\docType{methods} |
4 |
-\name{aggregate} |
|
5 |
-\alias{aggregate} |
|
4 |
+\name{aggregate,GMQLDataset-method} |
|
6 | 5 |
\alias{aggregate,GMQLDataset-method} |
7 | 6 |
\alias{aggregate-method} |
8 | 7 |
\title{Method aggregate} |
9 | 8 |
\usage{ |
10 |
-aggregate(x, ...) |
|
11 |
- |
|
12 | 9 |
\S4method{aggregate}{GMQLDataset}(x, ...) |
13 | 10 |
} |
14 | 11 |
\arguments{ |
... | ... |
@@ -31,8 +28,6 @@ GMQLDataset object. It contains the value to use as input |
31 | 28 |
for the subsequent GMQLDataset method |
32 | 29 |
} |
33 | 30 |
\description{ |
34 |
-Wrapper to GMQL merge function |
|
35 |
- |
|
36 | 31 |
It builds a dataset consisting of a single sample having as many regions as |
37 | 32 |
the number of regions of the input data and as many metadata as the union of |
38 | 33 |
the 'attribute-value' tuples of the input samples. |
... | ... |
@@ -85,7 +85,7 @@ HM = read_dataset(test_path2) |
85 | 85 |
# and HM samples are obtained from the same provider (joinby clause). |
86 | 86 |
|
87 | 87 |
join_data = merge(TSS, HM, |
88 |
-genometric_predicate = list(list(MD(1), DL(1200))), DF("provider"), |
|
88 |
+genometric_predicate = list(MD(1), DL(1200)), DF("provider"), |
|
89 | 89 |
region_output = "RIGHT") |
90 | 90 |
|
91 | 91 |
#' # Given a dataset 'hm' and one called 'tss' with a sample including |
... | ... |
@@ -96,7 +96,7 @@ region_output = "RIGHT") |
96 | 96 |
# from the same provider (joinby clause). |
97 | 97 |
|
98 | 98 |
join_data = merge(TSS, HM, |
99 |
-genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), |
|
99 |
+genometric_predicate = list(MD(1), DGE(12000), DOWN()), |
|
100 | 100 |
DF("provider"), region_output = "RIGHT") |
101 | 101 |
|
102 | 102 |
} |
... | ... |
@@ -78,7 +78,7 @@ test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
78 | 78 |
data <- read_dataset(test_path) |
79 | 79 |
join_data <- read_dataset(test_path2) |
80 | 80 |
jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
81 |
-semijoin(join_data, TRUE, DF("cell"))) |
|
81 |
+semijoin(join_data, TRUE, list(DF("cell")))) |
|
82 | 82 |
|
83 | 83 |
} |
84 | 84 |
|
... | ... |
@@ -5,42 +5,49 @@ |
5 | 5 |
\title{FILTER AND EXTRACT} |
6 | 6 |
\usage{ |
7 | 7 |
filter_and_extract(data, metadata = NULL, metadata_prefix = NULL, |
8 |
- regions = NULL) |
|
8 |
+ regions = NULL, suffix = "antibody_target") |
|
9 | 9 |
} |
10 | 10 |
\arguments{ |
11 |
-\item{data}{string GMQL dataset folder path or GrangesList |
|
11 |
+\item{data}{string GMQL dataset folder path or GrangesList |
|
12 | 12 |
object} |
13 | 13 |
|
14 |
-\item{metadata}{vector of character containing names of metadata |
|
14 |
+\item{metadata}{vector of character containing names of metadata |
|
15 | 15 |
to be searched for in metadata files. |
16 | 16 |
data will be extracted if at least one condition is satisfied: |
17 | 17 |
this condition will be logically "ANDed" with prefix filtering (see below) |
18 |
-if NULL no filtering action occured |
|
18 |
+if NULL no filtering action occured |
|
19 | 19 |
(i.e every sample will be taken for regions filtering)} |
20 | 20 |
|
21 |
-\item{metadata_prefix}{vector of character that will filter metadata |
|
21 |
+\item{metadata_prefix}{vector of character that will filter metadata |
|
22 | 22 |
containing rispectively every element of this vector. |
23 | 23 |
number of elelment in both vector must match} |
24 | 24 |
|
25 |
-\item{regions}{vector of character that will extract only region |
|
26 |
-attribute specified; if NULL no regions will be taken and the output |
|
25 |
+\item{regions}{vector of character that will extract only region |
|
26 |
+attribute specified; if NULL no regions will be taken and the output |
|
27 | 27 |
will be only GRanges made up by the first attribute |
28 | 28 |
(seqnames,start,end,strand)} |
29 |
+ |
|
30 |
+\item{suffix}{name for each metadata column of GRanges. by default is the |
|
31 |
+"antibody_target". This string is taken from sample metadata file or from |
|
32 |
+metadata() associated. If not present, the column name is the name of |
|
33 |
+selected regions} |
|
29 | 34 |
} |
30 | 35 |
\value{ |
31 | 36 |
granges with selected regions (if any) in elementMetadata |
32 | 37 |
} |
33 | 38 |
\description{ |
34 |
-This function let user to create a new GRangeslist with fixed information: |
|
35 |
-seqnames,ranges ans strand and a variable part made up by the regions |
|
39 |
+This function let user to create a new GRangeslist with fixed information: |
|
40 |
+seqnames,ranges ans strand and a variable part made up by the regions |
|
36 | 41 |
defined as input. |
37 |
-The metadata and metadata_prefix are used to filter the data and choose |
|
42 |
+The metadata and metadata_prefix are used to filter the data and choose |
|
38 | 43 |
only the samples that match at least one metdatata with its prefix. |
39 | 44 |
The input regions are shown for each sample obtained from filtering. |
40 | 45 |
} |
41 | 46 |
\details{ |
42 |
-This function works only with datatset or Grangeslist that have the same |
|
47 |
+This function works only with datatset or Grangeslist that have the same |
|
43 | 48 |
information about regions attribute (but of course different value) |
49 |
+In case of Grangeslist data input the function will search for metadata |
|
50 |
+into metadata() function associated to Grangeslist. |
|
44 | 51 |
} |
45 | 52 |
\examples{ |
46 | 53 |
|
47 | 54 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/AllGenerics.R, R/gmql_group.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{group_by} |
|
5 |
+\alias{group_by} |
|
6 |
+\alias{group_by,GMQLDataset-method} |
|
7 |
+\alias{group_by} |
|
8 |
+\alias{group_by-method} |
|
9 |
+\title{Method group_by} |
|
10 |
+\usage{ |
|
11 |
+group_by(.data, ...) |
|
12 |
+ |
|
13 |
+\S4method{group_by}{GMQLDataset}(.data, ...) |
|
14 |
+} |
|
15 |
+\arguments{ |
|
16 |
+\item{.data}{GMQLDataset object} |
|
17 |
+} |
|
18 |
+\value{ |
|
19 |
+GMQLDataset object. It contains the value to use as input |
|
20 |
+for the subsequent GMQLDataset method |
|
21 |
+} |
|
22 |
+\description{ |
|
23 |
+Wrapper to GMQL group function |
|
24 |
+ |
|
25 |
+Method group_by |
|
26 |
+} |
|
27 |
+\examples{ |
|
28 |
+ |
|
29 |
+ |
|
30 |
+ |
|
31 |
+} |
... | ... |
@@ -51,6 +51,7 @@ test_path <- system.file("example", "DATASET", package = "RGMQL") |
51 | 51 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
52 | 52 |
data <- read_dataset(test_path) |
53 | 53 |
join_data <- read_dataset(test_path2) |
54 |
-jun_tf <- filter(data,NULL,NULL, semijoin(join_data, TRUE, DF("cell"))) |
|
54 |
+jun_tf <- filter(data,NULL,NULL, semijoin(join_data, TRUE, |
|
55 |
+list(DF("cell")))) |
|
55 | 56 |
|
56 | 57 |
} |
... | ... |
@@ -390,7 +390,7 @@ Upon succesfull logon, you get a request token that you must use |
390 | 390 |
in every subsequent REST call. |
391 | 391 |
Login can be performed using the function: |
392 | 392 |
```{r, eval = TRUE} |
393 |
-test_url = "http://genomic.elet.polimi.it/gmql-rest-r" |
|
393 |
+test_url = "http://genomic.elet.polimi.it/gmql-rest-r/" |
|
394 | 394 |
login_gmql(test_url) |
395 | 395 |
``` |
396 | 396 |
that saves the token in the Global R environment within the variable |
... | ... |
@@ -436,6 +436,12 @@ name_dataset <- job$datasets[[1]]$name |
436 | 436 |
download_dataset(test_url,name_dataset) |
437 | 437 |
``` |
438 | 438 |
|
439 |
+Once done we can loogut from remote repository using: |
|
440 |
+```{r, logout, eval = TRUE} |
|
441 |
+logout_gmql(test_url) |
|
442 |
+``` |
|
443 |
+*logout_gmql* delete the authToken from R environment |
|
444 |
+ |
|
439 | 445 |
### Batch execution |
440 | 446 |
|
441 | 447 |
This execution type is similar to local processing |
... | ... |
@@ -446,7 +452,7 @@ using the functions in this RGMQL package.[see import/export](# Utilities) |
446 | 452 |
|
447 | 453 |
Before starting with examples, note that we have to log into remote |
448 | 454 |
infrastructure with login function: |
449 |
-```{r, login remote, eval = TRUE} |
|
455 |
+```{r, login remote, eval = FALSE} |
|
450 | 456 |
login_gmql(test_url) |
451 | 457 |
``` |
452 | 458 |
Otherwise, we can initialize the data engine with a remote url: |
... | ... |
@@ -598,7 +604,7 @@ We can also import only a part of a GMQL dataset into R environment, |
598 | 604 |
by filtering its content as follows: |
599 | 605 |
```{r, filter_extract, eval = TRUE} |
600 | 606 |
|
601 |
-data_in <- base::system.file("example", "TEAD", package = "RGMQL") |
|
607 |
+data_in <- system.file("example", "TEAD", package = "RGMQL") |
|
602 | 608 |
matrix <- filter_and_extract(data_in, metadata = NULL,regions = c("count")) |
603 | 609 |
matrix |
604 | 610 |
|