Browse code

add group, fixed some function

Simone authored on 05/12/2017 10:34:47
Showing 24 changed files

... ...
@@ -63,7 +63,9 @@ Imports:
63 63
     methods,
64 64
     S4Vectors,
65 65
     dplyr,
66
-    stats
66
+    stats,
67
+    glue,
68
+    BiocGenerics
67 69
 Depends:
68 70
     R(<= 3.4.2)
69 71
 VignetteBuilder: knitr
... ...
@@ -92,6 +94,7 @@ Collate:
92 94
     'gmql_cover.R'
93 95
     'gmql_difference.R'
94 96
     'gmql_extend.R'
97
+    'gmql_group.R'
95 98
     'gmql_join.R'
96 99
     'gmql_map.R'
97 100
     'gmql_materialize.R'
... ...
@@ -67,6 +67,7 @@ exportMethods(collect)
67 67
 exportMethods(cover)
68 68
 exportMethods(extend)
69 69
 exportMethods(filter)
70
+exportMethods(group_by)
70 71
 exportMethods(map)
71 72
 exportMethods(merge)
72 73
 exportMethods(select)
... ...
@@ -78,10 +79,16 @@ import(httr)
78 79
 import(xml2)
79 80
 importClassesFrom(GenomicRanges,GRangesList)
80 81
 importClassesFrom(S4Vectors,DataTable)
82
+importFrom(BiocGenerics,setdiff)
83
+importFrom(BiocGenerics,union)
81 84
 importFrom(GenomicRanges,makeGRangesFromDataFrame)
85
+importFrom(S4Vectors,aggregate)
86
+importFrom(S4Vectors,merge)
82 87
 importFrom(S4Vectors,metadata)
83 88
 importFrom(data.table,fread)
84 89
 importFrom(dplyr,bind_cols)
90
+importFrom(dplyr,filter)
91
+importFrom(glue,glue)
85 92
 importFrom(methods,is)
86 93
 importFrom(methods,isClass)
87 94
 importFrom(methods,new)
... ...
@@ -1,15 +1,3 @@
1
-#' Method aggregate
2
-#' 
3
-#' Wrapper to GMQL merge function
4
-#' 
5
-#' @name aggregate
6
-#' @rdname aggregate-GMQLDataset-method
7
-#' @aliases aggregate
8
-#'  
9
-setGeneric("aggregate", function(x, ...) 
10
-                                    standardGeneric("aggregate"))
11
-
12
-
13 1
 #' Method filter
14 2
 #' 
15 3
 #' Wrapper to GMQL select function
... ...
@@ -71,7 +59,6 @@ setGeneric("take", function(data, ...) standardGeneric("take"))
71 59
 #' @aliases extend GMQLDataset-method
72 60
 setGeneric("extend", function(.data, ...) standardGeneric("extend"))
73 61
 
74
-
75 62
 #' Method select
76 63
 #' 
77 64
 #' Wrapper to GMQL project function
... ...
@@ -81,6 +68,7 @@ setGeneric("extend", function(.data, ...) standardGeneric("extend"))
81 68
 #' @aliases select 
82 69
 setGeneric("select", function(.data, ...) standardGeneric("select"))
83 70
 
71
+
84 72
 #' Method arrange
85 73
 #' 
86 74
 #' Wrapper to GMQL order function
... ...
@@ -94,3 +82,12 @@ setGeneric("arrange", function(.data, metadata_ordering = NULL,
94 82
                     reg_fetch_opt = NULL, reg_num_fetch = 0, ...) 
95 83
                             standardGeneric("arrange"))
96 84
 
85
+#' Method group_by
86
+#' 
87
+#' Wrapper to GMQL group function
88
+#' 
89
+#' @name group_by
90
+#' @rdname group_by-GMQLDataset-method
91
+#' @aliases group_by 
92
+setGeneric("group_by", function(.data, ...) standardGeneric("group_by"))
93
+
... ...
@@ -1,9 +1,9 @@
1
-#' FILTER AND EXTRACT 
1
+#' FILTER AND EXTRACT
2 2
 #'
3
-#' This function let user to create a new GRangeslist with fixed information: 
4
-#' seqnames,ranges ans strand and a variable part made up by the regions 
3
+#' This function let user to create a new GRangeslist with fixed information:
4
+#' seqnames,ranges ans strand and a variable part made up by the regions
5 5
 #' defined as input.
6
-#' The metadata and metadata_prefix are used to filter the data and choose 
6
+#' The metadata and metadata_prefix are used to filter the data and choose
7 7
 #' only the samples that match at least one metdatata with its prefix.
8 8
 #' The input regions are shown for each sample obtained from filtering.
9 9
 #'
... ...
@@ -12,51 +12,57 @@
12 12
 #' @importFrom data.table fread
13 13
 #' @importFrom rtracklayer import
14 14
 #'
15
-#' @param data string GMQL dataset folder path or GrangesList 
15
+#' @param data string GMQL dataset folder path or GrangesList
16 16
 #' object
17
-#' @param metadata vector of character containing names of metadata 
17
+#' @param metadata vector of character containing names of metadata
18 18
 #' to be searched for in metadata files.
19 19
 #' data will be extracted if at least one condition is satisfied:
20 20
 #' this condition will be logically "ANDed" with prefix filtering (see below)
21
-#' if NULL no filtering action occured 
21
+#' if NULL no filtering action occured
22 22
 #' (i.e every sample will be taken for regions filtering)
23
-#' @param metadata_prefix vector of character that will filter metadata 
23
+#' @param metadata_prefix vector of character that will filter metadata
24 24
 #' containing rispectively every element of this vector.
25 25
 #' number of elelment in both vector must match
26
-#' @param regions vector of character that will extract only region 
27
-#' attribute specified; if NULL no regions will be taken and the output 
26
+#' @param regions vector of character that will extract only region
27
+#' attribute specified; if NULL no regions will be taken and the output
28 28
 #' will be only GRanges made up by the first attribute
29 29
 #' (seqnames,start,end,strand)
30
+#' @param suffix name for each metadata column of GRanges. by default is the 
31
+#' "antibody_target". This string is taken from sample metadata file or from
32
+#' metadata() associated. If not present, the column name is the name of 
33
+#' selected regions
30 34
 #'
31
-#' 
32
-#' @details 
33
-#' This function works only with datatset or Grangeslist that have the same 
35
+#' @details
36
+#' This function works only with datatset or Grangeslist that have the same
34 37
 #' information about regions attribute (but of course different value)
35
-#' 
38
+#' In case of Grangeslist data input the function will search for metadata
39
+#' into metadata() function associated to Grangeslist.
40
+#'
36 41
 #' @return granges with selected regions (if any) in elementMetadata
37 42
 #'
38 43
 #' @examples
39 44
 #'
40 45
 #' test_path <- system.file("example", "DATASET", package = "RGMQL")
41 46
 #' filter_and_extract(test_path,regions = c("pvalue", "peak"))
42
-#' 
47
+#'
43 48
 #' grl = import_gmql(test_path, TRUE)
44 49
 #' filter_and_extract(grl, regions = c("pvalue", "peak"))
45 50
 #'
46 51
 #'
47 52
 #' @export
48 53
 #'
49
-filter_and_extract <- function(data, metadata = NULL, 
50
-                                metadata_prefix = NULL, regions = NULL)
54
+filter_and_extract <- function(data, metadata = NULL,
55
+                                metadata_prefix = NULL, regions = NULL, 
56
+                                suffix = "antibody_target")
51 57
 {
52 58
     if(is(data,"GRangesList"))
53
-        .extract_from_GRangesList(data,metadata,metadata_prefix,regions)
59
+        .extract_from_GRangesList(data, metadata, metadata_prefix, regions)
54 60
     else
55
-        .extract_from_dataset(data,metadata,metadata_prefix,regions)
61
+        .extract_from_dataset(data, metadata, metadata_prefix, regions, suffix)
56 62
 }
57 63
 
58
-.extract_from_dataset <- function(datasetName, metadata = NULL, 
59
-                        metadata_prefix = NULL, regions = NULL)
64
+.extract_from_dataset <- function(datasetName, metadata, metadata_prefix, 
65
+                                        regions, suffix)
60 66
 {
61 67
     datasetName <- sub("/*[/]$","",datasetName)
62 68
     if(basename(datasetName) !="files")
... ...
@@ -64,55 +70,66 @@ filter_and_extract <- function(data, metadata = NULL,
64 70
     
65 71
     if(!dir.exists(datasetName))
66 72
         stop("Directory does not exists")
67
-
73
+    
68 74
     gdm_meta_files <- list.files(datasetName, pattern = "*.gdm.meta$",
69 75
                                     full.names = TRUE)
70 76
     gtf_meta_files <- list.files(datasetName, pattern = "*.gtf.meta$",
71 77
                                     full.names = TRUE)
72
-
78
+    
73 79
     if(length(gdm_meta_files)==0 && length(gtf_meta_files)==0)
74 80
         stop("no samples present or no files format supported")
75
-
81
+    
76 82
     if(length(gdm_meta_files)>=1 && length(gtf_meta_files)>=1)
77 83
         stop("GMQL dataset cannot be mixed dataset: no GTF and GDM together")
78
-
84
+    
79 85
     vector_field <- .schema_header(datasetName)
80
-
86
+    
81 87
     if(length(gdm_meta_files)>0)
82 88
     {
83
-        samples_file <- .check_metadata_files(metadata,metadata_prefix,
84
-                                                gdm_meta_files)
89
+        samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix,
90
+                                                gdm_meta_files, suffix)
91
+        
92
+        samples_file <- sapply(samples_with_suffix, function(x) x$sample)
93
+        suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix)
94
+        suffixes <- unlist(suffix_vec)
85 95
         samples_to_read <- unlist(samples_file)
96
+        
86 97
         if(length(samples_to_read)>0)
87 98
             samples_to_read <- gsub(".meta$", "", samples_to_read)
88 99
         else
89 100
             samples_to_read <- gsub(".meta$", "", gdm_meta_files)
90
-
91
-        granges <- .parse_gdm_files(vector_field,samples_to_read,regions)
101
+        
102
+        granges <- .parse_gdm_files(vector_field,samples_to_read,regions,
103
+                                                        suffixes)
92 104
     }
93 105
     else
94 106
     {
95
-        samples_file <- .check_metadata_files(metadata,metadata_prefix,
96
-                                                gtf_meta_files)
107
+        samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix,
108
+                                                    gtf_meta_files, suffix)
109
+        
110
+        samples_file <- sapply(samples_with_suffix, function(x) x$sample)
111
+        suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix)
112
+        suffixes <- unlist(suffix_vec)
97 113
         samples_to_read <- unlist(samples_file)
114
+        
98 115
         if(length(samples_to_read)>0)
99 116
             samples_to_read <- gsub(".meta$", "", samples_to_read)
100 117
         else
101 118
             samples_to_read <- gsub(".meta$", "", gtf_meta_files)
102
-
103
-        granges <- .parse_gtf_files(samples_to_read,regions)
119
+        
120
+        granges <- .parse_gtf_files(samples_to_read, regions, suffixes)
104 121
     }
105 122
 }
106 123
 
107
-.extract_from_GRangesList <- function(rangesList, metadata = NULL,
108
-                                metadata_prefix = NULL, regions = NULL)
124
+.extract_from_GRangesList <- function(rangesList, metadata, metadata_prefix, 
125
+                                        regions)
109 126
 {
110 127
     if(!is(rangesList,"GRangesList"))
111 128
         stop("only GrangesList admitted")
112
-
129
+    
113 130
     if(length(rangesList)<=0)
114 131
         stop("rangesList empty")
115
-
132
+    
116 133
     meta_list <- metadata(rangesList)
117 134
     samples <- .check_metadata_list(metadata, metadata_prefix,meta_list)
118 135
     if(length(unlist(samples))<=0)
... ...
@@ -156,75 +173,92 @@ filter_and_extract <- function(data, metadata = NULL,
156 173
     }, meta_list, seq_along(meta_list))
157 174
 }
158 175
 
159
-.check_metadata_files <- function(metadata,metadata_prefix,meta_files)
176
+.check_metadata_files <- function(metadata,metadata_prefix,meta_files,col_name)
160 177
 {
178
+    suffix <- paste0(col_name,"$")
161 179
     vec_meta <- paste0(metadata_prefix,metadata)
162 180
     meta_list <- lapply(meta_files, function(x){
163 181
         list <- .add_metadata(x)
164 182
         vec_names <- names(list)
165 183
         a <- sapply(vec_meta, function(y) {
166
-            grep(y,vec_names) 
184
+            grep(y,vec_names)
167 185
         })
168
-
169 186
         ## we would like that manage more index from grep
170 187
         found <- as.logical(length(unlist(a)))
188
+        index <- grep(suffix,vec_names)
189
+        suffix <- unlist(list[index])[1] # ne prendo solo uno
190
+        names(suffix) <- NULL
171 191
         #if found retrieve samples that has at least one choosen metadata
172
-        if(found){x}
192
+        if(found)
193
+            list("sample" = x, "suffix" = suffix )
194
+        else
195
+            list("sample" = NULL, "suffix" = suffix )
196
+            
173 197
     })
174 198
 }
175 199
 
176 200
 
177
-.parse_gtf_files <- function(gtf_region_files,regions)
201
+.parse_gtf_files <- function(gtf_region_files, regions, suffixes)
178 202
 {
179 203
     g1 <- rtracklayer::import(con = gtf_region_files[1], format = "gtf")
180 204
     elementMetadata(g1) <- NULL
205
+    if(is.null(suffixes))
206
+        suffixes = ""
207
+    
181 208
     if(!is.null(regions))
182 209
     {
183
-        DF_list <- lapply(gtf_region_files, function(x){
210
+        DF_list <- mapply(function(x,h){
184 211
             g_x <- rtracklayer::import(con = x, format = "gtf")
185 212
             meta <- elementMetadata(g_x)[regions]
213
+            if(h!="")
214
+                names(meta) <- paste(regions,h,sep = ".")
186 215
             data.frame(meta)
187
-        })
216
+        },gtf_region_files, suffixes, SIMPLIFY = FALSE)
188 217
         DF_only_regions <- dplyr::bind_cols(DF_list)
189 218
         elementMetadata(g1) <- DF_only_regions
190 219
     }
191 220
     g1
192 221
 }
193 222
 
194
-.parse_gdm_files <- function(vector_field,gdm_region_files,regions)
223
+.parse_gdm_files <- function(vector_field,gdm_region_files,regions,suffixes)
195 224
 {
196 225
     #read first sample cause chromosome regions are the same for all samples
197 226
     df <- data.table::fread(gdm_region_files[1],col.names = vector_field,
198
-                                header = FALSE,sep = '\t')
227
+    header = FALSE,sep = '\t')
199 228
     col_names <- names(df)
200
-    df <-  df[c("chr","left","right","strand")]
201
-
229
+    df <-  subset(df, TRUE, c("chr","left","right","strand"))
230
+    
202 231
     if(!is.null(regions))
203 232
     {
204 233
         df_list <- lapply(gdm_region_files,function(x,regions,vector_field){
205 234
             region_frame <- data.table::fread(x,col.names = vector_field,
206
-                                                header = FALSE,sep = '\t')
235
+                    header = FALSE,sep = '\t')
207 236
             col_names <- names(region_frame)
208 237
             #delete column not choosen by input
209 238
             if(!is.null(regions))
210
-                col_names <- col_names[col_names %in% regions] 
211
-            r <- region_frame[col_names]
239
+                col_names <- col_names[col_names %in% regions]
240
+            
241
+            if(length(col_names)!=0)
242
+                r <- subset(region_frame,TRUE,col_names)
212 243
         },regions,vector_field)
213 244
 
214
-
215 245
         df_only_regions <- dplyr::bind_cols(df_list)
216 246
         complete_df <- dplyr::bind_cols(df,df_only_regions)
247
+        
248
+        region_names <- names(complete_df)[-(1:4)]
249
+        region_names <- gsub('[0-9]+', '',region_names)
250
+        region_names <- paste(region_names,suffixes,sep = ".")
251
+        region_names <- c(names(complete_df)[(1:4)],region_names )
252
+        names(complete_df) <- region_names
217 253
         g <- GenomicRanges::makeGRangesFromDataFrame(complete_df,
218
-                                            keep.extra.columns = TRUE,
219
-                                            start.field = "left",
220
-                                            end.field = "right")
254
+                            keep.extra.columns = TRUE,
255
+                            start.field = "left",
256
+                            end.field = "right")
221 257
     }
222 258
     else
223
-        g <- GenomicRanges::makeGRangesFromDataFrame(df,
224
-                                            keep.extra.columns = TRUE,
225
-                                            start.field = "left",
226
-                                            end.field = "right")
227
-
259
+        g <- GenomicRanges::makeGRangesFromDataFrame(df, 
260
+                            keep.extra.columns = TRUE,
261
+                            start.field = "left",
262
+                            end.field = "right")
263
+    
228 264
 }
229
-
230
-
... ...
@@ -87,7 +87,7 @@ check.DISTAL <- function(value)
87 87
 #' # and HM samples are obtained from the same provider (joinby clause).
88 88
 #' 
89 89
 #' join_data = merge(TSS, HM, 
90
-#' genometric_predicate = list(list(MD(1), DL(1200))), DF("provider"), 
90
+#' genometric_predicate = list(MD(1), DL(1200)), DF("provider"), 
91 91
 #' region_output = "RIGHT")
92 92
 #'
93 93
 #' #' # Given a dataset 'hm' and one called 'tss' with a sample including 
... ...
@@ -98,7 +98,7 @@ check.DISTAL <- function(value)
98 98
 #' # from the same provider (joinby clause).
99 99
 #' 
100 100
 #' join_data = merge(TSS, HM, 
101
-#' genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), 
101
+#' genometric_predicate = list(MD(1), DGE(12000), DOWN()), 
102 102
 #' DF("provider"), region_output = "RIGHT")
103 103
 #'
104 104
 #' @name DL
... ...
@@ -23,7 +23,7 @@
23 23
     schema_name_xml <- list.files(datasetName, pattern = "*.xml$",
24 24
                                 full.names = TRUE)
25 25
     
26
-    if(length(schema_name)==0 || length(schema_name_xml) == 0)
26
+    if(length(schema_name)==0 && length(schema_name_xml) == 0)
27 27
         stop("schema not present")
28 28
     
29 29
     xml_schema <- xml2::read_xml(schema_name)
... ...
@@ -12,9 +12,8 @@
12 12
 #' only those samples that have the same value for each attribute
13 13
 #' are considered when performing the difference.
14 14
 #'
15
-#' @importFrom rJava J
16
-#' @importFrom rJava .jnull
17
-#' @importFrom rJava .jarray
15
+#' @importFrom rJava J .jnull .jarray
16
+#' @importFrom BiocGenerics setdiff
18 17
 #' 
19 18
 #' @param x GMQLDataset class object
20 19
 #' @param y GMQLDataset class object
21 20
new file mode 100644
... ...
@@ -0,0 +1,79 @@
1
+#' Method group_by
2
+#' 
3
+#' 
4
+#' @importFrom rJava J .jarray .jnull
5
+#' 
6
+#' 
7
+#' @param .data GMQLDataset object
8
+#'
9
+#' @return GMQLDataset object. It contains the value to use as input 
10
+#' for the subsequent GMQLDataset method
11
+#'
12
+#' @examples
13
+#' 
14
+#' 
15
+#' 
16
+#' @aliases group_by group_by-method
17
+#' @export
18
+setMethod("group_by","GMQLDataset",
19
+          function(.data, groupBy_meta = NULL, groupBy_regions = NULL, 
20
+            region_aggregates = NULL, meta_aggregates = NULL)
21
+          {
22
+              ptr_data = .data@value
23
+              gmql_group(ptr_data, groupBy_meta, groupBy_regions, 
24
+                            region_aggregates, meta_aggregates)
25
+          })
26
+
27
+gmql_group <- function(input_data, group_meta, group_reg)
28
+{
29
+    
30
+    if(!is.null(group_meta))
31
+    {
32
+        cond <- .join_condition(group_meta)
33
+        join_condition_matrix <- .jarray(cond, dispatch = TRUE)
34
+    }
35
+    else
36
+        join_condition_matrix <- .jnull("java/lang/String")
37
+    
38
+    if(!is.null(group_reg))
39
+    {
40
+        if(!is.character(group_reg))
41
+            stop("metadata: no valid input")
42
+        
43
+        group_reg <- group_reg[!group_reg %in% ""]
44
+        group_reg <- group_reg[!duplicated(group_reg)]
45
+        
46
+        if(length(group_reg)==0)
47
+            group_reg <- .jnull("java/lang/String")
48
+        
49
+        group_reg <- .jarray(metadata)
50
+    }
51
+    else
52
+        group_reg <- .jnull("java/lang/String")
53
+    
54
+    if(!is.null(meta_aggregates) && !length(meta_aggregates) == 0)
55
+    {
56
+        aggr <- .aggregates(meta_aggregates,"AGGREGATES")
57
+        metadata_matrix <- .jarray(aggr, dispatch = TRUE)
58
+    }
59
+    else
60
+        metadata_matrix <- .jnull("java/lang/String")
61
+    
62
+    if(!is.null(region_aggregates) && !length(region_aggregates) == 0)
63
+    {
64
+        aggr <- .aggregates(region_aggregates,"AGGREGATES")
65
+        region_matrix <- .jarray(aggr, dispatch = TRUE)
66
+    }
67
+    else
68
+        region_matrix <- .jnull("java/lang/String")
69
+    
70
+    WrappeR <- J("it/polimi/genomics/r/Wrapper")
71
+    response <- WrappeR$group(join_condition_matrix, metadata_matrix, 
72
+                                groupBy_regions, region_matrix, input_data)
73
+    error <- strtoi(response[1])
74
+    data <- response[2]
75
+    if(error!=0)
76
+        stop(data)
77
+    else
78
+        GMQLDataset(data)
79
+}
... ...
@@ -10,9 +10,8 @@
10 10
 #' The output metadata are the union of the input metadata, 
11 11
 #' with their attribute names prefixed with left or right respectively.
12 12
 #'
13
-#' @importFrom rJava .jnull
14
-#' @importFrom rJava J
15
-#' @importFrom rJava .jarray
13
+#' @importFrom rJava J .jnull .jarray
14
+#' @importFrom S4Vectors merge
16 15
 #' 
17 16
 #' @param x GMQLDataset class object
18 17
 #' @param y GMQLDataset class object
... ...
@@ -106,7 +105,7 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy,
106 105
     
107 106
         if(!all(sapply(genometric_predicate, function(x) {is(x,"DISTAL")} )))
108 107
             stop("All elements should be DISTAL object")
109
-         
108
+        
110 109
         genomatrix <- t(sapply(genometric_predicate, function(x) {
111 110
                 new_value = as.character(x)
112 111
                 array <- c(new_value)
... ...
@@ -39,7 +39,7 @@ execute <- function()
39 39
         {
40 40
             url <- WrappeR$get_url()
41 41
             .download_or_upload()
42
-            serialize_query(url,FALSE,data)
42
+            res <- serialize_query(url,FALSE,data)
43 43
         }
44 44
     }
45 45
 }
... ...
@@ -107,13 +107,18 @@ setMethod("collect", "GMQLDataset",
107 107
 
108 108
 gmql_materialize <- function(input_data, dir_out, name)
109 109
 {
110
-    dir_out <- sub("/*[/]$","",dir_out)
111
-    
112
-    res_dir_out <- paste0(dir_out,"/",name)
113
-    if(!dir.exists(res_dir_out))
114
-        dir.create(res_dir_out)
115
-    
116 110
     WrappeR <- J("it/polimi/genomics/r/Wrapper")
111
+    remote_proc <- WrappeR$is_remote_processing()
112
+    if(!remote_proc)
113
+    {
114
+        dir_out <- sub("/*[/]$","",dir_out)
115
+        res_dir_out <- paste0(dir_out,"/",name)
116
+        if(!dir.exists(res_dir_out))
117
+            dir.create(res_dir_out)
118
+    }
119
+    else
120
+        res_dir_out <- dir_out
121
+    
117 122
     response <- WrappeR$materialize(input_data, res_dir_out)
118 123
     error <- strtoi(response[1])
119 124
     data <- response[2]
... ...
@@ -10,9 +10,8 @@
10 10
 #' Samples whose names are not present in the grouping metadata parameter 
11 11
 #' are disregarded.
12 12
 #'
13
-#' @importFrom rJava J
14
-#' @importFrom rJava .jnull
15
-#' @importFrom rJava .jarray
13
+#' @importFrom rJava J .jarray .jnull
14
+#' @importFrom S4Vectors aggregate
16 15
 #'
17 16
 #' @param x GMQLDataset class object
18 17
 #' @param ... Additional arguments for use in specific methods.
... ...
@@ -244,7 +244,7 @@ We provide two metadata for you")
244 244
     {
245 245
         col_names <- plyr::revalue(col_names,c(start = "left", 
246 246
                                         end = "right", seqnames = "chr"))
247
-        schema_matrix <- cbind(toupper(col_types),col_names)
247
+        schema_matrix <- cbind(col_names,toupper(col_types))
248 248
         schema_matrix<- schema_matrix[setdiff(rownames(schema_matrix),
249 249
                                         c("group","width")),]
250 250
     }
... ...
@@ -10,10 +10,10 @@
10 10
 #' If no metadata in common between input dataset and semijoin dataset, 
11 11
 #' no sample is extracted.
12 12
 #'
13
-#' @importFrom rJava J
14
-#' @importFrom rJava .jnull
15
-#' @importFrom rJava .jarray
13
+#' @importFrom rJava J .jnull .jarray
16 14
 #' @importFrom methods isClass
15
+#' @importFrom glue glue
16
+#' @importFrom dplyr filter
17 17
 #' 
18 18
 #' @param .data GMQLDataset class object
19 19
 #' @param m_predicate logical predicate made up by R logical operation 
... ...
@@ -69,31 +69,34 @@
69 69
 #' @aliases filter filter-method
70 70
 #' @export
71 71
 setMethod("filter", "GMQLDataset",
72
-            function(.data, m_predicate = NULL, r_predicate = NULL, 
73
-                        semijoin = NULL, ...)
72
+        function(.data, m_predicate = NULL, r_predicate = NULL, 
73
+                    semijoin = NULL, ...)
74
+        {
75
+            val <- .data@value
76
+            meta_pred <- substitute(m_predicate)
77
+            if(!is.null(meta_pred))
74 78
             {
75
-                val <- .data@value
76
-                meta_pred <- substitute(m_predicate)
77
-                if(!is.null(meta_pred))
78
-                {
79
-                    predicate <- .trasform(deparse(meta_pred))
80
-                    predicate <- paste(predicate,collapse = "")
81
-                }
82
-                else
83
-                    predicate <- .jnull("java/lang/String")
79
+                predicate <- .trasform(deparse(meta_pred))
80
+                predicate <- paste(predicate,collapse = "")
81
+                predicate <- as.character(glue::glue(predicate))
84 82
                 
85
-                reg_pred <- substitute(r_predicate)
86
-                if(!is.null(reg_pred))
87
-                {
88
-                    region_predicate <- .trasform(deparse(reg_pred))
89
-                    region_predicate <- paste(region_predicate,collapse = "")
90
-                }
91
-                else
92
-                    region_predicate <- .jnull("java/lang/String")
93
-
94
-                gmql_select(val, predicate, region_predicate, semijoin)
95
-            })
96
-
83
+            }
84
+            else
85
+                predicate <- .jnull("java/lang/String")
86
+            
87
+            reg_pred <- substitute(r_predicate)
88
+            if(!is.null(reg_pred))
89
+            {
90
+                region_predicate <- .trasform(deparse(reg_pred))
91
+                region_predicate <- paste(region_predicate,collapse = "")
92
+                region_predicate <- as.character(glue::glue(region_predicate))
93
+                
94
+            }
95
+            else
96
+                region_predicate <- .jnull("java/lang/String")
97
+            
98
+            gmql_select(val, predicate, region_predicate, semijoin)
99
+        })
97 100
 
98 101
 gmql_select <- function(input_data, predicate, region_predicate, s_join)
99 102
 {
... ...
@@ -22,6 +22,7 @@
22 22
 #' the dataset to which they originally belonged.
23 23
 #' 
24 24
 #' @importFrom rJava J
25
+#' @importFrom BiocGenerics union
25 26
 #' 
26 27
 #' @param x GMQLDataset object
27 28
 #' @param y GMQLDataset object 
... ...
@@ -1052,12 +1052,14 @@ sample_metadata <- function(url, datasetName,sampleName)
1052 1052
 #'
1053 1053
 sample_region <- function(url, datasetName,sampleName)
1054 1054
 {
1055
+    
1055 1056
     url <- sub("/*[/]$","",url)
1056 1057
     URL <- paste0(url,"/datasets/",datasetName,"/",sampleName,"/region")
1057 1058
     h <- c('X-Auth-Token' = authToken, 'Accpet' = 'text/plain')
1058 1059
     req <- httr::GET(URL, httr::add_headers(h))
1059 1060
     content <- httr::content(req, 'parsed',encoding = "UTF-8")
1060 1061
     
1062
+    
1061 1063
     if(req$status_code !=200)
1062 1064
         stop(content)
1063 1065
     else
... ...
@@ -1065,6 +1067,8 @@ sample_region <- function(url, datasetName,sampleName)
1065 1067
         list <- show_schema(url,datasetName)
1066 1068
         schema_type <- list$type
1067 1069
         
1070
+        #df <- read.table(textConnection(content),sep = "\t")
1071
+        
1068 1072
         temp <- tempfile("temp") #use temporary files
1069 1073
         write.table(content,temp,quote = FALSE,sep = '\t',col.names = FALSE,
1070 1074
                         row.names = FALSE)
... ...
@@ -1,6 +1,6 @@
1 1
 ##gff-version 2
2 2
 ##source-version rtracklayer 1.36.6
3
-##date 2017-11-19
3
+##date 2017-12-04
4 4
 chr1	rtracklayer	sequence_feature	11873	12227	0	+	.	name "NR_046018"
5 5
 chr1	rtracklayer	sequence_feature	12612	12721	0	+	.	name "NR_046018"
6 6
 chr1	rtracklayer	sequence_feature	917444	917497	0	-	.	name "NM_001291366"
7 7
Binary files a/inst/java/GMQL.jar and b/inst/java/GMQL.jar differ
... ...
@@ -1,14 +1,11 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/AllGenerics.R, R/gmql_merge.R
2
+% Please edit documentation in R/gmql_merge.R
3 3
 \docType{methods}
4
-\name{aggregate}
5
-\alias{aggregate}
4
+\name{aggregate,GMQLDataset-method}
6 5
 \alias{aggregate,GMQLDataset-method}
7 6
 \alias{aggregate-method}
8 7
 \title{Method aggregate}
9 8
 \usage{
10
-aggregate(x, ...)
11
-
12 9
 \S4method{aggregate}{GMQLDataset}(x, ...)
13 10
 }
14 11
 \arguments{
... ...
@@ -31,8 +28,6 @@ GMQLDataset object. It contains the value to use as input
31 28
 for the subsequent GMQLDataset method
32 29
 }
33 30
 \description{
34
-Wrapper to GMQL merge function
35
-
36 31
 It builds a dataset consisting of a single sample having as many regions as
37 32
 the number of regions of the input data and as many metadata as the union of
38 33
 the 'attribute-value' tuples of the input samples. 
... ...
@@ -85,7 +85,7 @@ HM = read_dataset(test_path2)
85 85
 # and HM samples are obtained from the same provider (joinby clause).
86 86
 
87 87
 join_data = merge(TSS, HM, 
88
-genometric_predicate = list(list(MD(1), DL(1200))), DF("provider"), 
88
+genometric_predicate = list(MD(1), DL(1200)), DF("provider"), 
89 89
 region_output = "RIGHT")
90 90
 
91 91
 #' # Given a dataset 'hm' and one called 'tss' with a sample including 
... ...
@@ -96,7 +96,7 @@ region_output = "RIGHT")
96 96
 # from the same provider (joinby clause).
97 97
 
98 98
 join_data = merge(TSS, HM, 
99
-genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), 
99
+genometric_predicate = list(MD(1), DGE(12000), DOWN()), 
100 100
 DF("provider"), region_output = "RIGHT")
101 101
 
102 102
 }
... ...
@@ -78,7 +78,7 @@ test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
78 78
 data <- read_dataset(test_path)
79 79
 join_data <- read_dataset(test_path2)
80 80
 jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
81
-semijoin(join_data, TRUE, DF("cell")))
81
+semijoin(join_data, TRUE, list(DF("cell"))))
82 82
 
83 83
 }
84 84
 
... ...
@@ -5,42 +5,49 @@
5 5
 \title{FILTER AND EXTRACT}
6 6
 \usage{
7 7
 filter_and_extract(data, metadata = NULL, metadata_prefix = NULL,
8
-  regions = NULL)
8
+  regions = NULL, suffix = "antibody_target")
9 9
 }
10 10
 \arguments{
11
-\item{data}{string GMQL dataset folder path or GrangesList 
11
+\item{data}{string GMQL dataset folder path or GrangesList
12 12
 object}
13 13
 
14
-\item{metadata}{vector of character containing names of metadata 
14
+\item{metadata}{vector of character containing names of metadata
15 15
 to be searched for in metadata files.
16 16
 data will be extracted if at least one condition is satisfied:
17 17
 this condition will be logically "ANDed" with prefix filtering (see below)
18
-if NULL no filtering action occured 
18
+if NULL no filtering action occured
19 19
 (i.e every sample will be taken for regions filtering)}
20 20
 
21
-\item{metadata_prefix}{vector of character that will filter metadata 
21
+\item{metadata_prefix}{vector of character that will filter metadata
22 22
 containing rispectively every element of this vector.
23 23
 number of elelment in both vector must match}
24 24
 
25
-\item{regions}{vector of character that will extract only region 
26
-attribute specified; if NULL no regions will be taken and the output 
25
+\item{regions}{vector of character that will extract only region
26
+attribute specified; if NULL no regions will be taken and the output
27 27
 will be only GRanges made up by the first attribute
28 28
 (seqnames,start,end,strand)}
29
+
30
+\item{suffix}{name for each metadata column of GRanges. by default is the 
31
+"antibody_target". This string is taken from sample metadata file or from
32
+metadata() associated. If not present, the column name is the name of 
33
+selected regions}
29 34
 }
30 35
 \value{
31 36
 granges with selected regions (if any) in elementMetadata
32 37
 }
33 38
 \description{
34
-This function let user to create a new GRangeslist with fixed information: 
35
-seqnames,ranges ans strand and a variable part made up by the regions 
39
+This function let user to create a new GRangeslist with fixed information:
40
+seqnames,ranges ans strand and a variable part made up by the regions
36 41
 defined as input.
37
-The metadata and metadata_prefix are used to filter the data and choose 
42
+The metadata and metadata_prefix are used to filter the data and choose
38 43
 only the samples that match at least one metdatata with its prefix.
39 44
 The input regions are shown for each sample obtained from filtering.
40 45
 }
41 46
 \details{
42
-This function works only with datatset or Grangeslist that have the same 
47
+This function works only with datatset or Grangeslist that have the same
43 48
 information about regions attribute (but of course different value)
49
+In case of Grangeslist data input the function will search for metadata
50
+into metadata() function associated to Grangeslist.
44 51
 }
45 52
 \examples{
46 53
 
47 54
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/AllGenerics.R, R/gmql_group.R
3
+\docType{methods}
4
+\name{group_by}
5
+\alias{group_by}
6
+\alias{group_by,GMQLDataset-method}
7
+\alias{group_by}
8
+\alias{group_by-method}
9
+\title{Method group_by}
10
+\usage{
11
+group_by(.data, ...)
12
+
13
+\S4method{group_by}{GMQLDataset}(.data, ...)
14
+}
15
+\arguments{
16
+\item{.data}{GMQLDataset object}
17
+}
18
+\value{
19
+GMQLDataset object. It contains the value to use as input 
20
+for the subsequent GMQLDataset method
21
+}
22
+\description{
23
+Wrapper to GMQL group function
24
+
25
+Method group_by
26
+}
27
+\examples{
28
+
29
+
30
+
31
+}
... ...
@@ -51,6 +51,7 @@ test_path <- system.file("example", "DATASET", package = "RGMQL")
51 51
 test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
52 52
 data <- read_dataset(test_path)
53 53
 join_data <-  read_dataset(test_path2)
54
-jun_tf <- filter(data,NULL,NULL, semijoin(join_data, TRUE, DF("cell")))
54
+jun_tf <- filter(data,NULL,NULL, semijoin(join_data, TRUE, 
55
+list(DF("cell"))))
55 56
 
56 57
 }
... ...
@@ -390,7 +390,7 @@ Upon succesfull logon, you get a request token that you must use
390 390
 in every subsequent REST call.
391 391
 Login can be performed using the function:
392 392
 ```{r, eval = TRUE}
393
-test_url = "http://genomic.elet.polimi.it/gmql-rest-r"
393
+test_url = "http://genomic.elet.polimi.it/gmql-rest-r/"
394 394
 login_gmql(test_url)
395 395
 ```
396 396
 that saves the token in the Global R environment within the variable 
... ...
@@ -436,6 +436,12 @@ name_dataset <- job$datasets[[1]]$name
436 436
 download_dataset(test_url,name_dataset)
437 437
 ```
438 438
 
439
+Once done we can loogut from remote repository using:
440
+```{r, logout, eval = TRUE}
441
+logout_gmql(test_url)
442
+```
443
+*logout_gmql* delete the authToken from R environment
444
+
439 445
 ### Batch execution
440 446
 
441 447
 This execution type is similar to local processing 
... ...
@@ -446,7 +452,7 @@ using the functions in this RGMQL package.[see import/export](# Utilities)
446 452
 
447 453
 Before starting with examples, note that we have to log into remote 
448 454
 infrastructure with login function:
449
-```{r, login remote, eval = TRUE}
455
+```{r, login remote, eval = FALSE}
450 456
 login_gmql(test_url)
451 457
 ```
452 458
 Otherwise, we can initialize the data engine with a remote url:
... ...
@@ -598,7 +604,7 @@ We can also import only a part of a GMQL dataset into R environment,
598 604
 by filtering its content as follows:
599 605
 ```{r, filter_extract, eval = TRUE}
600 606
 
601
-data_in <- base::system.file("example", "TEAD", package = "RGMQL")
607
+data_in <- system.file("example", "TEAD", package = "RGMQL")
602 608
 matrix <- filter_and_extract(data_in, metadata = NULL,regions = c("count"))
603 609
 matrix
604 610