
delete rscala

Simone authored on 15/10/2017 18:21:39
Showing 80 changed files

... ...
@@ -4,5 +4,4 @@
4 4
 .Rproj.user/**
5 5
 .Rapp.history
6 6
 .RData
7
-
8
-
7
+.R.zip
... ...
@@ -1,7 +1,7 @@
1 1
 Package: RGMQL
2 2
 Type: Package
3
-Title: GMQL function
4
-Version: 0.99.0
3
+Title: GenoMetric Query Language for R/Bioconductor
4
+Version: 0.99.1
5 5
 Author: Simone Pallotta, Marco Masseroli
6 6
 Maintainer: Simone Pallotta <simonepallotta@hotmail.com>
7 7
 Description: This package brings GMQL functionalities into the R environment.
... ...
@@ -21,7 +21,7 @@ License: Artistic-2.0
21 21
 Encoding: UTF-8
22 22
 LazyData: true
23 23
 RoxygenNote: 6.0.1
24
-Imports: rscala(>= 2.4.0), httr, GenomicRanges, rtracklayer, data.table, utils, plyr, xml2, methods, S4Vectors, dplyr, stats
24
+Imports: httr, rJava, GenomicRanges, rtracklayer, data.table, utils, plyr, xml2, methods, S4Vectors, dplyr, stats
25 25
 Depends: R(>= 3.3.2)
26 26
 VignetteBuilder: knitr
27 27
 Suggests: BiocStyle, knitr, rmarkdown
... ...
@@ -1,78 +1,34 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
-export(ASC)
4
-export(AVG)
5
-export(BAG)
6
-export(COUNT)
7
-export(DEF)
8
-export(DESC)
9
-export(DGE)
10
-export(DLE)
11
-export(DOWN)
12
-export(EXACT)
13
-export(FULL)
14
-export(MAX)
15
-export(MD)
16
-export(MEDIAN)
17
-export(MIN)
18
-export(Q1)
19
-export(Q2)
20
-export(Q3)
21
-export(STD)
22
-export(SUM)
23 3
 export(TFARMatrix)
24 4
 export(TFARMemAtrix)
25
-export(UP)
26
-export(compileQuery)
27
-export(compileQuery.fromfile)
28
-export(cover)
29 5
 export(deleteDataset)
30
-export(difference)
31 6
 export(downloadDataset)
32 7
 export(downloadDatasetToGrangesList)
33
-export(execute)
34 8
 export(exportGMQL.gdm)
35 9
 export(exportGMQL.gtf)
36 10
 export(extend)
37
-export(flat)
38
-export(histogram)
39 11
 export(importGMQL.gdm)
40 12
 export(importGMQL.gtf)
41 13
 export(initGMQL)
42
-export(join)
43 14
 export(login.GMQL)
44 15
 export(logout.GMQL)
45
-export(map)
46
-export(materialize)
47
-export(merge)
48 16
 export(metadataFromSample)
49
-export(order)
50
-export(project)
51 17
 export(read)
52 18
 export(readDataset)
53 19
 export(regionFromSample)
54 20
 export(register.GMQL)
55 21
 export(remote_processing)
56
-export(runQuery)
57
-export(runQuery.fromfile)
58 22
 export(saveQuery)
59 23
 export(saveQuery.fromfile)
60
-export(select)
61 24
 export(showDatasets)
62
-export(showJobLog)
63
-export(showJobs)
64 25
 export(showQueries)
65 26
 export(showSamplesFromDataset)
66 27
 export(showSchemaFromDataset)
67
-export(stopJob)
68
-export(summit)
69
-export(take)
70
-export(traceJob)
71
-export(union)
72 28
 export(uploadSamples)
73 29
 import(GenomicRanges)
74 30
 import(httr)
75
-import(rscala)
31
+import(rJava)
76 32
 import(xml2)
77 33
 importClassesFrom(GenomicRanges,GRangesList)
78 34
 importFrom(GenomicRanges,makeGRangesFromDataFrame)
... ...
@@ -81,9 +37,11 @@ importFrom(data.table,fread)
81 37
 importFrom(dplyr,bind_cols)
82 38
 importFrom(methods,is)
83 39
 importFrom(plyr,revalue)
40
+importFrom(rJava,.jinit)
41
+importFrom(rJava,.jnew)
42
+importFrom(rJava,.jpackage)
84 43
 importFrom(rtracklayer,export)
85 44
 importFrom(rtracklayer,import)
86
-importFrom(stats,setNames)
87 45
 importFrom(utils,read.delim)
88 46
 importFrom(utils,unzip)
89 47
 importFrom(utils,write.table)
90 48
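Editorial note on the rJava entries added above: importing .jinit, .jnew and .jpackage indicates that the package now talks to the GMQL Scala/Java back end through rJava rather than rscala. A minimal sketch of the usual rJava bootstrap, assuming (this is not shown in the commit) that the jars ship under the package's inst/java directory:

.onLoad <- function(libname, pkgname) {
    # .jpackage() starts the JVM if needed and puts the package's
    # inst/java jars on the classpath when the package is loaded
    rJava::.jpackage(pkgname, lib.loc = libname)
}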
deleted file mode 100644
... ...
@@ -1,348 +0,0 @@
1
-#' GMQL Operation: COVER
2
-#'
3
-#' it takes as input a dataset and returns another dataset (with a single sample, if no \emph{groupby} option is specified)
4
-#' by “collapsing” the input dataset samples and their regions according to certain rules specified by the input parameters.
5
-#' The attributes of the output genomic regions are only the region coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult).
6
-#' Jaccard Indexes are standard measures of similarity of the contributing regions, added as default region attributes.
7
-#' The JaccardIntersect index is calculated as the ratio between the lengths of the intersection
8
-#' and of the union of the contributing regions; the JaccardResult index is calculated as the ratio
9
-#' between the lengths of the result region and of the union of the contributing regions.
10
-#' If aggregate functions are specified, new attributes are added.
11
-#' Output metadata are the union of the input ones.
12
-#' If \emph{groupby} clause is specified, the input samples are partitioned in groups,
13
-#' each with distinct values of the grouping metadata attributes, and the COVER operation is separately
14
-#' applied to each group, yielding one sample in the result for each group.
15
-#' Input samples that do not satisfy the \emph{groupby} condition are disregarded.
16
-#'
17
-#' @importFrom methods is
18
-#' 
19
-#' @param input_data returned object from any GMQL function
20
-#' @param minAcc minimum number of overlapping regions to be considered during execution.
21
-#' Is a single integer number, declared also as string.
22
-#' minAcc accept ALL and string like (ALL+N)/K as special keyword 
23
-#' ALL sets the minimum to the number of samples in the input dataset
24
-#' @param maxAcc maximum number of overlapping regions to be considered during execution.
25
-#' Is a single integer number, declared also as string.
26
-#' maxAcc accept ALL, ANY and string like (ALL+N)/K as special keyword 
27
-#' ALL sets the maximum to the number of samples in the input dataset
28
-#' ANY acts as a wildcard, considering all areas defined by any number of overlapping regions
29
-#' @param groupBy list of CONDITION objects, or simple string concatenation 
30
-#' (i.e c("cell_type","attribute_tag","size")).
31
-#' Every object contains the name of metadata to be used in \emph{groupby}.
32
-#' For details of CONDITION objects see:
33
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
34
-#' 
35
-#' Every condition accepts only one string value. (e.g. DEF("cell_type") )
36
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
37
-#' 
38
-#' @param aggregates list of element in the form \emph{key} = \emph{function_aggregate}.
39
-#' The \emph{function_aggregate} is an object of class OPERATOR
40
-#' The aggregate functions available are: \code{\link{MIN}}, \code{\link{MAX}},
41
-#' \code{\link{SUM}}, \code{\link{BAG}}, \code{\link{AVG}}, \code{\link{COUNT}},
42
-#' \code{\link{STD}}, \code{\link{MEDIAN}}, \code{\link{Q1}}, \code{\link{Q2}}, 
43
-#' \code{\link{Q3}}.
44
-#' Every operator accepts a string value, except for COUNT, which cannot have a value.
45
-#' Argument of 'function_aggregate' must exist in schema
46
-#' Two styles are allowed:
47
-#' \itemize{
48
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
49
-#' \item list of values: e.g. SUM("pvalue")
50
-#' }
51
-#' "mixed style" is not allowed
52
-#'
53
-#' @return DAGgraph class object. It contains the value associated to the graph used 
54
-#' as input for the subsequent GMQL function
55
-#' 
56
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
57
-#'
58
-#' @seealso  \code{\link{summit}} \code{\link{flat}} \code{\link{histogram}}
59
-#'
60
-#' @examples
61
-#' 
62
-#' ## This GMQL statement produces an output dataset with a single output sample. 
63
-#' ## The COVER operation considers all areas defined by a minimum of two overlapping regions 
64
-#' ## in the input samples, up to any amount of overlapping regions.
65
-#' 
66
-#' initGMQL("gtf")
67
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
68
-#' exp = readDataset(test_path)
69
-#' res = cover(input_data = exp,2,"ANY")
70
-#'
71
-#' \dontrun{
72
-#' ## This GMQL statement computes the result grouping the input exp samples by the values of 
73
-#' ## their cell metadata attribute, 
74
-#' ## thus one output res sample is generated for each cell type; 
75
-#' ## output regions are produced where at least 2 and at most 3 regions of grouped exp samples 
76
-#' ## overlap, setting as attributes of the resulting regions the minimum pvalue of the overlapping regions 
77
-#' ## (min_pvalue) and their Jaccard indexes (JaccardIntersect and JaccardResult).
78
-#' 
79
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
80
-#' exp = read(test_path)
81
-#' res = cover(input_data = exp,2,3, c("cell"), list(min_pValue = MIN("pvalue")))
82
-#' }
83
-#' @export
84
-#'
85
-cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL)
86
-{
87
-  .doVariant("COVER",minAcc,maxAcc,groupBy,aggregates,input_data)
88
-}
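An additional illustrative call (not part of the original roxygen examples), using the keyword strings that the minAcc/maxAcc parameters above are documented to accept, i.e. the (ALL+N)/K form for the minimum and ANY for an unbounded maximum:

res_any = cover(input_data = exp, minAcc = "(ALL+1)/2", maxAcc = "ANY")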
89
-
90
-#' GMQL Operation: HISTOGRAM
91
-#'
92
-#' returns the non-overlapping regions contributing to the cover,
93
-#' each with its accumulation index value, which is assigned to the AccIndex region attribute.
94
-#'
95
-#' @importFrom methods is
96
-#'
97
-#' @param input_data returned object from any GMQL function
98
-#' @param minAcc minimum number of overlapping regions to be considered during execution
99
-#' normally is a single integer number, declared also as string.
100
-#' minAcc accept ALL and string like (ALL+N)/K as special keyword 
101
-#' ALL sets the minimum to the number of samples in the input dataset
102
-#' @param maxAcc maximum number of overlapping regions to be considered during execution
103
-#' normally is a single integer number, declared also as string.
104
-#' maxAcc accept ALL, ANY and string like (ALL+N)/K as special keyword 
105
-#' ALL sets the maximum to the number of samples in the input dataset
106
-#' ANY acts as a wildcard, considering all areas defined by any number of overlapping regions
107
-#' @param groupBy list of CONDITION objects, or simple string concatenation 
108
-#' (i.e c("cell_type","attribute_tag","size")).
109
-#' Every object contains the name of metadata to be used in \emph{groupby}.
110
-#' For details of CONDITION objects see:
111
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
112
-#' 
113
-#' Every condition accepts only one string value. (e.g. DEF("cell_type") )
114
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
115
-#' 
116
-#' @param aggregates list of element in the form \emph{key} = \emph{function_aggregate}.
117
-#' The \emph{function_aggregate} is an object of class OPERATOR
118
-#' The aggregate functions available are: \code{\link{MIN}}, \code{\link{MAX}},
119
-#' \code{\link{SUM}}, \code{\link{BAG}}, \code{\link{AVG}}, \code{\link{COUNT}},
120
-#' \code{\link{STD}}, \code{\link{MEDIAN}}, \code{\link{Q1}}, \code{\link{Q2}}, 
121
-#' \code{\link{Q3}}.
122
-#' Every operator accepts a string value, except for COUNT, which cannot have a value.
123
-#' Argument of 'function_aggregate' must exist in schema
124
-#' Two styles are allowed:
125
-#' \itemize{
126
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
127
-#' \item list of values: e.g. SUM("pvalue")
128
-#' }
129
-#' "mixed style" is not allowed
130
-#'
131
-#' @return DAGgraph class object. It contains the value associated to the graph used 
132
-#' as input for the subsequent GMQL function
133
-#' 
134
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
135
-#' @seealso \code{\link{flat}} \code{\link{cover}} \code{\link{summit}}
136
-#'
137
-#' @examples
138
-#'
139
-#' ## This GMQL statement computes the result grouping the input \emph{exp} samples 
140
-#' ## by the values of their \emph{cell} metadata attribute, 
141
-#' ## thus one output \emph{res} sample is generated for each cell type. 
142
-#' ## Output regions are produced by dividing results from COVER in contiguous subregions 
143
-#' ## according to the varying accumulation values (from 2 to 4 in this case): 
144
-#' ## one region for each accumulation value;
145
-#'
146
-#' initGMQL("gtf")
147
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
148
-#' exp = readDataset(test_path)
149
-#' res = histogram(exp, 2,4,groupBy = c("cell"))
150
-#' 
151
-#' @export
152
-#'
153
-histogram <- function(input_data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL)
154
-{
155
-  .doVariant("HISTOGRAM",minAcc,maxAcc,groupBy,aggregates,input_data)
156
-}
157
-
158
-#' GMQL Operation: SUMMIT
159
-#'
160
-#' returns regions that start from a position
161
-#' where the number of intersecting regions is not increasing afterwards and stops
162
-#' at a position where either the number of intersecting regions decreases,
163
-#' or it violates the max accumulation index.
164
-#'
165
-#' @importFrom methods is
166
-#'
167
-#' @param input_data returned object from any GMQL function
168
-#' @param minAcc minimum number of overlapping regions to be considered during execution
169
-#' normally is a single integer number, declared also as string.
170
-#' minAcc accept ALL and string like (ALL+N)/K as special keyword 
171
-#' ALL sets the minimum to the number of samples in the input dataset
172
-#' @param maxAcc maximum number of overlapping regions to be considered during execution
173
-#' normally is a single integer number, declared also as string.
174
-#' maxAcc accept ALL, ANY and string like (ALL+N)/K as special keyword 
175
-#' ALL sets the maximum to the number of samples in the input dataset
176
-#' ANY acts as a wildcard, considering all areas defined by any number of overlapping regions
177
-#' @param groupBy list of CONDITION objects, or simple string concatenation 
178
-#' (i.e c("cell_type","attribute_tag","size")).
179
-#' Every object contains the name of metadata to be used in \emph{groupby}.
180
-#' For details of CONDITION objects see:
181
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
182
-#' 
183
-#' Every condition accepts only one string value. (e.g. DEF("cell_type") )
184
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
185
-#' 
186
-#' @param aggregates list of element in the form \emph{key} = \emph{function_aggregate}.
187
-#' The \emph{function_aggregate} is an object of class OPERATOR
188
-#' The aggregate functions available are: \code{\link{MIN}}, \code{\link{MAX}},
189
-#' \code{\link{SUM}}, \code{\link{BAG}}, \code{\link{AVG}}, \code{\link{COUNT}},
190
-#' \code{\link{STD}}, \code{\link{MEDIAN}}, \code{\link{Q1}}, \code{\link{Q2}}, 
191
-#' \code{\link{Q3}}.
192
-#' Every operator accepts a string value, except for COUNT, which cannot have a value.
193
-#' Argument of 'function_aggregate' must exist in schema
194
-#' Two styles are allowed:
195
-#' \itemize{
196
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
197
-#' \item list of values: e.g. SUM("pvalue")
198
-#' }
199
-#' "mixed style" is not allowed
200
-#'
201
-#' @return DAGgraph class object. It contains the value associated to the graph used 
202
-#' as input for the subsequent GMQL function
203
-#' 
204
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
205
-#' @seealso \code{\link{flat}} \code{\link{cover}} \code{\link{histogram}}
206
-#'
207
-#' @examples
208
-#'
209
-#' ## This GMQL statement computes the result grouping the input \emph{exp} samples by the values 
210
-#' ## of their \emph{cell} metadata attribute, thus one output \emph{res} sample is generated 
211
-#' ## for each cell type.
212
-#' ## Output regions are produced by extracting the highest accumulation overlapping 
213
-#' ## (sub)regions according to the methodologies described above;
214
-#'
215
-#'
216
-#' initGMQL("gtf")
217
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
218
-#' exp = readDataset(test_path)
219
-#' res = summit(input_data = exp,2,4, c("cell"))
220
-#' 
221
-#' @export
222
-#'
223
-summit <- function(input_data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL)
224
-{
225
-  .doVariant("SUMMIT",minAcc,maxAcc,groupBy,aggregates,input_data)
226
-}
227
-
228
-#' GMQL Operation: FLAT
229
-#'
230
-#' returns the contiguous region that starts from the first end and stops at
231
-#' the last end of the regions which would contribute to each region of the COVER
232
-#'
233
-#' @importFrom methods is
234
-#'
235
-#' @param input_data returned object from any GMQL function
236
-#' @param minAcc minimum number of overlapping regions to be considered during execution
237
-#' normally is a single integer number, declared also as string.
238
-#' minAcc accept ALL and string like (ALL+N)/K as special keyword 
239
-#' ALL sets the minimum to the number of samples in the input dataset
240
-#' @param maxAcc maximum number of overlapping regions to be considered during execution
241
-#' normally is a single integer number, declared also as string.
242
-#' maxAcc accept ALL, ANY and string like (ALL+N)/K as special keyword 
243
-#' ALL sets the maximum to the number of samples in the input dataset
244
-#' ANY acts as a wildcard, considering all areas defined by any number of overlapping regions
245
-#' @param groupBy list of CONDITION objects, or simple string concatenation 
246
-#' (i.e c("cell_type","attribute_tag","size")).
247
-#' Every object contains the name of metadata to be used in \emph{groupBy}.
248
-#' For details of CONDITION objects see:
249
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
250
-#' 
251
-#' Every condition accepts only one string value. (e.g. DEF("cell_type") )
252
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
253
-#' 
254
-#' @param aggregates list of element in the form \emph{key} = \emph{function_aggregate}.
255
-#' The \emph{function_aggregate} is an object of class OPERATOR
256
-#' The aggregate functions available are: \code{\link{MIN}}, \code{\link{MAX}},
257
-#' \code{\link{SUM}}, \code{\link{BAG}}, \code{\link{AVG}}, \code{\link{COUNT}},
258
-#' \code{\link{STD}}, \code{\link{MEDIAN}}, \code{\link{Q1}}, \code{\link{Q2}}, 
259
-#' \code{\link{Q3}}.
260
-#' Every operator accepts a string value, except for COUNT, which cannot have a value.
261
-#' Argument of 'function_aggregate' must exist in schema
262
-#' Two styles are allowed:
263
-#' \itemize{
264
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
265
-#' \item list of values: e.g. SUM("pvalue")
266
-#' }
267
-#' "mixed style" is not allowed
268
-#'
269
-#' @return DAGgraph class object. It contains the value associated to the graph used 
270
-#' as input for the subsequent GMQL function
271
-#' 
272
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
273
-#' @seealso \code{\link{summit}} \code{\link{cover}} \code{\link{histogram}}
274
-#'
275
-#' @examples
276
-#' 
277
-#' ## This GMQL statement computes the result grouping the input \emph{exp} samples by 
278
-#' ## the values of their \emph{cell} metadata attribute, thus one output \emph{res} sample 
279
-#' ## is generated for each cell type. 
280
-#' ## Output regions are produced by concatenating all regions which would have been used 
281
-#' ## to construct a COVER(2,4) statement on the same dataset; 
282
-#' 
283
-#' initGMQL("gtf")
284
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
285
-#' exp = readDataset(test_path)
286
-#' res = flat(input_data = exp,2,4, c("cell"))
287
-#'
288
-#' @export
289
-#'
290
-flat <- function(input_data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL)
291
-{
292
-  .doVariant("FLAT",minAcc,maxAcc,groupBy,aggregates,input_data)
293
-}
294
-
295
-.doVariant <- function(flag,minAcc,maxAcc,groupBy,aggregates,input_data)
296
-{
297
-  min <- .check_cover_param(minAcc,TRUE)
298
-  max <- .check_cover_param(maxAcc,FALSE)
299
-
300
-  if(!is.null(groupBy))
301
-    join_condition_matrix <- .join_condition(groupBy)
302
-  else
303
-    join_condition_matrix <- scalaNull("Array[Array[String]]")
304
-  
305
-  if(!is.null(aggregates))
306
-    metadata_matrix <- .aggregates(aggregates,"OPERATOR")
307
-  else
308
-    metadata_matrix <- scalaNull("Array[Array[String]]")
309
-
310
-
311
-  response <- switch(flag,
312
-                "COVER" = WrappeR$cover(min,max,join_condition_matrix,metadata_matrix,input_data$value),
313
-                "FLAT" = WrappeR$flat(min,max,join_condition_matrix,metadata_matrix,input_data$value),
314
-                "SUMMIT" = WrappeR$summit(min,max,join_condition_matrix,metadata_matrix,input_data$value),
315
-                "HISTOGRAM" = WrappeR$histogram(min,max,join_condition_matrix,metadata_matrix,input_data$value))
316
-
317
-  error <- strtoi(response[1])
318
-  data <- response[2]
319
-  
320
-  if(error!=0)
321
-    stop(data)
322
-  else
323
-    DAGgraph(data)
324
-}
325
-
326
-.check_cover_param <- function(param,is_min)
327
-{
328
-  if(length(param)>1)
329
-    stop("length > 1")
330
-
331
-  if(is.numeric(param))
332
-  {
333
-    if(param<=0)
334
-      stop("No negative value")
335
-    else
336
-      return(as.integer(param))
337
-  }
338
-  else if(is.character(param))
339
-  {
340
-    if(is_min && identical(param,"ANY"))
341
-      stop("min cannot assume ANY as value")
342
-    return(param)
343
-  }
344
-  else
345
-    stop("invalid input data")
346
-}
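To illustrate the validation helper above with a few hypothetical calls that mirror its branches:

.check_cover_param(2, is_min = TRUE)            # numeric and positive: returns 2L
.check_cover_param("(ALL+1)/2", is_min = TRUE)  # keyword string: returned unchanged
.check_cover_param("ANY", is_min = TRUE)        # error: min cannot assume ANY as value
.check_cover_param(-1, is_min = FALSE)          # error: No negative value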
347
-
348
-
349 0
deleted file mode 100644
... ...
@@ -1,79 +0,0 @@
1
-#' GMQL Operation: DIFFERENCE
2
-#'
3
-#' It produces one sample in the result for each sample of the left operand,
4
-#' by keeping the same metadata of the left input sample and only those regions
5
-#' (with their schema and values) of the left input sample which do not intersect with any region
6
-#' in the right operand sample.
7
-#' The optional \emph{joinby} clause is used to extract a subset of couples
8
-#' from the Cartesian product of the two datasets \emph{left_input_data} x \emph{right_input_data}
9
-#' on which to apply the DIFFERENCE operator:
10
-#' only those samples that have the same value for each attribute
11
-#' are considered when performing the difference.
12
-#'
13
-#'
14
-#' @param right_input_data returned object from any GMQL function
15
-#' @param left_input_data returned object from any GMQL function
16
-#' @param joinBy list of CONDITION objects, or simple string concatenation 
17
-#' (i.e c("cell_type","attribute_tag","size")).
18
-#' Every object contains the name of metadata to be used in \emph{groupby}.
19
-#' For details of CONDITION objects see:
20
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
21
-#' 
22
-#' Every condition accepts only one string value (e.g. DEF("cell_type") )
23
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
24
-#' 
25
-#' @param is_exact single logical value: TRUE means that the region difference is executed only 
26
-#' on regions in left_input_data with exactly the same coordinates of at least one region present 
27
-#' in right_input_data; if is_exact = FALSE, the difference is executed on all regions in 
28
-#' left_input_data that overlap with at least one region in right_input_data (even just one base).
29
-#'
30
-#' @return DAGgraph class object. It contains the value associated to the graph used 
31
-#' as input for the subsequent GMQL function
32
-#'
33
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
34
-#'
35
-#' @examples
36
-#'
37
-#' ## This GMQL statement returns all the regions in the first dataset that do not 
38
-#' ## overlap any region in the second dataset.
39
-#' 
40
-#' initGMQL("gtf")
41
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
42
-#' test_path2 <- system.file("example","DATA_SET_VAR_GDM",package = "GMQL")
43
-#' r_left = readDataset(test_path)
44
-#' r_right = readDataset(test_path2)
45
-#' out = difference(r_left,r_right)
46
-#' 
47
-#' \dontrun{
48
-#' ## This GMQL statement extracts for every pair of samples s1 in EXP1 and s2 in EXP2
49
-#' ## having the same value of the metadata attribute 'antibody_target'
50
-#' ## the regions that appear in s1 but do not overlap any region in s2; 
51
-#' ## metadata of the result are the same as the metadata of s1.
52
-#' 
53
-#' initGMQL("gtf")
54
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
55
-#' test_path2 <- system.file("example","DATA_SET_VAR_GDM",package = "GMQL")
56
-#' exp1 = readDataset(test_path)
57
-#' exp2 = readDataset(test_path2)
58
-#' out = difference(exp1,exp2, c("antibody_target"))
59
-#'
60
-#' }
61
-#'
62
-#' @export
63
-#'
64
-difference <- function(left_input_data, right_input_data, joinBy = NULL,is_exact = FALSE)
65
-{
66
-  if(!is.null(joinBy))
67
-    join_condition_matrix <- .join_condition(joinBy)
68
-  else
69
-    join_condition_matrix <- scalaNull("Array[Array[String]]")
70
-  
71
-  response <- WrappeR$difference(join_condition_matrix,right_input_data$value,left_input_data$value,is_exact)
72
-  error <- strtoi(response[1])
73
-  data <- response[2]
74
-  if(error!=0)
75
-    stop(data)
76
-  else
77
-    DAGgraph(data)
78
-}
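A further illustrative call (not among the original examples), based on the is_exact parameter described above: remove from r_left only the regions whose coordinates exactly match a region in r_right.

out_exact = difference(r_left, r_right, is_exact = TRUE)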
79
-
... ...
@@ -27,7 +27,7 @@
27 27
 #' @examples
28 28
 #'
29 29
 #' initGMQL("gtf")
30
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
30
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "RGMQL")
31 31
 #' r = readDataset(test_path)
32 32
 #'
33 33
 #' ## it counts the regions in each sample and stores their number as value of the new metadata 
... ...
@@ -36,7 +36,7 @@
36 36
 #' \dontrun{
37 37
 #' 
38 38
 #' initGMQL("gtf")
39
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
39
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "RGMQL")
40 40
 #' exp = readDataset(test_path)
41 41
 #'
42 42
 #' ## it copies all samples of exp dataset into res dataset, and then calculates 
... ...
@@ -56,8 +56,9 @@ extend <-function(input_data, metadata = NULL)
56 56
   if(!is.null(metadata))
57 57
     metadata_matrix <- .aggregates(metadata,"META_OPERATOR")
58 58
   else
59
-    metadata_matrix <- scalaNull("Array[Array[String]]")
60
-
59
+    metadata_matrix <- .jnull("java/lang/String")
60
+  
61
+  WrappeR <- J("it/polimi/genomics/r/Wrapper")
61 62
   response <- WrappeR$extend(metadata_matrix,input_data$value)
62 63
   error <- strtoi(response[1])
63 64
   data <- response[2]
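The hunk above shows the core pattern of the rscala-to-rJava migration: an absent optional argument is now passed as a Java null instead of a Scala null, and the Scala wrapper class is resolved on demand with rJava's J(). Condensed before/after, with names taken from the hunk itself:

# before (rscala): metadata_matrix <- scalaNull("Array[Array[String]]")
# after (rJava):
metadata_matrix <- rJava::.jnull("java/lang/String")
WrappeR <- rJava::J("it/polimi/genomics/r/Wrapper")   # static reference to the wrapper class
response <- WrappeR$extend(metadata_matrix, input_data$value)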
... ...
@@ -24,7 +24,7 @@
24 24
 #'
25 25
 #' @examples
26 26
 #'
27
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
27
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "RGMQL")
28 28
 #' TFARMatrix(test_path,regions = c("pvalue","peak"))
29 29
 #'
30 30
 #' @export
... ...
@@ -98,7 +98,7 @@ TFARMatrix <- function(GMQL_dataset_path, metadata = NULL,metadata_prefix = NULL
98 98
 #' @examples
99 99
 #'
100 100
 #'
101
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
101
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "RGMQL")
102 102
 #' grl <- importGMQL.gtf(test_path)
103 103
 #' TFARMemAtrix(grl,regions = c("pvalue","peak"))
104 104
 #'
... ...
@@ -18,7 +18,7 @@
18 18
 #'
19 19
 #' @examples
20 20
 #'
21
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
21
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "RGMQL")
22 22
 #' grl = importGMQL.gtf(test_path)
23 23
 #'
24 24
 #'
... ...
@@ -39,7 +39,7 @@
39 39
 #' gr2 <- GRanges(seqnames = c("chr1", "chr1"),
40 40
 #' ranges = IRanges(c(7,13), width = 3), strand = c("+", "-"), score = 3:4, GC = c(0.3, 0.5))
41 41
 #' grl = GRangesList(gr1,gr2)
42
-#' test_out_path <- system.file("example",package = "GMQL")
42
+#' test_out_path <- system.file("example",package = "RGMQL")
43 43
 #' exportGMQL.gdm(grl,test_out_path)
44 44
 #'
45 45
 #'
... ...
@@ -92,7 +92,7 @@ exportGMQL.gdm <- function(samples, dir_out)
92 92
 #' gr2 <- GRanges(seqnames = c("chr1", "chr1"),
93 93
 #' ranges = IRanges(c(7,13), width = 3), strand = c("+", "-"), score = 3:4, GC = c(0.3, 0.5))
94 94
 #' grl = GRangesList(gr1,gr2)
95
-#' test_out_path <- system.file("example",package = "GMQL")
95
+#' test_out_path <- system.file("example",package = "RGMQL")
96 96
 #' exportGMQL.gtf(grl,test_out_path)
97 97
 #'
98 98
 #' @export
99 99
deleted file mode 100644
... ...
@@ -1,117 +0,0 @@
1
-#' GMQL Operation: JOIN
2
-#'
3
-#' It takes as input two datasets, respectively known as anchor (left) and experiment (right), and returns
4
-#' a dataset of samples consisting of regions extracted from the operands according to the specified condition
5
-#' (a.k.a genometric_predicate).
6
-#' The number of generated output samples is the Cartesian product of the number of samples
7
-#' in the anchor and in the experiment dataset (if joinBy is not specified).
8
-#' The output metadata are the union of the input metadata, with their attribute names prefixed with
9
-#' left or right respectively.
10
-#'
11
-#'
12
-#' @param left_input_data returned object from any GMQL function
13
-#' @param right_input_data returned object from any GMQL function
14
-#' @param genometric_predicate is a list of lists of DISTAL object by means of logical ANDs
15
-#' For details of DISTAL objects see:
16
-#' \code{\link{DLE}}, \code{\link{DGE}}, \code{\link{MD}}, \code{\link{UP}}, \code{\link{DOWN}}
17
-#' 
18
-#' @param joinBy list of CONDITION objects, or simple string concatenation 
19
-#' (i.e c("cell_type","attribute_tag","size")).
20
-#' Every object contains the name of metadata to be used in \emph{groupby}.
21
-#' For details of CONDITION objects see:
22
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
23
-#' 
24
-#' Every condition accepts only one string value. (e.g. DEF("cell_type") )
25
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
26
-#' 
27
-#' @param region_output single string that declares which region is given in output for each input pair of left-dataset
28
-#' and right-dataset regions satisfying the genometric predicate:
29
-#' \itemize{
30
-#' \item{left: outputs the anchor regions from left_input_data that satisfy the genometric predicate}
31
-#' \item{right: outputs the experiment regions from right_input_data that satisfy the genometric predicate}
32
-#' \item{int (intersection): outputs the overlapping part (intersection) of the left_input_data and right_input_data
33
-#' regions that satisfy the genometric predicate; if the intersection is empty, no output is produced}
34
-#' \item{contig: outputs the concatenation between the left_input_data and right_input_data regions that satisfy
35
-#' the genometric predicate, (i.e. the output region is defined as having left (right) coordinates
36
-#' equal to the minimum (maximum) of the corresponding coordinate values in the left_input_data and right_input_data
37
-#' regions satisfying the genometric predicate)}
38
-#' }
39
-#'
40
-#' @return DAGgraph class object. It contains the value associated to the graph used 
41
-#' as input for the subsequent GMQL function
42
-#'
43
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
44
-#'
45
-#'
46
-#' @examples
47
-#' 
48
-#' ## Given a dataset 'hm' and one called 'tss' with a sample including Transcription Start Site annotations,
49
-#' ## it searches for those regions of hm that are at a minimal distance from a transcription start site (TSS) 
50
-#' ## and takes the first/closest one for each TSS, 
51
-#' ## provided that such distance is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained 
52
-#' ## from the same provider (joinby clause).
53
-#' 
54
-#' initGMQL("gtf")
55
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
56
-#' test_path2 <- system.file("example","DATA_SET_VAR_GDM",package = "GMQL")
57
-#' TSS = readDataset(test_path)
58
-#' HM = readDataset(test_path2)
59
-#' join_data = join(TSS,HM,genometric_predicate=list(list(MD(1),DLE(120000))),c("provider"),region_output="RIGHT")
60
-#'
61
-#' @export
62
-#'
63
-join <- function(right_input_data, left_input_data, genometric_predicate = NULL,
64
-                 joinBy = NULL, region_output="contig")
65
-{
66
-  
67
-  if(!is.null(genometric_predicate))
68
-  {
69
-    if(!is.list(genometric_predicate))
70
-      stop("genometric_predicate must be list of lists")
71
-    
72
-    if(!all(sapply(genometric_predicate, function(x) is.list(x) )))
73
-      stop("genometric_predicate must be list of lists")
74
-    
75
-    lapply(genometric_predicate, function(list_pred) {
76
-      if(length(list_pred)>4)
77
-      {
78
-        warning("only 4 element per list, we cut the rest")
79
-        length(list_pred)=4
80
-      }
81
-      
82
-      if(!all(sapply(list_pred, function(x) {is(x,"DISTAL")} )))
83
-        stop("All elements should be DISTAL object")
84
-
85
-    })
86
-    
87
-    genomatrix <- t(sapply(genometric_predicate, function(list_pred) {
88
-      dist_array <- sapply(list_pred, function(x) {
89
-        new_value = as.character(x)
90
-        array <- c(new_value)
91
-      })
92
-      dist_array = c(dist_array,c("NA","NA"),c("NA","NA"),c("NA","NA"))
93
-      length(dist_array) = 8
94
-      dist_array
95
-    }))
96
-  }
97
-  else
98
-    genomatrix <- scalaNull("Array[Array[String]]")
99
-      
100
-  if(!is.null(joinBy))
101
-    join_condition_matrix <- .join_condition(joinBy)
102
-  else
103
-    join_condition_matrix <- scalaNull("Array[Array[String]]")
104
-  
105
-  output <- toupper(region_output)
106
-  if(!identical(output,"CONTIG") && !identical(output,"LEFT") && !identical(output,"RIGHT")
107
-     && !identical(output,"INT"))
108
-    stop("region_output must be contig, left, right or int (intersection)")
109
-  
110
-  response <- WrappeR$join(genomatrix,join_condition_matrix, output,right_input_data$value, left_input_data$value)
111
-  error <- strtoi(response[1])
112
-  data <- response[2]
113
-  if(error!=0)
114
-    stop(data)
115
-  else
116
-    DAGgraph(data)
117
-}
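Another illustrative call (not among the original examples), using the 'int' option of region_output documented above to output only the overlapping part of each matching pair of regions:

join_int = join(TSS, HM, genometric_predicate = list(list(MD(1), DLE(120000))), region_output = "int")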
118 0
deleted file mode 100644
... ...
@@ -1,87 +0,0 @@
1
-#' GMQL Operation: MAP
2
-#'
3
-#' It computes, for each sample in the right dataset, aggregates over the values of the right regions
4
-#' that intersect with a region in a left sample, for each region of each sample in the left dataset;
5
-#' The number of generated output samples is the Cartesian product of the samples in the two input datasets;
6
-#' each output sample has the same regions as the related input left sample, with their attributes and values,
7
-#' plus the attributes computed as aggregates over right region values.
8
-#' Output sample metadata are the union of the related input sample metadata,
9
-#' whose attribute names are prefixed with "left" or "right" respectively.
10
-#'
11
-#' When the joinby clause is present, only pairs of samples of left_input_data and of right_input_data with
12
-#' metadata M1 and M2 respectively that satisfy the joinby condition are considered.
13
-#'
14
-#' The clause consists of a list of metadata attribute names that must be present with equal values
15
-#' in both M1 and  M2
16
-#'
17
-#'
18
-#' @param left_input_data returned object from any GMQL function
19
-#' @param right_input_data returned object from any GMQL function
20
-#' @param aggregates list of element in the form \emph{key} = \emph{function_aggregate}.
21
-#' The \emph{function_aggregate} is an object of class OPERATOR
22
-#' The aggregate functions available are: \code{\link{MIN}}, \code{\link{MAX}},
23
-#' \code{\link{SUM}}, \code{\link{BAG}}, \code{\link{AVG}}, \code{\link{COUNT}},
24
-#' \code{\link{STD}}, \code{\link{MEDIAN}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}.
25
-#' Every operator accepts a string value, except for COUNT, which cannot have a value.
26
-#' Argument of 'function_aggregate' must exist in schema
27
-#' Two styles are allowed:
28
-#' \itemize{
29
-#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
30
-#' \item list of values: e.g. SUM("pvalue")
31
-#' }
32
-#' "mixed style" is not allowed
33
-#'
34
-#' @param joinBy list of CONDITION objects, or simple string concatenation 
35
-#' (i.e c("cell_type","attribute_tag","size")).
36
-#' Every object contains the name of metadata to be used in \emph{groupby}.
37
-#' For details of CONDITION objects see:
38
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
39
-#' 
40
-#' Every condition accepts only one string value. (e.g. DEF("cell_type") )
41
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
42
-#' 
43
-#' @return DAGgraph class object. It contains the value associated to the graph used 
44
-#' as input for the subsequent GMQL function
45
-#'
46
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
47
-#'
48
-#' @examples
49
-#'
50
-#' ## it counts the number of regions in each sample from exp that overlap with a ref region, 
51
-#' ## and for each ref region it computes the minimum score of all the regions in each exp sample 
52
-#' ## that overlap with it. 
53
-#' ## The MAP joinby option ensures that only the exp samples referring to the same 'cell_tissue' 
54
-#' ## of a ref sample are mapped on such ref sample; 
55
-#' ## exp samples with no cell_tissue metadata attribute, or with such metadata 
56
-#' ## but with a different value from the one(s) of ref sample(s), are disregarded.
57
-#' 
58
-#' initGMQL("gtf")
59
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
60
-#' test_path2 <- system.file("example","DATA_SET_VAR_GDM",package = "GMQL")
61
-#' exp = readDataset(test_path)
62
-#' ref = readDataset(test_path2)
63
-#' out = map(ref,exp, list(minScore = MIN("score")), joinBy = c("cell_tissue") )
64
-#' 
65
-#' 
66
-#' @export
67
-#'
68
-map <- function(left_input_data, right_input_data, aggregates = NULL, joinBy = NULL)
69
-{
70
-  if(!is.null(aggregates))
71
-    metadata_matrix <- .aggregates(aggregates,"OPERATOR")
72
-  else
73
-    metadata_matrix = scalaNull("Array[Array[String]]")
74
-
75
-  if(!is.null(joinBy))
76
-    join_condition_matrix <- .join_condition(joinBy)
77
-  else
78
-    join_condition_matrix <- scalaNull("Array[Array[String]]")
79
-
80
-  response<-WrappeR$map(join_condition_matrix,metadata_matrix,left_input_data$value,right_input_data$value)
81
-  error <- strtoi(response[1])
82
-  data <- response[2]
83
-  if(error!=0)
84
-    stop(data)
85
-  else
86
-    DAGgraph(data)
87
-}
88 0
deleted file mode 100644
... ...
@@ -1,189 +0,0 @@
1
-#' GMQL Function: EXECUTE
2
-#'
3
-#' execute GMQL query.
4
-#' The function works only after invoking at least one materialize
5
-#'
6
-#' @details 
7
-#' 
8
-#' After invoking the execute function, all variables associated to the DAG will be removed
9
-#' from the Scala environment, although the associated R variables will remain stored in the R environment
10
-#' 
11
-#' @return None
12
-#'
13
-#' @examples
14
-#'
15
-#' initGMQL()
16
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
17
-#' r = readDataset(test_path)
18
-#' s = select(input_data = r)
19
-#' m = merge(groupBy = c("antibody_targer","cell_karyotype"),input_data = s)
20
-#' materialize(input_data = m, dir_out = test_path)
21
-#' 
22
-#' \dontrun{
23
-#' execute()
24
-#' }
25
-#' @export
26
-#'
27
-execute <- function()
28
-{
29
-  remote_proc <- WrappeR$is_remote_processing()
30
-  if(!remote_proc)
31
-    .download_or_upload()
32
-  
33
-  response <- WrappeR$execute()
34
-  error <- strtoi(response[1])
35
-  data <- response[2]
36
-  if(error!=0)
37
-    stop(data)
38
-  else
39
-  {
40
-    if(remote_proc)
41
-    {
42
-      url <- WrappeR$get_url()
43
-      .download_or_upload()
44
-      serializeQuery(url,FALSE,data)
45
-    }
46
-  }
47
-}
48
-
49
-.download_or_upload <- function()
50
-{
51
-  data <- WrappeR$get_dataset_list()
52
-  data_list <- apply(data, 1, as.list)
53
-  url <- WrappeR$get_url()
54
-  remote <- WrappeR$is_remote_processing()
55
-  if(remote)
56
-  {
57
-    sapply(data_list,function(x){
58
-      uploadSamples(url,x[[2]],x[[1]],x[[3]],FALSE)
59
-    })
60
-  }
61
-  else
62
-  {
63
-    sapply(data_list,function(x){
64
-      downloadDataset(url,x[[2]],x[[1]])
65
-    })
66
-  }
67
-}
68
-
69
-#' GMQL Operation: MATERIALIZE
70
-#'
71
-#' It saves the contents of a dataset that contains samples metadata and samples regions.
72
-#' It is normally used to persist the contents of any dataset generated during a GMQL query.
73
-#' Any dataset can be materialized, but the operation can be very time-consuming.
74
-#' For best performance, materialize the relevant data only.
75
-#'
76
-#'
77
-#' @param input_data returned object from any GMQL function
78
-#' @param dir_out destination folder path.
79
-#' by default is current working directory of the R process
80
-#'
81
-#' @return None
82
-#'
83
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
84
-#'
85
-#' @examples
86
-#'
87
-#' initGMQL("gtf")
88
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
89
-#' r = readDataset(test_path)
90
-#' s = select(input_data = r)
91
-#' m = merge(groupBy = c("antibody_targer","cell_karyotype"),input_data = s)
92
-#' materialize(input_data = m, dir_out = test_path)
93
-#' 
94
-#' @export
95
-#'
96
-materialize <- function(input_data, dir_out = getwd())
97
-{
98
-  response <- WrappeR$materialize(input_data$value,dir_out)
99
-  error <- strtoi(response[1])
100
-  data <- response[2]
101
-  if(error!=0)
102
-    stop(data)
103
-  else
104
-    invisible(NULL)
105
-}
106
-
107
-
108
-#' GMQL Operation: TAKE
109
-#'
110
-#' It saves the contents of a dataset that contains samples metadata and samples regions.
111
-#' It is normally used to store in memory the contents of any dataset generated during a GMQL query.
112
-#' The operation can be very time-consuming.
113
-#' If you have invoked any materialize before the take function, all those datasets will be materialized
114
-#' as folders (as if execute had been invoked).
115
-#'
116
-#' @import GenomicRanges
117
-#' @importFrom stats setNames
118
-#' 
119
-#' @param input_data returned object from any GMQL function
120
-#' @param rows number of rows of each sample's regions that you want to retrieve and store in memory;
121
-#' the default is 0, which means all rows of each sample are taken
122
-#'
123
-#' @return GRangesList with associated metadata
124
-#'
125
-#' @examples
126
-#'
127
-#' initGMQL("gtf")
128
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
129
-#' r = readDataset(test_path)
130
-#' s = select(input_data = r)
131
-#' m = merge(groupBy = c("antibody_targer","cell_karyotype"),input_data = s)
132
-#' g <- take(input_data = m, rows = 45)
133
-#' 
134
-#' @export
135
-#'
136
-take <- function(input_data, rows=0L)
137
-{
138
-  rows <- as.integer(rows[1])
139
-  if(rows<0)
140
-    stop("rows cannot be negative")
141
-
142
-  response <- WrappeR$take(input_data$value,rows)
143
-  error <- strtoi(response[1])
144
-  data <- response[2]
145
-  if(error!=0)
146
-    stop(data)
147
-
148
-  reg <- WrappeR$get_reg()
149
-  if(is.null(reg))
150
-    stop("no regions defined")
151
-  meta <- WrappeR$get_meta()
152
-  if(is.null(meta))
153
-    stop("no metadata defined")
154
-  schema <- WrappeR$get_schema()
155
-  if(is.null(schema))
156
-    stop("no schema defined")
157
-
158
-  reg_data_frame <- as.data.frame(reg)
159
-  list <- split(reg_data_frame, reg_data_frame[1])
160
-  names <- c("seqname","start","end","strand",schema)
161
-  
162
-  sampleList <- lapply(list, function(x){
163
-    x <- x[-1]
164
-    names(x) <- names
165
-    g <- GenomicRanges::makeGRangesFromDataFrame(x,keep.extra.columns = TRUE,
166
-                                                 start.field = "start",end.field = "end")
167
-  })
168
-  gRange_list <- GRangesList(sampleList)
169
-  
170
-  meta_list <- .metadata_from_frame_to_list(meta)
171
-  
172
-  S4Vectors::metadata(gRange_list) <- meta_list
173
-  return(gRange_list)
174
-}
175
-
176
-.metadata_from_frame_to_list <- function(metadata_frame)
177
-{
178
-  meta_frame <- as.data.frame(metadata_frame)
179
-  list <- split(meta_frame, meta_frame[1])
180
-  name_value_list <- lapply(list, function(x){
181
-    x <- x[-1]
182
-  })
183
-  meta_list <- lapply(name_value_list, function(x){
184
-    stats::setNames(as.list(as.character(x[[2]])), x[[1]])
185
-  })
186
-}
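Usage note (illustrative, continuing the take() example above): the per-sample metadata attached by the helper above can be read back from the returned GRangesList with S4Vectors::metadata().

g <- take(input_data = m, rows = 45)
S4Vectors::metadata(g)   # named list with one metadata element per sample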
187
-
188
-
189
-
190 0
deleted file mode 100644
... ...
@@ -1,56 +0,0 @@
1
-#' GMQL Operation: MERGE
2
-#'
3
-#' It builds a dataset consisting of a single sample having as many regions
4
-#' as the number of regions of the input data and as many metadata as the union of
5
-#' the 'attribute-value' tuples of the input samples.
6
-#' A groupby clause can be specified on metadata: the samples are then partitioned in groups,
7
-#' each with a distinct value of the grouping metadata attributes.
8
-#' The operation is separately applied to each group, yielding one sample in the result for each group.
9
-#' Samples whose names are not present in the grouping metadata parameter are disregarded.
10
-#'
11
-#'  
12
-#' @param input_data returned object from any GMQL function
13
-#' @param groupBy list of CONDITION objects, or simple string concatenation 
14
-#' (i.e c("cell_type","attribute_tag","size")).
15
-#' Every object contains the name of metadata to be used in \emph{groupBy}.
16
-#' For details of CONDITION objects see:
17
-#' \code{\link{DEF}}, \code{\link{FULL}}, \code{\link{EXACT}}
18
-#' 
19
-#' Every condition accepts only one string value (e.g. DEF("cell_type") )
20
-#' In case of simple string concatenation with no CONDITION, all metadata are considered as DEF
21
-#' 
22
-#' @return DAGgraph class object. It contains the value associated to the graph used 
23
-#' as input for the subsequent GMQL function
24
-#'
25
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
26
-#'
27
-#' @examples
28
-#' 
29
-#' ## it creates a dataset called merged which contains one sample for each antibody_target value 
30
-#' ## found within the metadata of the exp dataset sample; 
31
-#' ## each created sample contains all regions from all 'exp' samples with a specific value for their 
32
-#' ## antibody_target metadata attribute.
33
-#' 
34
-#' initGMQL("gtf")
35
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
36
-#' exp = readDataset(test_path)
37
-#' merged = merge(input_data = exp, groupBy = c("antibody_target"))
38
-#' 
39
-#' @export
40
-#'
41
-merge <- function(input_data, groupBy = NULL)
42
-{
43
-  if(!is.null(groupBy))
44
-    join_condition_matrix <- .join_condition(groupBy)
45
-  else
46
-    join_condition_matrix <- scalaNull("Array[Array[String]]")
47
-  
48
-  response <- WrappeR$merge(join_condition_matrix,input_data$value)
49
-  error <- strtoi(response[1])
50
-  data <- response[2]
51
-  if(error!=0)
52
-    stop(data)
53
-  else
54
-    DAGgraph(data)
55
-}
56
-
57 0
deleted file mode 100644
... ...
@@ -1,162 +0,0 @@
1
-#' GMQL operation: ORDER
2
-#'
3
-#' It is used to order either samples or sample regions or both,
4
-#' according to a set of metadata and/or region attributes, and/or region coordinates.
5
-#' Order can be specified as ascending / descending for every attribute
6
-#' The number of samples and their regions remain the same (unless mtop/rtop parameters specified)
7
-#' but a new ordering metadata and/or region attribute is added.
8
-#' Sorted samples or regions have a new attribute "order", added to either metadata, or regions,
9
-#' or both of them as specified in input
10
-#' The input mtop = k and rtop = m extracts the first k samples and m regions respectively,
11
-#' the clause mtopg = k and rtopg = m performs grouping operation,
12
-#' grouping by identical values of ordering attributes
13
-#' and then selects the first k samples or regions of each group
14
-#'
15
-#'
16
-#' @param input_data "url-like" string taken from GMQL function
17
-#' @param metadata_ordering list of ORDER objects where every object contains the name of metadata
18
-#' The ORDER's available are: \code{\link{ASC}}, \code{\link{DESC}}
19
-#' Every condition accepts only one string value. (e.g. ASC("cell_type") )
20
-#' @param mtop integer value specifying the first k samples.
21
-#' default is 0 that means every sample must be considered
22
-#' @param mtopg integer value specifying the first j samples in each group.
23
-#' default is 0 that means every sample must be considered
24
-#' @param mtopp integer value specifying the first j samples in each group.
25
-#' default is 0 that means every sample must be considered
26
-#' @param regions_ordering list of ORDER objects where every object contains the name of region schema value
27
-#' The ORDER's available are: ASC, DESC.
28
-#' Every condition accepts only one string value. (e.g. DESC("pvalue") )
29
-#' @param rtop integer value specifying the first m regions.
30
-#' default is 0 that means every sample must be considered
31
-#' @param rtopg integer value specifying the first i regions in each group.
32
-#' default is 0 that means every sample must be considered
33
-#' @param rtopp integer value specifying the first i regions in each group.
34
-#' default is 0 that means every sample must be considered
35
-#'
36
-#'
37
-#' @return DAGgraph class object. It contains the value associated to the graph used 
38
-#' as input for the subsequent GMQL function
39
-#' 
40
-#' @details
41
-#' mtop, mtopg,mtopp, rtop, rtopg and rtopp are normally numbers: if you specify a vector,
42
-#' only the first element will be used
43
-#' mtop, mtopg and mtopp are mutually exclusive, and so are rtop, rtopg and rtopp
44
-#'
45
-#'
46
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
47
-#'
48
-#' @examples
49
-#' 
50
-#' ## it orders the samples according to the Region_count metadata attribute and takes the two samples 
51
-#' ## that have the highest count. 
52
-#'
53
-#' initGMQL("gtf")
54
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
55
-#' r = readDataset(test_path)
56
-#' o = order(r,list(DESC("Region_Count")), mtop = 2)
57
-#'
58
-#' @export
59
-#'
60
-order <- function(input_data, metadata_ordering = NULL, mtop = 0, mtopg = 0,mtopp = 0,
61
-                  regions_ordering = NULL,rtop = 0,rtopg = 0,rtopp = 0)
62
-{
63
-  if(!is.numeric(mtop) || !is.numeric(mtopg) || !is.numeric(rtop) || !is.numeric(rtopg)
64
-     || !is.numeric(mtopp)|| !is.numeric(rtopp))
65
-    stop("mtop, rtop, rtopg and mtopg must be integer")
66
-
67
-  if(length(mtop)>0 || length(mtopg)>0 || length(rtop)>0 || length(rtopg)>0
68
-     || length(mtopp)>0 || length(rtopp)>0)
69
-    warning("only the first element is taken by rtop, mtop, mtopg, rtopg, rtopp, mtopp")
70
-
71
-  # we consider only the first element even if input is a vector of Int
72
-  # we cut the other arguments
73
-
74
-  mtop = as.integer(mtop[1])
75
-  mtopg = as.integer(mtopg[1])
76
-  mtopp = as.integer(mtopp[1])
77
-
78
-  rtop = as.integer(rtop[1])
79
-  rtopg = as.integer(rtopg[1])
80
-  rtopp = as.integer(rtopp[1])
81
-
82
-  if(mtop > 0 && mtopg >0)
83
-  {
84
-    warning("cannot be used together.\nWe set mtopg = 0")
85
-    mtopg = 0L
86
-  }
87
-
88
-  if(mtop >0 && mtopp>0)
89
-  {
90
-    warning("cannot be used together.\nWe set mtopp = 0")
91
-    mtopp = 0L
92
-  }
93
-
94
-  if(mtopg >0 && mtopp>0)
95
-  {
96
-    warning("cannot be used together.\nWe set mtopp = 0")
97
-    mtopp = 0L
98
-  }
99
-
100
-  if(rtop > 0 && rtopg >0)
101
-  {
102
-    warning("cannot be used together.\nWe set rtopg = 0")
103
-    rtopg = 0L
104
-  }
105
-
106
-  if(rtop >0 && rtopp>0)
107
-  {
108
-    warning("cannot be used together.\nWe set rtopp = 0")
109
-    rtopp = 0L
110
-  }
111
-
112
-  if(rtopg >0 && rtopp>0)
113
-  {
114
-    warning("cannot be used together.\nWe set rtopp = 0")
115
-    rtopp = 0L
116
-  }
117
-
118
-  if(!is.null(metadata_ordering))
119
-    meta_matrix <- .ordering_meta(metadata_ordering)
120
-  else
121
-    meta_matrix <- scalaNull("Array[Array[String]]")
122
-
123
-  if(!is.null(regions_ordering))
124
-    region_matrix <- .ordering_meta(regions_ordering)
125
-  else
126
-    region_matrix <- scalaNull("Array[Array[String]]")
127
-
128
-  response <- WrappeR$order(meta_matrix,mtopg,mtop,mtopp,region_matrix,rtopg,rtop,rtopp,input_data$value)
129
-  error <- strtoi(response[1])
130
-  data <- response[2]
131
-  if(error!=0)
132
-    stop(data)
133
-  else
134
-    DAGgraph(data)
135
-}
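A second illustrative call (not in the original examples), ordering regions rather than samples and keeping only the top ones via the rtop parameter described above:

o_reg = order(r, regions_ordering = list(ASC("pvalue")), rtop = 100)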
136
-
137
-
138
-.ordering_meta <- function(ordering)
139
-{
140
-  if(is.list(ordering))
141
-  {
142
-    order_matrix <- t(sapply(ordering,function(x){
143
-      new_value <- as.character(x)
144
-      if(length(new_value)==1)
145
-        new_value = c("ASC",new_value)
146
-      else if(!identical("ASC",new_value[1]) && !identical("DESC",new_value[1]))
147
-        stop("no more than one value")
148
-      matrix <- matrix(new_value)
149
-    }))
150
-  }
151
-  else if(is.character(ordering))
152
-  {
153
-    order_matrix <- t(sapply(ordering, function(x) {
154
-      new_value = c("ASC",x)
155
-      matrix <- matrix(new_value)
156
-    }))
157
-  }
158
-  else
159
-    stop("only list or character")
160
-}
161
-
162
-
163 0
deleted file mode 100644
... ...
@@ -1,128 +0,0 @@
1
-#' GMQL Operation: PROJECT
2
-#'
3
-#' It creates, from an existing dataset, a new dataset with all the samples from input dataset
4
-#' but keeping for each sample in the input dataset only those metadata and/or region attributes
5
-#' expressed in the operator parameter list.
6
-#' Region coordinates and values of the remaining metadata remain equal to those in the input dataset.
7
-#' It allows to:
8
-#' \itemize{
9
-#' \item{Remove existing metadata and/or region attributes from a dataset}
10
-#' \item{Create new metadata and/or region attributes in the result}
11
-#' }
12
-#'
13
-#' @param input_data string pointer taken from GMQL function
14
-#' @param metadata vector of string made up by metadata attribute
15
-#' @param regions vector of string made up by schema field attribute
16
-#' @param all_but_reg logical value indicating which schema field attributes you want to exclude.
17
-#' If FALSE, only the regions you choose are kept in the output of the project operation;
18
-#' if TRUE, the kept schema regions are all except the ones included in the regions parameter.
19
-#' If regions is not defined, \emph{all_but_reg} is not considered.
20
-#' @param all_but_meta logical value indicating which metadata you want to exclude.
21
-#' If FALSE, only the metadata you choose are kept in the output of the project operation;
22
-#' if TRUE, the kept metadata are all except the ones included in the metadata parameter.
23
-#' If metadata is not defined, \emph{all_but_meta} is not considered.
24
-#' @param regions_update single string predicate made up by operation on schema field attribute
25
-#' @param metadata_update single string predicate made up by operation on metadata attribute
26
-#'
27
-#' @return DAGgraph class object. It contains the value associated to the graph used 
28
-#' as input for the subsequent GMQL function
29
-#'
30
-#' @references \url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf}
31
-#'
32
-#'
33
-#' @examples
34
-#' 
35
-#' ## it creates a new dataset called CTCF_NORM_SCORE by preserving all region attributes apart from score,
36
-#' ## and creating a new region attribute called new_score by dividing the existing score value 
37
-#' ## of each region by 1000.0 and incrementing it by 100.
38
-#' ## It also generates, for each sample of the new dataset, 
39
-#' ## a new metadata attribute called normalized with value 1, which can be used in future selections.
40
-#' 
41
-#' initGMQL("gtf")
42
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
43
-#' input = readDataset(test_path)
44
-#' CTCF_NORM_SCORE = project(input,metadata_update="normalized AS 1", regions_update="new_score AS (score / 1000.0) + 100" , regions=c("score"), all_but_reg=TRUE)
45
-#' 
46
-#' 
47
-#' \dontrun{
48
-#' 
49
-#' ## it produces an output dataset that contains the same samples as the input dataset. 
50
-#' ## Each output sample only contains, as region attributes, 
51
-#' ## the four basic coordinates (chr, left, right, strand) and the specified region attributes 
52
-#' ## 'variant_classification' and 'variant_type', and as metadata attributes only the specified ones, 
53
-#' ## i.e. manually_curated__tissue_status and manually_curated__tumor_tag.
54
-#' 
55
-#' initGMQL("gtf")
56
-#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL")
57
-#' DS_in = readDataset(test_path)
58
-#' DS_out = project(DS_in,regions=c("variant_classification", "variant_type"), 
59
-#' metadata=c("manually_curated__tissue_status","manually_curated__tumor_tag"))
60
-#' 
61
-#' }
62
-#' 
63
-#' @export
64
-#'
65
-#'
66
-project <-function(input_data, metadata = NULL,metadata_update=NULL,all_but_meta = FALSE,
67
-                   regions = NULL, regions_update = NULL,all_but_reg=FALSE)
68
-{
69
-  if(!is.null(metadata))
70
-  {
71
-    if(!is.character(metadata))
72
-      stop("metadata: no valid input")
73
-
74
-    metadata <- metadata[!metadata %in% ""]
75
-    metadata <- metadata[!duplicated(metadata)]
76
-
77
-    if(length(metadata)==0)
78
-      metadata <- scalaNull("Array[String]")
79
-    
80
-    metadata <- (I(as.character(metadata)))
81
-  }
82
-  else
83
-    metadata <- scalaNull("Array[String]")
84
-
85
-  if(!is.null(regions))
86
-  {
87
-    if(!is.character(regions))
88
-      stop("regions: no valid input")
89
-
90
-    regions = regions[!regions %in% ""]
91
-    regions = regions[!duplicated(regions)]
92
-
93
-    if(length(regions)==0)
94
-      regions <- scalaNull("Array[String]")
95
-    
96
-    regions <- (I(as.character(regions)))
97
-    
98
-  }
99
-  else
100
-    regions <- scalaNull("Array[String]")
101
-
102
-  if(!is.null(regions_update))
103
-    .check_predicate(regions_update)
104
-  else
105
-    regions_update <- scalaNull("String")
106
-  
107
-  if(!is.null(metadata_update))
108
-    .check_predicate(metadata_update)
109
-  else
110
-    metadata_update <- scalaNull("String")
111
-  
112
-  if(length(all_but_meta)>1)
113
-    warning("all_but_meta: no multiple values")
114
-  
115
-  if(length(all_but_reg)>1)
116
-    warning("all_but_reg: no multiple values")
117
-  all_but_reg <- all_but_reg[1]
118
-  all_but_meta <- all_but_meta[1]
119
-  
120
-  response <- WrappeR$project(metadata,metadata_update,all_but_meta,
121
-                              regions,regions_update,all_but_reg,input_data$value)
122
-  error <- strtoi(response[1])
123
-  data <- response[2]