Browse code

delete jar

Simone authored on 01/01/2018 13:44:59
Showing28 changed files

... ...
@@ -8,21 +8,18 @@ Description: This package brings the GenoMetric Query Language (GMQL)
8 8
     functionalities into the R environment. GMQL is a high-level, declarative
9 9
     language to query and compare multiple and heterogeneous genomic datasets
10 10
     for biomedical knowledge discovery. It allows expressing easily queries and
11
-    processing over genomic regions and their metadata, in a way similar to what
12
-    can be done with the Structured Query Language (SQL) over a relational database,
13
-    to extract genomic regions of interest and compute their properties. GMQL
14
-    adopts algorithms designed for big data and their efficient implementation
15
-    using cloud-computing technologies, including Apache Hadoop framework and
16
-    Spark engine; these make GMQL able to run on modern high performance computing
11
+    processing over genomic regions and their metadata, to extract genomic regions 
12
+    of interest and compute their properties. GMQL adopts algorithms designed 
13
+    for big data and their efficient implementation using cloud-computing 
14
+    technologies, including Apache Hadoop framework and Spark engine; 
15
+    these make GMQL able to run on modern high performance computing
17 16
     infrastructures, CPU clusters and network infrastructures, in order to achieve
18
-    scalability and performance on big data. With GMQL very complex genomic
19
-    operations can be written as simple queries, with implicit iteration over
20
-    thousands of heterogeneous samples, and computed efficiently in few minutes over
21
-    servers or clouds. This RGMQL package is built over a scalable data management
22
-    engine written in Scala programming language, released as Scala API; it provides
23
-    a set of functions to create, manipulate and extract genomic data from different
24
-    data sources both from local and remote datasets. These RGMQL functions allow
25
-    performing complex queries and processing without knowing the GMQL syntax,
17
+    scalability and performance on big data. This RGMQL package is built over 
18
+    a scalable data management engine written in Scala programming language, 
19
+    released as Scala API; it provides a set of functions to create, 
20
+    manipulate and extract genomic data from different data sources both 
21
+    from local and remote datasets. These RGMQL functions allow performing 
22
+    complex queries and processing without knowing the GMQL syntax,
26 23
     but leveraging on R idiomatic paradigm and logic. RGMQL provides two different
27 24
     approaches in writing GMQL queries and processing scripts: a) REST calls b)
28 25
     standard R APIs The REST approach let users to log into a remote infrastructure
... ...
@@ -67,7 +64,7 @@ Imports:
67 64
     glue,
68 65
     BiocGenerics
69 66
 Depends:
70
-    R(>= 3.4.2)
67
+    R(<= 3.4.2), RGMQLScalaLibs
71 68
 VignetteBuilder: knitr
72 69
 Suggests: 
73 70
     BiocStyle,
... ...
@@ -45,11 +45,11 @@ import_gmql <- function(dataset_path, is_gtf)
45 45
     if(!dir.exists(datasetName))
46 46
         stop("Directory does not exists")
47 47
 
48
-    if(length(list.files(datasetName))==0)
48
+    if(!length(list.files(datasetName)))
49 49
         stop("no samples present in this dataset")
50 50
 
51 51
     regions <- list.files(datasetName, pattern = "*.gtf$",full.names = TRUE)
52
-    if(length(regions) != 0)
52
+    if(length(regions))
53 53
     {
54 54
         name_samples <- lapply(regions, function(x){
55 55
             gsub("*.gtf", "", basename(x))})
... ...
@@ -62,7 +62,7 @@ import_gmql <- function(dataset_path, is_gtf)
62 62
         stop("No GTF files present")
63 63
 
64 64
     meta <- list.files(datasetName, pattern = "*.gtf.meta$",full.names = TRUE)
65
-    if(length(meta) != 0)
65
+    if(length(meta))
66 66
     {
67 67
         meta_list <- lapply(meta, .add_metadata)
68 68
         names(meta_list) <- name_samples
... ...
@@ -83,17 +83,17 @@ import_gmql <- function(dataset_path, is_gtf)
83 83
     if(!dir.exists(datasetName))
84 84
         stop("Directory does not exists")
85 85
 
86
-    if(length(list.files(datasetName))==0)
86
+    if(!length(list.files(datasetName)))
87 87
         stop("no samples present in this dataset")
88 88
 
89 89
     regions <- list.files(datasetName, pattern = "*.gdm$",full.names = TRUE)
90
-    if(length(regions) != 0)
90
+    if(length(regions))
91 91
     {
92 92
         name_samples <- lapply(regions, function(x){
93 93
             gsub("*.gdm", "",basename(x))})
94 94
         vector_field <- .schema_header(datasetName)
95 95
 
96
-        names(vector_field)=NULL
96
+        names(vector_field) <- NULL
97 97
         sampleList <- lapply(regions,function(x){
98 98
             df <- read.delim(x,col.names = vector_field,header = FALSE)
99 99
             g <- GenomicRanges::makeGRangesFromDataFrame(df,
... ...
@@ -108,7 +108,7 @@ import_gmql <- function(dataset_path, is_gtf)
108 108
         stop("No GDM files present")
109 109
 
110 110
     meta <- list.files(datasetName, pattern = "*.gdm.meta$",full.names = TRUE)
111
-    if(length(meta) != 0)
111
+    if(length(meta))
112 112
     {
113 113
         meta_list <- lapply(meta, .add_metadata)
114 114
         names(meta_list) <- name_samples
... ...
@@ -134,7 +134,7 @@ export_gmql <- function(samples, dir_out, is_gtf)
134 134
         },files_sub_dir)
135 135
     }
136 136
     # first regions to get column names
137
-    col_names <- sapply(elementMetadata(samples[[1]]),class) 
137
+    col_names <- vapply(elementMetadata(samples[[1]]),class,character(1)) 
138 138
     # write schema XML
139 139
     .write_schema(col_names,files_sub_dir,to_GTF)
140 140
     c = .counter(0)
... ...
@@ -145,9 +145,9 @@ export_gmql <- function(samples, dir_out, is_gtf)
145 145
 .write_metadata <- function(meta_list,sample_name)
146 146
 {
147 147
     #create my own list if metadata empty
148
-    if(length(meta_list)==0){
148
+    if(!length(meta_list))
149 149
         meta_list <- list(Provider = "Polimi", Application = "R-GMQL")
150
-    }
150
+    
151 151
     names_list <- names(meta_list)
152 152
     value_list <- unlist(meta_list)
153 153
     file_meta_name = paste0(sample_name,".meta")
... ...
@@ -72,6 +72,8 @@ take_value.META_AGGREGATES <- function(obj){
72 72
 #' function sum, performing all the type conversions needed  }
73 73
 #' \item{COUNT: It prepares input parameter to be passed to the library 
74 74
 #' function count, performing all the type conversions needed }
75
+#' \item{COUNTSAMP: It prepares input parameter to be passed to the library 
76
+#' function third quartile, performing all the type conversions needed }
75 77
 #' \item{MIN: It prepares input parameter to be passed to the library 
76 78
 #' function minimum, performing all the type conversions needed  }
77 79
 #' \item{MAX: It prepares input parameter to be passed to the library 
... ...
@@ -197,6 +199,25 @@ as.character.COUNT <- function(obj) {
197 199
 check.COUNT <- function(obj){}
198 200
 
199 201
 
202
+#' @name AGGREGATES-Object
203
+#' @aliases COUNTSAMP
204
+#' @rdname aggr-class
205
+#' @export
206
+#'
207
+COUNTSAMP <- function()
208
+{
209
+    list <- list()
210
+    ## Set the name for the class
211
+    class(list) <- c("COUNTSAMP","AGGREGATES","META_AGGREGATES")
212
+    return(list)
213
+}
214
+as.character.COUNTSAMP <- function(obj) {
215
+    class <- class(obj)[1]
216
+    c(class,"")
217
+}
218
+check.COUNTSAMP <- function(obj){}
219
+
220
+
200 221
 #' @name AGGREGATES-Object
201 222
 #' @aliases MIN
202 223
 #' @rdname aggr-class
... ...
@@ -12,7 +12,7 @@
12 12
 {
13 13
     x <- scan(files, what="", sep="\n")
14 14
     y <- strsplit(x, "\t")
15
-    names(y) <- sapply(y, `[[`, 1)
15
+    names(y) <- vapply(y, `[[`,character(1), 1)
16 16
     listMeta <- lapply(y, `[`, -1)
17 17
 }
18 18
 
... ...
@@ -23,7 +23,7 @@
23 23
     schema_name_xml <- list.files(datasetName, pattern = "*.xml$",
24 24
                                 full.names = TRUE)
25 25
     
26
-    if(length(schema_name)==0 && length(schema_name_xml) == 0)
26
+    if(!length(schema_name) && !length(schema_name_xml))
27 27
         stop("schema not present")
28 28
     
29 29
     xml_schema <- xml2::read_xml(schema_name)
... ...
@@ -37,24 +37,24 @@
37 37
     if(!is.list(meta_data))
38 38
         stop("meta_data: invalid input")
39 39
     
40
-    if(!all(sapply(meta_data, function(x) is(x,class))))
40
+    if(!all(vapply(meta_data, function(x) is(x,class), logical(1))))
41 41
         stop("All elements must be META_AGGREGATES object")
42 42
     
43 43
     names <- names(meta_data)
44 44
     if(is.null(names))
45 45
     {
46 46
         warning("You did not assign a names to a list.\nWe build it for you")
47
-        names <- sapply(meta_data, take_value.META_AGGREGATES)
47
+        names <- vapply(meta_data, take_value.META_AGGREGATES,character(1))
48 48
     }
49 49
     else
50 50
     {
51 51
         if("" %in% names)
52 52
             stop("No partial names assignment is allowed")
53 53
     }
54
-    aggregate_matrix <- t(sapply(meta_data, function(x) {
54
+    aggregate_matrix <- t(vapply(meta_data, function(x) {
55 55
         new_value = as.character(x)
56 56
         matrix <- matrix(new_value)
57
-    }))
57
+    },character(2)))
58 58
     
59 59
     m_names <- matrix(names)
60 60
     metadata_matrix <- cbind(m_names,aggregate_matrix)
... ...
@@ -28,7 +28,7 @@
28 28
 #' 
29 29
 #' @examples
30 30
 #' 
31
-#' "where is my example?"
31
+#' "Where is my example?"
32 32
 #' 
33 33
 #' @name Evaluation-Function
34 34
 #' @aliases condition_evaluation
... ...
@@ -51,10 +51,10 @@ conds <- function(default = c(""), full = c(""), exact = c(""))
51 51
         join_condition_matrix <- NULL
52 52
     else
53 53
     {
54
-        join_condition_matrix <- t(sapply(array, function(x) {
54
+        join_condition_matrix <- t(vapply(array, function(x) {
55 55
             new_value = c(cond, x)
56 56
             matrix <- matrix(new_value)
57
-        }))
57
+        },character(2)))
58 58
     }
59 59
     join_condition_matrix
60 60
 }
... ...
@@ -86,25 +86,25 @@ filter_and_extract <- function(data, metadata = NULL,
86 86
     gtf_meta_files <- list.files(datasetName, pattern = "*.gtf.meta$",
87 87
                                     full.names = TRUE)
88 88
     
89
-    if(length(gdm_meta_files)==0 && length(gtf_meta_files)==0)
89
+    if(!length(gdm_meta_files) && !length(gtf_meta_files))
90 90
         stop("no samples present or no files format supported")
91 91
     
92
-    if(length(gdm_meta_files)>=1 && length(gtf_meta_files)>=1)
92
+    if(length(gdm_meta_files) && length(gtf_meta_files))
93 93
         stop("GMQL dataset cannot be mixed dataset: no GTF and GDM together")
94 94
     
95 95
     vector_field <- .schema_header(datasetName)
96 96
     
97
-    if(length(gdm_meta_files)>0)
97
+    if(length(gdm_meta_files))
98 98
     {
99 99
         samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix,
100 100
                                                 gdm_meta_files, suffix)
101 101
         
102
-        samples_file <- sapply(samples_with_suffix, function(x) x$sample)
103
-        suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix)
102
+        samples_file <- lapply(samples_with_suffix, function(x) x$sample)
103
+        suffix_vec <- lapply(samples_with_suffix, function(x) x$suffix)
104 104
         suffixes <- unlist(suffix_vec)
105 105
         samples_to_read <- unlist(samples_file)
106 106
         
107
-        if(length(samples_to_read)>0)
107
+        if(length(samples_to_read))
108 108
             samples_to_read <- gsub(".meta$", "", samples_to_read)
109 109
         else
110 110
             samples_to_read <- gsub(".meta$", "", gdm_meta_files)
... ...
@@ -117,12 +117,12 @@ filter_and_extract <- function(data, metadata = NULL,
117 117
         samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix,
118 118
                                                     gtf_meta_files, suffix)
119 119
         
120
-        samples_file <- sapply(samples_with_suffix, function(x) x$sample)
121
-        suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix)
120
+        samples_file <- lapply(samples_with_suffix, function(x) x$sample)
121
+        suffix_vec <- lapply(samples_with_suffix, function(x) x$suffix)
122 122
         suffixes <- unlist(suffix_vec)
123 123
         samples_to_read <- unlist(samples_file)
124 124
         
125
-        if(length(samples_to_read)>0)
125
+        if(length(samples_to_read))
126 126
             samples_to_read <- gsub(".meta$", "", samples_to_read)
127 127
         else
128 128
             samples_to_read <- gsub(".meta$", "", gtf_meta_files)
... ...
@@ -137,13 +137,13 @@ filter_and_extract <- function(data, metadata = NULL,
137 137
     if(!is(rangesList,"GRangesList"))
138 138
         stop("only GrangesList admitted")
139 139
     
140
-    if(length(rangesList)<=0)
140
+    if(!length(rangesList))
141 141
         stop("rangesList empty")
142 142
     
143 143
     meta_list <- metadata(rangesList)
144 144
     samples <- .check_metadata_list(metadata, metadata_prefix, meta_list, 
145 145
                                         suffix)
146
-    if(length(unlist(samples))<=0)
146
+    if(!length(unlist(samples)))
147 147
         samples <- rangesList
148 148
     else
149 149
     {
... ...
@@ -174,8 +174,8 @@ filter_and_extract <- function(data, metadata = NULL,
174 174
     vec_meta <- paste0(metadata_prefix,metadata)
175 175
     list <- mapply(function(x,index){
176 176
         vec_names <- names(x)
177
-        a <- sapply(vec_meta, function(y) {
178
-            which(y==vec_names)
177
+        a <- lapply(vec_meta, function(y) {
178
+            which(y == vec_names)
179 179
         })
180 180
         ## we would like that manage more index from grep
181 181
         found <- as.logical(length(unlist(a)))
... ...
@@ -191,9 +191,7 @@ filter_and_extract <- function(data, metadata = NULL,
191 191
     meta_list <- lapply(meta_files, function(x){
192 192
         list <- .add_metadata(x)
193 193
         vec_names <- names(list)
194
-        a <- sapply(vec_meta, function(y) {
195
-            grep(y,vec_names)
196
-        })
194
+        a <- lapply(vec_meta, function(y)grep(y,vec_names))
197 195
         ## we would like that manage more index from grep
198 196
         found <- as.logical(length(unlist(a)))
199 197
         index <- grep(suffix,vec_names)
... ...
@@ -249,7 +247,7 @@ filter_and_extract <- function(data, metadata = NULL,
249 247
             if(!is.null(regions))
250 248
                 col_names <- col_names[col_names %in% regions]
251 249
             
252
-            if(length(col_names)!=0)
250
+            if(length(col_names))
253 251
                 r <- subset(region_frame,TRUE,col_names)
254 252
         },regions,vector_field)
255 253
 
... ...
@@ -48,8 +48,8 @@
48 48
 #' \item{an expression built using PARAMETER object: (ALL() + N) / K or
49 49
 #' ALL() / K, with N and K integer values  }
50 50
 #' }
51
-#' @param groupBy \code{\link{condition_evaluation}} function to support 
52
-#' methods with groupBy or JoinBy input paramter
51
+#' @param groupBy \code{\link{conds}} function to support methods with 
52
+#' groupBy or JoinBy input parameter
53 53
 #' 
54 54
 #' @param ... a series of expressions separated by comma in the form 
55 55
 #' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of 
... ...
@@ -115,8 +115,7 @@
115 115
 #' ## regions the minimum pvalue of the overlapping regions (min_pvalue) 
116 116
 #' ## and their Jaccard indexes (JaccardIntersect and JaccardResult).
117 117
 #' 
118
-#' res = cover(exp, 2, 3, groupBy = condition_evaluation(c("cell")), 
119
-#' min_pValue = MIN("pvalue"))
118
+#' res = cover(exp, 2, 3, groupBy = conds("cell"), min_pValue = MIN("pvalue"))
120 119
 #' 
121 120
 #' @name cover
122 121
 #' @rdname cover
... ...
@@ -141,9 +140,7 @@ setMethod("cover", "GMQLDataset",
141 140
                 gmql_cover(val, q_min, q_max, groupBy, aggregates, flag)
142 141
             })
143 142
 
144
-
145
-
146
-gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag)
143
+gmql_cover <- function(input_data, min_acc, max_acc, groupBy,aggregates,flag)
147 144
 {
148 145
     if(!is.null(groupBy))
149 146
     {
... ...
@@ -156,7 +153,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag)
156 153
                 join_matrix <- .jarray(cond, dispatch = TRUE)
157 154
         }
158 155
         else
159
-            stop("use function condition_evaluation()")
156
+            stop("use function conds()")
160 157
     }
161 158
     else
162 159
         join_matrix <- .jnull("java/lang/String")
... ...
@@ -184,7 +181,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag)
184 181
     
185 182
     error <- strtoi(response[1])
186 183
     val <- response[2]
187
-    if(error!=0)
184
+    if(error)
188 185
         stop(val)
189 186
     else
190 187
         GMQLDataset(val)
... ...
@@ -195,14 +192,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag)
195 192
     if(length(param) > 1)
196 193
         stop("length > 1")
197 194
 
198
-    if(is.numeric(param))
199
-    {
200
-        if(param <= 0)
201
-            stop("No negative value")
202
-        else
203
-            return(as.character(param))
204
-    }
205
-    else if(is.character(param))
195
+    if(is.character(param))
206 196
     {
207 197
         if(is_min && identical(param,"ANY"))
208 198
             stop("min cannot assume ANY as value")
... ...
@@ -214,7 +204,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag)
214 204
     
215 205
 }
216 206
 
217
-.trasform_cover <- function(predicate=NULL)
207
+.trasform_cover <- function(predicate)
218 208
 {
219 209
     predicate <- gsub("\\(\\)","",predicate)
220 210
 }
... ...
@@ -17,8 +17,8 @@
17 17
 #' 
18 18
 #' @param x GMQLDataset class object
19 19
 #' @param y GMQLDataset class object
20
-#' @param joinBy \code{\link{condition_evaluation}} function to support 
21
-#' methods with groupBy or JoinBy input paramter
20
+#' @param joinBy \code{\link{conds}} function to support methods with 
21
+#' groupBy or JoinBy input paramter
22 22
 #' 
23 23
 #' @param is_exact single logical value: TRUE means that the region difference 
24 24
 #' is executed only on regions in left_input_data with exactly the same 
... ...
@@ -30,7 +30,6 @@
30 30
 #' @return GMQLDataset object. It contains the value to use as input 
31 31
 #' for the subsequent GMQLDataset method
32 32
 #' 
33
-#'
34 33
 #' @examples
35 34
 #' ## This statement initializes and runs the GMQL server for local execution 
36 35
 #' ## and creation of results on disk. Then, with system.file() it defines 
... ...
@@ -55,7 +54,7 @@
55 54
 #' ## do not overlap any region in s2; 
56 55
 #' ## metadata of the result are the same as the metadata of s1.
57 56
 #' 
58
-#' out_t = setdiff(data1, data2, condition_evaluation(c("cell")))
57
+#' out_t = setdiff(data1, data2, conds("cell"))
59 58
 #'
60 59
 #' @name setdiff
61 60
 #' @aliases setdiff,GMQLDataset,GMQLDataset-method
... ...
@@ -87,7 +86,7 @@ gmql_difference <- function(left_data, right_data, is_exact, joinBy)
87 86
                                         is_exact)
88 87
     error <- strtoi(response[1])
89 88
     val <- response[2]
90
-    if(error!=0)
89
+    if(error)
91 90
         stop(val)
92 91
     else
93 92
         GMQLDataset(val)
... ...
@@ -79,7 +79,7 @@ gmql_extend <-function(input_data, meta)
79 79
     response <- WrappeR$extend(metadata_matrix, input_data)
80 80
     error <- strtoi(response[1])
81 81
     val <- response[2]
82
-    if(error!=0)
82
+    if(error)
83 83
         stop(val)
84 84
     else
85 85
         GMQLDataset(val)
... ...
@@ -1,7 +1,7 @@
1 1
 group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, 
2 2
     groupBy_regions = NULL, region_aggregates = NULL, meta_aggregates = NULL)
3 3
 {
4
-    ptr_data = .data@value
4
+    ptr_data = value(.data)
5 5
     gmql_group(ptr_data, groupBy_meta, groupBy_regions, region_aggregates, 
6 6
                 meta_aggregates)
7 7
 }
... ...
@@ -14,16 +14,9 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL,
14 14
 #' @importFrom dplyr group_by
15 15
 #' 
16 16
 #' @param .data GMQLDataset object
17
-#' @param groupBy_meta it define condition evaluation on metadata.
18
-#' \itemize{
19
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match 
20
-#' if they both end with value and, if they have a further prefixes,
21
-#' the two prefix sequence are identical}
22
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly 
23
-#' as value will match; no further prefixes are allowed. }
24
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match 
25
-#' if both end with value.}
26
-#' }
17
+#' @param groupBy_meta \code{\link{conds}} function to support methods with 
18
+#' groupBy or JoinBy input parameter
19
+#' 
27 20
 #' @param groupBy_regions vector of string made up by schema field attribute
28 21
 #' @param region_aggregates It accept a series of aggregate function on 
29 22
 #' region attribute. 
... ...
@@ -89,7 +82,7 @@ gmql_group <- function(input_data, group_meta, group_reg, region_aggregates,
89 82
                 join_matrix <- .jarray(cond, dispatch = TRUE)
90 83
         }
91 84
         else
92
-            stop("use function condition_evaluation()")
85
+            stop("use function conds()")
93 86
     }
94 87
     else
95 88
         join_matrix <- .jnull("java/lang/String")
... ...
@@ -132,7 +125,7 @@ gmql_group <- function(input_data, group_meta, group_reg, region_aggregates,
132 125
                                 region_matrix, input_data)
133 126
     error <- strtoi(response[1])
134 127
     val <- response[2]
135
-    if(error!=0)
128
+    if(error)
136 129
         stop(val)
137 130
     else
138 131
         GMQLDataset(val)
... ...
@@ -43,12 +43,12 @@
43 43
 #' having left (right) coordinates equal to the minimum (maximum) of the 
44 44
 #' corresponding coordinate values in the 'x' and 'y' regions satisfying 
45 45
 #' the genometric predicate)}
46
-#' \item{LEFT_DISTINCT: It outputs the duplicate elimination of "x" output 
46
+#' \item{LEFT_DIST: It outputs the duplicate elimination of "x" output 
47 47
 #' regions with the same values, regardless the "y" paired region and its 
48 48
 #' values. In this case, the output regions attributes and their values are 
49 49
 #' all those of "x", and the output metadata are equal to the "x" metadata, 
50 50
 #' without additional prefixes}
51
-#' \item{RIGHT_DISTINCT: It outputs the duplicate elimination of "y" output 
51
+#' \item{RIGHT_DIST: It outputs the duplicate elimination of "y" output 
52 52
 #' regions with the same values, regardless the "x" paired region and its 
53 53
 #' values. In this case, the output regions attributes and their values are 
54 54
 #' all those of "y", and the output metadata are equal to the "y" metadata, 
... ...
@@ -109,19 +109,20 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy,
109 109
 {
110 110
     if(!is.null(genometric_predicate))
111 111
     {
112
-        if(length(genometric_predicate) >4)
112
+        if(length(genometric_predicate) > 4)
113 113
             stop("genometric_predicate: only 4 DISTAL condition")
114 114
         
115 115
         if(!is.list(genometric_predicate))
116 116
             stop("genometric_predicate must be a list")
117 117
     
118
-        if(!all(sapply(genometric_predicate, function(x) {is(x,"DISTAL")} )))
118
+        if(!all(vapply(genometric_predicate, function(x) {is(x,"DISTAL")},
119
+                        logical(1))))
119 120
             stop("All elements should be DISTAL object")
120 121
         
121
-        genomatrix <- t(sapply(genometric_predicate, function(x) {
122
+        genomatrix <- t(vapply(genometric_predicate, function(x) {
122 123
                 new_value = as.character(x)
123 124
                 array <- c(new_value)
124
-        }))
125
+        },character(2)))
125 126
         
126 127
         genomatrix <- .jarray(genomatrix, dispatch = TRUE)
127 128
     }
... ...
@@ -166,7 +167,7 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy,
166 167
                                 left_data, right_data)
167 168
     error <- strtoi(response[1])
168 169
     val <- response[2]
169
-    if(error!=0)
170
+    if(error)
170 171
         stop(val)
171 172
     else
172 173
         GMQLDataset(val)
... ...
@@ -39,16 +39,8 @@
39 39
 #' }
40 40
 #' "mixed style" is not allowed
41 41
 #'
42
-#' @param joinBy list of evalation functions to define evaluation on metadata:
43
-#' \itemize{
44
-#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match 
45
-#' if they both end with \emph{value} and, if they have further prefixes,
46
-#' the two prefix sequence are identical.}
47
-#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly 
48
-#' as \emph{value} match; no further prefixes are allowed.}
49
-#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match 
50
-#' if both end with \emph{value}.}
51
-#' }
42
+#' @param joinBy \code{\link{conds}} function to support methods with 
43
+#' groupBy or JoinBy input parameter
52 44
 #' @param count_name string defining the metadata count name; if it is 
53 45
 #' not specifying the name is "count_left_right" 
54 46
 #' 
... ...
@@ -78,15 +70,14 @@
78 70
 #' # but with a different value from the one(s) of ref sample(s), 
79 71
 #' # are disregarded.
80 72
 #' 
81
-#' out = map(ref, exp, minScore = MIN("score"), 
82
-#' joinBy = list(DF("cell_tissue")))
73
+#' out = map(ref, exp, minScore = MIN("score"), joinBy = conds("cell_tissue"))
83 74
 #' 
84 75
 #' @name map
85 76
 #' @rdname map
86 77
 #' @aliases map-method
87 78
 #' @export
88 79
 setMethod("map", "GMQLDataset",
89
-            function(x, y, ..., joinBy = NULL, count_name = NULL)
80
+            function(x, y, ..., joinBy = NULL, count_name = "")
90 81
             {
91 82
                 left_data <- value(x)
92 83
                 right_data <- value(y)
... ...
@@ -120,6 +111,9 @@ gmql_map <- function(left_data, right_data, aggregates, joinBy, count_name)
120 111
     {
121 112
         if(!is.character(count_name))
122 113
             stop("count_name: must be string")
114
+        
115
+        if(identical(count_name,""))
116
+            count_name <- .jnull("java/lang/String")
123 117
     }
124 118
     else
125 119
         count_name <- .jnull("java/lang/String")
... ...
@@ -129,7 +123,7 @@ gmql_map <- function(left_data, right_data, aggregates, joinBy, count_name)
129 123
                             right_data)
130 124
     error <- strtoi(response[1])
131 125
     val <- response[2]
132
-    if(error!=0)
126
+    if(error)
133 127
         stop(val)
134 128
     else
135 129
         GMQLDataset(val)
... ...
@@ -40,7 +40,7 @@ execute <- function()
40 40
     response <- WrappeR$execute()
41 41
     error <- strtoi(response[1])
42 42
     val <- response[2]
43
-    if(error!=0)
43
+    if(error)
44 44
         stop(val)
45 45
     else
46 46
     {
... ...
@@ -62,13 +62,13 @@ execute <- function()
62 62
     remote <- WrappeR$is_remote_processing()
63 63
     if(remote)
64 64
     {
65
-        sapply(data_list,function(x){
65
+        lapply(data_list,function(x){
66 66
             if(!is.null(x[[1]]) && !is.na(x[[1]]))
67 67
                 upload_dataset(url,x[[2]],x[[1]],x[[3]],FALSE)})
68 68
     }
69 69
     else
70 70
     {
71
-        sapply(data_list,function(x){
71
+        lapply(data_list,function(x){
72 72
             if(!is.null(x[[2]]) && !is.na(x[[2]]))
73 73
                 download_dataset(url,x[[2]],x[[1]])})
74 74
     }
... ...
@@ -147,7 +147,7 @@ gmql_materialize <- function(input_data, dir_out, name)
147 147
     response <- WrappeR$materialize(input_data, res_dir_out)
148 148
     error <- strtoi(response[1])
149 149
     val <- response[2]
150
-    if(error!=0)
150
+    if(error)
151 151
         stop(val)
152 152
     else
153 153
         invisible(NULL)
... ...
@@ -204,7 +204,7 @@ setMethod("take", "GMQLDataset",
204 204
                 gmql_take(ptr_data, rows)
205 205
             })
206 206
 
207
-gmql_take <- function(input_data, rows = 0L)
207
+gmql_take <- function(input_data, rows)
208 208
 {
209 209
     rows <- as.integer(rows[1])
210 210
     if(rows<0)
... ...
@@ -214,7 +214,7 @@ gmql_take <- function(input_data, rows = 0L)
214 214
     response <- WrappeR$take(input_data, rows)
215 215
     error <- strtoi(response[1])
216 216
     data <- response[2]
217
-    if(error!=0)
217
+    if(error)
218 218
         stop(data)
219 219
     
220 220
     reg <- .jevalArray(WrappeR$get_reg(),simplify = TRUE)
... ...
@@ -40,7 +40,7 @@
40 40
 #' ## antibody_target and cell metadata
41 41
 #' ## attributes.
42 42
 #'
43
-#' merged = aggregate(exp, condition_evaluation(c("antibody_target","cell")))
43
+#' merged = aggregate(exp, conds(c("antibody_target","cell")))
44 44
 #'
45 45
 #' @name aggregate
46 46
 #' @rdname aggregate
... ...
@@ -72,7 +72,7 @@ gmql_merge <- function(input_data, groupBy)
72 72
     response <- WrappeR$merge(join_matrix, input_data)
73 73
     error <- strtoi(response[1])
74 74
     val <- response[2]
75
-    if(error!=0)
75
+    if(error)
76 76
         stop(val)
77 77
     else
78 78
         GMQLDataset(val)
... ...
@@ -130,13 +130,12 @@ gmql_order <- function(input_data, metadata_ordering, regions_ordering,
130 130
                         region_matrix, input_data)
131 131
     error <- strtoi(response[1])
132 132
     val <- response[2]
133
-    if(error!=0)
133
+    if(error)
134 134
         stop(val)
135 135
     else
136 136
         GMQLDataset(val)
137 137
 }
138 138
 
139
-
140 139
 .ordering_meta <- function(ordering)
141 140
 {
142 141
     order_matrix <- do.call(rbind, ordering)
... ...
@@ -2,7 +2,7 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL,
2 2
                             all_but_meta = FALSE, regions = NULL, 
3 3
                             regions_update = NULL, all_but_reg = FALSE) 
4 4
 {
5
-    data = .data@value
5
+    data = value(.data)
6 6
     r_update <- substitute(regions_update)
7 7
     if(!is.null(r_update))
8 8
     {
... ...
@@ -132,7 +132,7 @@ gmql_project <-function(input_data, metadata, metadata_update, all_but_meta,
132 132
         metadata <- metadata[!metadata %in% ""]
133 133
         metadata <- metadata[!duplicated(metadata)]
134 134
         
135
-        if(length(metadata)==0)
135
+        if(!length(metadata))
136 136
             metadata <- .jnull("java/lang/String")
137 137
         
138 138
         metadata <- .jarray(metadata)
... ...
@@ -148,7 +148,7 @@ gmql_project <-function(input_data, metadata, metadata_update, all_but_meta,
148 148
         regions = regions[!regions %in% ""]
149 149
         regions = regions[!duplicated(regions)]
150 150
         
151
-        if(length(regions)==0)
151
+        if(!length(regions))
152 152
             regions <- .jnull("java/lang/String")
153 153
         
154 154
         regions <- .jarray(regions)
... ...
@@ -171,13 +171,13 @@ gmql_project <-function(input_data, metadata, metadata_update, all_but_meta,
171 171
                     regions, regions_update, all_but_reg, input_data)
172 172
     error <- strtoi(response[1])
173 173
     val <- response[2]
174
-    if(error!=0)
174
+    if(error)
175 175
         stop(val)
176 176
     else
177 177
         GMQLDataset(val)
178 178
 }
179 179
 
180
-.trasform_update <- function(predicate=NULL)
180
+.trasform_update <- function(predicate)
181 181
 {
182 182
     predicate <- gsub("list\\(","",predicate)
183 183
     predicate <- gsub("\\)$","",predicate)
... ...
@@ -83,7 +83,7 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE,
83 83
         
84 84
         schema_XML <- list.files(dataset, pattern = "*.schema$",
85 85
                                     full.names = TRUE)
86
-        if(length(schema_XML) == 0)
86
+        if(!length(schema_XML))
87 87
             stop("schema must be present")
88 88
         
89 89
         schema_matrix <- .jnull("java/lang/String")
... ...
@@ -99,12 +99,12 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE,
99 99
             stop("You have to log on using login function")
100 100
         
101 101
         list <- show_schema(url,dataset)
102
-        schema_names <- sapply(list$fields, function(x){x$name})
103
-        schema_type <- sapply(list$fields, function(x){x$type})
102
+        schema_names <- vapply(list$fields, function(x){x$name},character(1))
103
+        schema_type <- vapply(list$fields, function(x){x$type},character(1))
104 104
         schema_matrix <- cbind(schema_type,schema_names)
105 105
         #schema_type <- list$type
106 106
         
107
-        if(is.null(schema_matrix) || length(schema_matrix)==0)
107
+        if(is.null(schema_matrix) || !length(schema_matrix))
108 108
             schema_matrix <- .jnull("java/lang/String")
109 109
         else
110 110
             schema_matrix <- .jarray(schema_matrix, dispatch = TRUE)
... ...
@@ -116,7 +116,7 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE,
116 116
                                         schema_matrix, schema_XML)
117 117
     error <- strtoi(response[1])
118 118
     data <- response[2]
119
-    if(error!=0)
119
+    if(error)
120 120
         stop(data)
121 121
     else
122 122
         GMQLDataset(data)
... ...
@@ -138,11 +138,12 @@ read <- function(samples)
138 138
         stop("only GrangesList")
139 139
     
140 140
     meta <- S4Vectors::metadata(samples)
141
-    if(is.null(meta) || length(meta)==0) 
141
+    if(is.null(meta) || !length(meta)) 
142 142
     {
143 143
         #repeat meta for each sample in samples list
144 144
         len <- length(samples)
145
-        warning("No metadata.\nWe provide two metadata for you")
145
+        warning("No metadata.\nWe provide two metadata for you:
146
+                \n1.provider = PoliMi\n2.application = RGMQL\n")
146 147
         index_meta <- rep(seq_len(len),each = len)
147 148
         rep_meta <- rep(c("provider","PoliMi", "application", "RGMQL"),
148 149
                             times = len)
... ...
@@ -160,10 +161,11 @@ read <- function(samples)
160 161
     
161 162
     df <- data.frame(samples)
162 163
     df <- df[-2] #delete group_name
163
-    region_matrix <- as.matrix(sapply(df, as.character))
164
+    len_df <- dim(df)[1] # number of rows
165
+    region_matrix <- as.matrix(vapply(df, as.character,character(len_df)))
164 166
     region_matrix[is.na(region_matrix)] <- "NA"
165 167
     region_matrix <- region_matrix[,setdiff(colnames(region_matrix),"width")]
166
-    col_types <- sapply(df,class)
168
+    col_types <- vapply(df,class,character(1))
167 169
     col_names <- names(col_types)
168 170
     #re order the schema?
169 171
     if("phase" %in% col_names) # if GTF, change
... ...
@@ -98,7 +98,7 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL,
98 98
 #' ## less than 0.01 are conserved in output
99 99
 #' 
100 100
 #' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
101
-#' semijoin(join_data, TRUE, list(DF("cell"))))
101
+#' semijoin(join_data, TRUE, conds("cell")))
102 102
 #' 
103 103
 #' 
104 104
 #' @name filter
... ...
@@ -125,7 +125,7 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
125 125
                                     input_data)
126 126
     error <- strtoi(response[1])
127 127
     data <- response[2]
128
-    if(error!=0)
128
+    if(error)
129 129
         stop(data)
130 130
     else
131 131
         GMQLDataset(data)
... ...
@@ -137,6 +137,7 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
137 137
 #' This function is used as support to the filter method to define 
138 138
 #' semijoin conditions on metadata  
139 139
 #' 
140
+#' 
140 141
 #' @param .data GMQLDataset class object
141 142
 #' 
142 143
 #' @param not_in logical value: TRUE => for a given sample of input dataset
... ...
@@ -184,39 +185,44 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join)
184 185
 #' # less than 0.01 are conserved in output
185 186
 #' 
186 187
 #' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, 
187
-#' semijoin(join_data, TRUE, list(DF("cell"))))
188
+#' semijoin(join_data, TRUE, conds("cell")))
188 189
 #' 
189 190
 #' @return semijoin condition as list
190 191
 #' @export
191 192
 #' 
192
-semijoin <- function(.data, not_in = FALSE, groupBy = NULL)
193
+semijoin <- function(.data, not_in = FALSE, groupBy)
193 194
 {
194
-    if(!is.list(groupBy))
195
-        stop("groupBy: must be a list")
196
-    
197
-    semij_cond = groupBy
195
+    if(!is.null(groupBy))
196
+    {
197
+        if("condition" %in% names(groupBy))
198
+        {
199
+            cond <- .join_condition(groupBy)
200
+            if(is.null(cond))
201
+                stop("groupBy cannot be NULL")
202
+        }
203
+        else
204
+            stop("use function conds()")
205
+    }
206
+    else
207
+        stop("groupBy cannot be NULL")
198 208
     
199
-    if(is.null(data))
200
-        stop("data cannot be NULL")
209
+    if(is.null(.data))
210
+        stop(".data cannot be NULL")
201 211
     
202
-    if(!isClass("GMQLDataset", data))
212
+    if(!isClass("GMQLDataset", .data))
203 213
         stop("data: Must be a GMQLDataset object") 
204 214
     
205 215
     .check_logical(not_in)
206 216
     ptr_data <- value(.data)
207
-    
208 217
     data_cond <- cbind(ptr_data,not_in)
209
-    cond <- .join_condition(semij_cond)
210
-    cond <- rbind(data_cond,cond)
211
-    join_condition_matrix <- .jarray(cond, dispatch = TRUE)
218
+    all_conds <- rbind(data_cond,cond)
219
+    join_condition_matrix <- .jarray(all_conds, dispatch = TRUE)
212 220
     
213 221
     semijoin <- list("semijoin" = join_condition_matrix)
214 222
 }
215 223
 
216 224
 
217
-
218
-
219
-.trasform <- function(predicate=NULL)
225
+.trasform <- function(predicate)
220 226
 {
221 227
     predicate <- gsub("&|&&","AND",predicate)
222 228
     predicate <- gsub("\\||\\|\\|","OR",predicate)
... ...
@@ -64,7 +64,7 @@ gmql_union <- function(left_data, right_data)
64 64
     response <- WrappeR$union(left_data, right_data)
65 65
     error <- strtoi(response[1])
66 66
     val <- response[2]
67
-    if(error!=0)
67
+    if(error)
68 68
         stop(val)
69 69
     else
70 70
         GMQLDataset(val)
... ...
@@ -4,13 +4,41 @@
4 4
 .onLoad <- function(libname, pkgname) {
5 5
     .jpackage(pkgname, lib.loc = libname)
6 6
     # tools::vignetteEngine("knitr", pattern = "[.]Rmd$", package = "knitr")
7
-    .jinit(force.init = TRUE)
7
+    initGMQLscalaAPI()
8 8
 }
9 9
 
10 10
 .onAttach <- function(libname, pkgname) {
11
-    #packageStartupMessage("GMQL successfully loaded")
11
+    packageStartupMessage("GMQL successfully loaded")
12 12
 }
13 13
 
14 14
 .onUnload <- function(libpath) {
15 15
     #.rscalaPackageUnload()
16 16
 }
17
+
18
+
19
+#' @importFrom utils download.file
20
+#' @importFrom rJava .jinit .jaddClassPath
21
+#' 
22
+initGMQLscalaAPI <- function(libLoc, mem = "12G") {
23
+    # Check if library directory is missing
24
+    
25
+    # Starting the java engine
26
+    .jinit(force.init = TRUE)
27
+    if (missing(libLoc)) {
28
+        libLoc = system.file("extdata", "java", package = "RGMQLScalaLibs")
29
+    }
30
+    
31
+    path = Sys.glob(paste0(libLoc, "/*.jar"))
32
+    available_local_files <- list.files(libLoc, full.names = FALSE, 
33
+                                        pattern = "\\.jar$")
34
+    
35
+    # Check if all the files are there
36
+    if (!"GMQL.jar" %in% available_local_files) 
37
+        stop("GMQL jar not available")
38
+    
39
+    if (length(path) > 0)
40
+        rJava::.jaddClassPath(path)
41
+
42
+rJava::.jaddClassPath(dirname(path))
43
+}
44
+
... ...
@@ -53,14 +53,14 @@ DESC <- function(...)
53 53
     ords <- c(...)
54 54
     ords = ords[!ords %in% ""]
55 55
     ords = ords[!duplicated(ords)]
56
-    if(length(ords)<=0)
56
+    if(!length(ords))
57 57
         order_matrix <- .jnull("java/lang/String")
58 58
     else
59 59
     {
60
-        order_matrix <- t(sapply(ords, function(x) {
60
+        order_matrix <- t(vapply(ords, function(x) {
61 61
             new_value = c("DESC",x)
62 62
             matrix <- matrix(new_value)
63
-        }))
63
+        },character(2)))
64 64
     }
65 65
     order_matrix
66 66
 }
... ...
@@ -75,14 +75,14 @@ ASC <- function(...)
75 75
     ords <- c(...)
76 76
     ords = ords[!ords %in% ""]
77 77
     ords = ords[!duplicated(ords)]
78
-    if(length(ords)<=0)
78
+    if(!length(ords))
79 79
         order_matrix <- .jnull("java/lang/String")
80 80
     else
81 81
     {
82
-        order_matrix <- t(sapply(ords, function(x) {
82
+        order_matrix <- t(vapply(ords, function(x) {
83 83
             new_value = c("ASC",x)
84 84
             matrix <- matrix(new_value)
85
-        }))
85
+        },character(2)))
86 86
     }
87 87
     order_matrix
88 88
 }
... ...
@@ -82,6 +82,7 @@ login_gmql <- function(url, username = NULL, password = NULL)
82 82
     {
83 83
         assign("authToken",content$authToken,.GlobalEnv)
84 84
         WrappeR <- J("it/polimi/genomics/r/Wrapper")
85
+        url <- paste0(url,"/")
85 86
         WrappeR$save_tokenAndUrl(authToken,url)
86 87
         print(paste("your Token is",authToken))
87 88
     }
... ...
@@ -793,7 +794,7 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL,
793 794
         folderPath <- paste0(folderPath,"/files")
794 795
     
795 796
     files <- list.files(folderPath,full.names = TRUE)
796
-    if(length(files)==0)
797
+    if(!length(files))
797 798
         stop("no files present")
798 799
     count = .counter(0)
799 800
     
... ...
@@ -801,9 +802,9 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL,
801 802
         file <- httr::upload_file(x)
802 803
     })
803 804
     
804
-    list_files_names <- sapply(list_files, function(x) {
805
+    list_files_names <- vapply(list_files, function(x) {
805 806
         paste0("file",count())
806
-    })
807
+    },character(1))
807 808
     
808 809
     names(list_files) <- list_files_names
809 810
     req <- httr::GET(url)
... ...
@@ -817,7 +818,7 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL,
817 818
     {
818 819
         schema_name <- list.files(folderPath, pattern = "*.schema$",
819 820
                                     full.names = TRUE)
820
-        if(length(schema_name)==0)
821
+        if(!length(schema_name))
821 822
             stop("schema must be present")
822 823
         
823 824
         list_files <- list(list("schema" = httr::upload_file(schema_name)),
... ...
@@ -832,7 +833,7 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL,
832 833
         {
833 834
             schema_name <- list.files(folderPath, pattern = "*.schema$",
834 835
                                         full.names = TRUE)
835
-            if(length(schema_name)==0)
836
+            if(!length(schema_name))
836 837
                 stop("schema must be present")
837 838
             
838 839
             list_files <- list(list("schema" = httr::upload_file(schema_name)),
... ...
@@ -982,9 +983,7 @@ download_as_GRangesList <- function(url,datasetName)
982 983
 {
983 984
     list <- show_samples_list(url,datasetName)
984 985
     samples <- list$samples
985
-    sample_list_name <- sapply(samples, function(x){
986
-        name <- x$name
987
-    })
986
+    sample_list_name <- vapply(samples, function(x) x$name, character(1))
988 987
     
989 988
     sampleList <- lapply(samples, function(x){
990 989
         name <- x$name
... ...
@@ -1050,7 +1049,7 @@ sample_metadata <- function(url, datasetName,sampleName)
1050 1049
     #trasform text to list
1051 1050
     metadata <- strsplit(content, "\n")
1052 1051
     metadata <- strsplit(unlist(metadata), "\t")
1053
-    names(metadata) <- sapply(metadata, `[[`, 1)
1052
+    names(metadata) <- vapply(metadata, `[[`, character(1),1)
1054 1053
     listMeta <- lapply(metadata, `[`, -1)
1055 1054
     
1056 1055
     if(req$status_code !=200)
... ...
@@ -1126,13 +1125,11 @@ sample_region <- function(url, datasetName,sampleName)
1126 1125
         temp <- tempfile("temp") #use temporary files
1127 1126
         write.table(content,temp,quote = FALSE,sep = '\t',col.names = FALSE,
1128 1127
                         row.names = FALSE)
1129
-        if(schema_type=="gtf")
1128
+        if(identical(schema_type, "gtf"))
1130 1129
             samples <- rtracklayer::import(temp,format = "gtf")
1131 1130
         else
1132 1131
         {
1133
-            vector_field <- sapply(list$fields,function(x){
1134
-                name <- x$name
1135
-            })
1132
+            vector_field <- vapply(list$fields,function(x)x$name,character(1))
1136 1133
             df <- data.table::fread(temp,header = FALSE,sep = "\t")
1137 1134
             a <- df[1,2]
1138 1135
             if(is.na(as.numeric(a)))
1139 1136
deleted file mode 100644
1140 1137
Binary files a/inst/java/GMQL.jar and /dev/null differ
1141 1138
similarity index 97%
1142 1139
rename from vignettes/my-vignette.Rmd
1143 1140
rename to vignettes/RGMQL-vignette.Rmd
... ...
@@ -7,7 +7,7 @@ date: "`r Sys.Date()`"
7 7
 bibliography: bibliography.bib
8 8
 output: BiocStyle::pdf_document
9 9
 vignette: >
10
-    %\VignetteIndexEntry{Vignette Title}
10
+    %\VignetteIndexEntry{RGMQL: GenoMetric Query Language for R/Bioconductor}
11 11
     %\VignetteEngine{knitr::rmarkdown}
12 12
     %\VignetteEncoding{UTF-8}
13 13
 link-citations: true
... ...
@@ -218,12 +218,12 @@ CPUs/system while managing datasets (both GMQL or generic text plain datasets).
218 218
 ### Initialization
219 219
 
220 220
 Load and attach the RGMQL package in a R session using library function:
221
-```{r, initialization, eval = TRUE}
221
+```{r, initialization}
222 222
 library('RGMQL')
223 223
 ```
224 224
 Before starting using any GMQL operation we need to initialise the GMQL 
225 225
 context with the following code:
226
-```{r, init, eval = TRUE}
226
+```{r, init}
227 227
 init_gmql()
228 228
 ```
229 229
 The function *init_gmql()* initializes the context of scalable data management 
... ...
@@ -245,7 +245,7 @@ A local dataset is a folder with sample files (region files and correspondent
245 245
 metadata files) on the user computer.
246 246
 As data are already in the user computer, we simply execute:
247 247
 
248
-```{r, read GMQL dataset, eval = TRUE}
248
+```{r, read GMQL dataset}
249 249
 gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL")
250 250
 data_out = read_dataset(gmql_dataset_path)
251 251
 ```
... ...
@@ -260,7 +260,7 @@ For better integration in the R environment and with other R packages,
260 260
 we provide the *read()* function to read directly from R memory/environment 
261 261
 using GRangesList as input.
262 262
 
263
-```{r, read GRangesList, eval = TRUE}
263
+```{r, read GRangesList}
264 264
 library("GenomicRanges")
265 265
 gr1 <- GRanges(seqnames = "chr2",
266 266
 	ranges = IRanges(103, 106), strand = "+", score = 5L, GC = 0.45)
... ...
@@ -294,7 +294,7 @@ Consider mutation data samples of human breast cancer cases.
294 294
 For each sample, quantify the mutations in each exon and select the exons 
295 295
 with at least one mutation. Return the list of samples ordered by 
296 296
 the number of such exons.
297
-```{r, query, eval = TRUE}
297
+```{r, query}
298 298
 
299 299
 # These statements define the paths to the folders "EXON" and "MUT" in the 
300 300
 # subdirectory "example" of the package "RGMQL"
... ...
@@ -341,7 +341,7 @@ exon_res = arrange(exon3, list(DESC("exon_count")))
341 341
 
342 342
 If you want to store persistently the result, you can materialize it into 
343 343
 specific path defined as input parameter.
344
-```{r, materialize, eval = TRUE}
344
+```{r, materialize}
345 345
 # Materialize the result dataset on disk
346 346
 collect(exon_res)
347 347
 ```
... ...
@@ -363,7 +363,7 @@ After the execution, the context of scalable data management engine is stopped
363 363
 and a new invocation of *init_gmql()* is needed.
364 364
 
365 365
 Beside *execute()* we can use: 
366
-```{r, take, eval = TRUE}
366
+```{r, take}
367 367
 g <- take(exon_res, rows = 45)
368 368
 ```
369 369
 to execute all *collect()* commands in the RGMQL query and extract data 
... ...
@@ -405,7 +405,7 @@ with user and password, or as guest.
405 405
 Upon successful logon, you get a request token that you must use 
406 406
 in every subsequent REST call.
407 407
 Login can be performed using the function:
408
-```{r, eval = TRUE}
408
+```{r, init with login}
409 409
 test_url = "http://genomic.deib.polimi.it/gmql-rest-r/"
410 410
 login_gmql(test_url)
411 411
 ```
... ...
@@ -455,7 +455,7 @@ download_dataset(test_url, name_dataset)
455 455
 ```
456 456
 
457 457
 Once download is done, we can logout from remote repository using:
458
-```{r, logout, eval = TRUE}
458
+```{r, logout}
459 459
 logout_gmql(test_url)
460 460
 ```
461 461
 *logout_gmql()* deletes the $authToken$ from R environment.
... ...
@@ -473,13 +473,13 @@ infrastructure with login function:
473 473
 login_gmql(test_url)
474 474
 ```
475 475
 Otherwise, we can initialize the data engine with a remote url:
476
-```{r, initialize remote, eval = TRUE}
476
+```{r, initialize remote}
477 477
 init_gmql(url = test_url)
478 478
 ```
479 479
 in this way login is automatically performed as specified above
480 480
 
481 481
 After initialization, we can start building our query:
482
-```{r, remote query, eval = TRUE}
482
+```{r, remote query}
483 483
 
484 484
 ## Read the remote dataset HG19_TCGA_dnaseq 
485 485
 ## Read the remote dataset HG19_BED_ANNOTATION 
... ...
@@ -530,7 +530,7 @@ data management engine is not stopped, and can be used for further queries.
530 530
 ## Mixed Processing
531 531
 
532 532
 The processing flavour can be switched using the function:
533
-```{r, switch mode, eval = TRUE}
533
+```{r, switch mode}
534 534
 remote_processing(TRUE)
535 535
 ```
536 536
 An user can switch processing mode until the first *collect()* has been performed.
... ...
@@ -538,7 +538,7 @@ An user can switch processing mode until the first *collect()* has been performe
538 538
 This kind of processing comes from the fact that the *read()* function can 
539 539
 accept either a local dataset or a remote repository dataset, 
540 540
 even in the same query as in the following example:
541
-```{r, mixed query, eval = TRUE}
541
+```{r, mixed query}
542 542
 
543 543
 
544 544
 # This statement defines the path to the folder "MUT" in the subdirectory 
... ...
@@ -613,7 +613,7 @@ packages.
613 613
 ## Import/Export
614 614
 
615 615
 We can import a GMQL dataset into R environment as follows:
616
-```{r, import, eval = TRUE}
616
+```{r, import}
617 617
 # This statement defines the path to the folder "EXON" in the subdirectory 
618 618
 # "example" of the package "RGMQL"
619 619
 
... ...
@@ -627,7 +627,7 @@ data
627 627
 the second parameter *is_gtf* must specify the file format: .GTF or .GDM.
628 628
 
629 629
 We can export a GRangesList as GMQL dataset as follows:
630
-```{r, export, eval = TRUE}
630
+```{r, export}
631 631
 # This statement defines the path to the subdirectory "example" of the 
632 632
 # package "RGMQL"
633 633
 
... ...
@@ -644,7 +644,7 @@ the second parameter *is_gtf* specifies the file format: .GTF or .GDM.
644 644
 
645 645
 We can also import only a part of a GMQL dataset into R environment, 
646 646
 by filtering its content as follows:
647
-```{r, filter_extract, eval = TRUE}
647
+```{r, filter_extract}
648 648
 # This statement defines the path to the folder "TEAD" in the subdirectory 
649 649
 # "example" of the package "RGMQL"
650 650
 
651 651
similarity index 100%
652 652
rename from vignettes/my-vignette.fdb_latexmk
653 653
rename to vignettes/RGMQL-vignette.fdb_latexmk
654 654
similarity index 100%
655 655
rename from vignettes/my-vignette.fls
656 656
rename to vignettes/RGMQL-vignette.fls
657 657
similarity index 100%
658 658
rename from vignettes/my-vignette.md
659 659
rename to vignettes/RGMQL-vignette.md