... | ... |
@@ -8,21 +8,18 @@ Description: This package brings the GenoMetric Query Language (GMQL) |
8 | 8 |
functionalities into the R environment. GMQL is a high-level, declarative |
9 | 9 |
language to query and compare multiple and heterogeneous genomic datasets |
10 | 10 |
for biomedical knowledge discovery. It allows expressing easily queries and |
11 |
- processing over genomic regions and their metadata, in a way similar to what |
|
12 |
- can be done with the Structured Query Language (SQL) over a relational database, |
|
13 |
- to extract genomic regions of interest and compute their properties. GMQL |
|
14 |
- adopts algorithms designed for big data and their efficient implementation |
|
15 |
- using cloud-computing technologies, including Apache Hadoop framework and |
|
16 |
- Spark engine; these make GMQL able to run on modern high performance computing |
|
11 |
+ processing over genomic regions and their metadata, to extract genomic regions |
|
12 |
+ of interest and compute their properties. GMQL adopts algorithms designed |
|
13 |
+ for big data and their efficient implementation using cloud-computing |
|
14 |
+ technologies, including Apache Hadoop framework and Spark engine; |
|
15 |
+ these make GMQL able to run on modern high performance computing |
|
17 | 16 |
infrastructures, CPU clusters and network infrastructures, in order to achieve |
18 |
- scalability and performance on big data. With GMQL very complex genomic |
|
19 |
- operations can be written as simple queries, with implicit iteration over |
|
20 |
- thousands of heterogeneous samples, and computed efficiently in few minutes over |
|
21 |
- servers or clouds. This RGMQL package is built over a scalable data management |
|
22 |
- engine written in Scala programming language, released as Scala API; it provides |
|
23 |
- a set of functions to create, manipulate and extract genomic data from different |
|
24 |
- data sources both from local and remote datasets. These RGMQL functions allow |
|
25 |
- performing complex queries and processing without knowing the GMQL syntax, |
|
17 |
+ scalability and performance on big data. This RGMQL package is built over |
|
18 |
+ a scalable data management engine written in Scala programming language, |
|
19 |
+ released as Scala API; it provides a set of functions to create, |
|
20 |
+ manipulate and extract genomic data from different data sources both |
|
21 |
+ from local and remote datasets. These RGMQL functions allow performing |
|
22 |
+ complex queries and processing without knowing the GMQL syntax, |
|
26 | 23 |
but leveraging on R idiomatic paradigm and logic. RGMQL provides two different |
27 | 24 |
approaches in writing GMQL queries and processing scripts: a) REST calls b) |
28 | 25 |
standard R APIs The REST approach let users to log into a remote infrastructure |
... | ... |
@@ -67,7 +64,7 @@ Imports: |
67 | 64 |
glue, |
68 | 65 |
BiocGenerics |
69 | 66 |
Depends: |
70 |
- R(>= 3.4.2) |
|
67 |
+ R(<= 3.4.2), RGMQLScalaLibs |
|
71 | 68 |
VignetteBuilder: knitr |
72 | 69 |
Suggests: |
73 | 70 |
BiocStyle, |
... | ... |
@@ -45,11 +45,11 @@ import_gmql <- function(dataset_path, is_gtf) |
45 | 45 |
if(!dir.exists(datasetName)) |
46 | 46 |
stop("Directory does not exists") |
47 | 47 |
|
48 |
- if(length(list.files(datasetName))==0) |
|
48 |
+ if(!length(list.files(datasetName))) |
|
49 | 49 |
stop("no samples present in this dataset") |
50 | 50 |
|
51 | 51 |
regions <- list.files(datasetName, pattern = "*.gtf$",full.names = TRUE) |
52 |
- if(length(regions) != 0) |
|
52 |
+ if(length(regions)) |
|
53 | 53 |
{ |
54 | 54 |
name_samples <- lapply(regions, function(x){ |
55 | 55 |
gsub("*.gtf", "", basename(x))}) |
... | ... |
@@ -62,7 +62,7 @@ import_gmql <- function(dataset_path, is_gtf) |
62 | 62 |
stop("No GTF files present") |
63 | 63 |
|
64 | 64 |
meta <- list.files(datasetName, pattern = "*.gtf.meta$",full.names = TRUE) |
65 |
- if(length(meta) != 0) |
|
65 |
+ if(length(meta)) |
|
66 | 66 |
{ |
67 | 67 |
meta_list <- lapply(meta, .add_metadata) |
68 | 68 |
names(meta_list) <- name_samples |
... | ... |
@@ -83,17 +83,17 @@ import_gmql <- function(dataset_path, is_gtf) |
83 | 83 |
if(!dir.exists(datasetName)) |
84 | 84 |
stop("Directory does not exists") |
85 | 85 |
|
86 |
- if(length(list.files(datasetName))==0) |
|
86 |
+ if(!length(list.files(datasetName))) |
|
87 | 87 |
stop("no samples present in this dataset") |
88 | 88 |
|
89 | 89 |
regions <- list.files(datasetName, pattern = "*.gdm$",full.names = TRUE) |
90 |
- if(length(regions) != 0) |
|
90 |
+ if(length(regions)) |
|
91 | 91 |
{ |
92 | 92 |
name_samples <- lapply(regions, function(x){ |
93 | 93 |
gsub("*.gdm", "",basename(x))}) |
94 | 94 |
vector_field <- .schema_header(datasetName) |
95 | 95 |
|
96 |
- names(vector_field)=NULL |
|
96 |
+ names(vector_field) <- NULL |
|
97 | 97 |
sampleList <- lapply(regions,function(x){ |
98 | 98 |
df <- read.delim(x,col.names = vector_field,header = FALSE) |
99 | 99 |
g <- GenomicRanges::makeGRangesFromDataFrame(df, |
... | ... |
@@ -108,7 +108,7 @@ import_gmql <- function(dataset_path, is_gtf) |
108 | 108 |
stop("No GDM files present") |
109 | 109 |
|
110 | 110 |
meta <- list.files(datasetName, pattern = "*.gdm.meta$",full.names = TRUE) |
111 |
- if(length(meta) != 0) |
|
111 |
+ if(length(meta)) |
|
112 | 112 |
{ |
113 | 113 |
meta_list <- lapply(meta, .add_metadata) |
114 | 114 |
names(meta_list) <- name_samples |
... | ... |
@@ -134,7 +134,7 @@ export_gmql <- function(samples, dir_out, is_gtf) |
134 | 134 |
},files_sub_dir) |
135 | 135 |
} |
136 | 136 |
# first regions to get column names |
137 |
- col_names <- sapply(elementMetadata(samples[[1]]),class) |
|
137 |
+ col_names <- vapply(elementMetadata(samples[[1]]),class,character(1)) |
|
138 | 138 |
# write schema XML |
139 | 139 |
.write_schema(col_names,files_sub_dir,to_GTF) |
140 | 140 |
c = .counter(0) |
... | ... |
@@ -145,9 +145,9 @@ export_gmql <- function(samples, dir_out, is_gtf) |
145 | 145 |
.write_metadata <- function(meta_list,sample_name) |
146 | 146 |
{ |
147 | 147 |
#create my own list if metadata empty |
148 |
- if(length(meta_list)==0){ |
|
148 |
+ if(!length(meta_list)) |
|
149 | 149 |
meta_list <- list(Provider = "Polimi", Application = "R-GMQL") |
150 |
- } |
|
150 |
+ |
|
151 | 151 |
names_list <- names(meta_list) |
152 | 152 |
value_list <- unlist(meta_list) |
153 | 153 |
file_meta_name = paste0(sample_name,".meta") |
... | ... |
@@ -72,6 +72,8 @@ take_value.META_AGGREGATES <- function(obj){ |
72 | 72 |
#' function sum, performing all the type conversions needed } |
73 | 73 |
#' \item{COUNT: It prepares input parameter to be passed to the library |
74 | 74 |
#' function count, performing all the type conversions needed } |
75 |
+#' \item{COUNTSAMP: It prepares input parameter to be passed to the library |
|
76 |
+#' function third quartile, performing all the type conversions needed } |
|
75 | 77 |
#' \item{MIN: It prepares input parameter to be passed to the library |
76 | 78 |
#' function minimum, performing all the type conversions needed } |
77 | 79 |
#' \item{MAX: It prepares input parameter to be passed to the library |
... | ... |
@@ -197,6 +199,25 @@ as.character.COUNT <- function(obj) { |
197 | 199 |
check.COUNT <- function(obj){} |
198 | 200 |
|
199 | 201 |
|
202 |
+#' @name AGGREGATES-Object |
|
203 |
+#' @aliases COUNTSAMP |
|
204 |
+#' @rdname aggr-class |
|
205 |
+#' @export |
|
206 |
+#' |
|
207 |
+COUNTSAMP <- function() |
|
208 |
+{ |
|
209 |
+ list <- list() |
|
210 |
+ ## Set the name for the class |
|
211 |
+ class(list) <- c("COUNTSAMP","AGGREGATES","META_AGGREGATES") |
|
212 |
+ return(list) |
|
213 |
+} |
|
214 |
+as.character.COUNTSAMP <- function(obj) { |
|
215 |
+ class <- class(obj)[1] |
|
216 |
+ c(class,"") |
|
217 |
+} |
|
218 |
+check.COUNTSAMP <- function(obj){} |
|
219 |
+ |
|
220 |
+ |
|
200 | 221 |
#' @name AGGREGATES-Object |
201 | 222 |
#' @aliases MIN |
202 | 223 |
#' @rdname aggr-class |
... | ... |
@@ -12,7 +12,7 @@ |
12 | 12 |
{ |
13 | 13 |
x <- scan(files, what="", sep="\n") |
14 | 14 |
y <- strsplit(x, "\t") |
15 |
- names(y) <- sapply(y, `[[`, 1) |
|
15 |
+ names(y) <- vapply(y, `[[`,character(1), 1) |
|
16 | 16 |
listMeta <- lapply(y, `[`, -1) |
17 | 17 |
} |
18 | 18 |
|
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
schema_name_xml <- list.files(datasetName, pattern = "*.xml$", |
24 | 24 |
full.names = TRUE) |
25 | 25 |
|
26 |
- if(length(schema_name)==0 && length(schema_name_xml) == 0) |
|
26 |
+ if(!length(schema_name) && !length(schema_name_xml)) |
|
27 | 27 |
stop("schema not present") |
28 | 28 |
|
29 | 29 |
xml_schema <- xml2::read_xml(schema_name) |
... | ... |
@@ -37,24 +37,24 @@ |
37 | 37 |
if(!is.list(meta_data)) |
38 | 38 |
stop("meta_data: invalid input") |
39 | 39 |
|
40 |
- if(!all(sapply(meta_data, function(x) is(x,class)))) |
|
40 |
+ if(!all(vapply(meta_data, function(x) is(x,class), logical(1)))) |
|
41 | 41 |
stop("All elements must be META_AGGREGATES object") |
42 | 42 |
|
43 | 43 |
names <- names(meta_data) |
44 | 44 |
if(is.null(names)) |
45 | 45 |
{ |
46 | 46 |
warning("You did not assign a names to a list.\nWe build it for you") |
47 |
- names <- sapply(meta_data, take_value.META_AGGREGATES) |
|
47 |
+ names <- vapply(meta_data, take_value.META_AGGREGATES,character(1)) |
|
48 | 48 |
} |
49 | 49 |
else |
50 | 50 |
{ |
51 | 51 |
if("" %in% names) |
52 | 52 |
stop("No partial names assignment is allowed") |
53 | 53 |
} |
54 |
- aggregate_matrix <- t(sapply(meta_data, function(x) { |
|
54 |
+ aggregate_matrix <- t(vapply(meta_data, function(x) { |
|
55 | 55 |
new_value = as.character(x) |
56 | 56 |
matrix <- matrix(new_value) |
57 |
- })) |
|
57 |
+ },character(2))) |
|
58 | 58 |
|
59 | 59 |
m_names <- matrix(names) |
60 | 60 |
metadata_matrix <- cbind(m_names,aggregate_matrix) |
... | ... |
@@ -28,7 +28,7 @@ |
28 | 28 |
#' |
29 | 29 |
#' @examples |
30 | 30 |
#' |
31 |
-#' "where is my example?" |
|
31 |
+#' "Where is my example?" |
|
32 | 32 |
#' |
33 | 33 |
#' @name Evaluation-Function |
34 | 34 |
#' @aliases condition_evaluation |
... | ... |
@@ -51,10 +51,10 @@ conds <- function(default = c(""), full = c(""), exact = c("")) |
51 | 51 |
join_condition_matrix <- NULL |
52 | 52 |
else |
53 | 53 |
{ |
54 |
- join_condition_matrix <- t(sapply(array, function(x) { |
|
54 |
+ join_condition_matrix <- t(vapply(array, function(x) { |
|
55 | 55 |
new_value = c(cond, x) |
56 | 56 |
matrix <- matrix(new_value) |
57 |
- })) |
|
57 |
+ },character(2))) |
|
58 | 58 |
} |
59 | 59 |
join_condition_matrix |
60 | 60 |
} |
... | ... |
@@ -86,25 +86,25 @@ filter_and_extract <- function(data, metadata = NULL, |
86 | 86 |
gtf_meta_files <- list.files(datasetName, pattern = "*.gtf.meta$", |
87 | 87 |
full.names = TRUE) |
88 | 88 |
|
89 |
- if(length(gdm_meta_files)==0 && length(gtf_meta_files)==0) |
|
89 |
+ if(!length(gdm_meta_files) && !length(gtf_meta_files)) |
|
90 | 90 |
stop("no samples present or no files format supported") |
91 | 91 |
|
92 |
- if(length(gdm_meta_files)>=1 && length(gtf_meta_files)>=1) |
|
92 |
+ if(length(gdm_meta_files) && length(gtf_meta_files)) |
|
93 | 93 |
stop("GMQL dataset cannot be mixed dataset: no GTF and GDM together") |
94 | 94 |
|
95 | 95 |
vector_field <- .schema_header(datasetName) |
96 | 96 |
|
97 |
- if(length(gdm_meta_files)>0) |
|
97 |
+ if(length(gdm_meta_files)) |
|
98 | 98 |
{ |
99 | 99 |
samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix, |
100 | 100 |
gdm_meta_files, suffix) |
101 | 101 |
|
102 |
- samples_file <- sapply(samples_with_suffix, function(x) x$sample) |
|
103 |
- suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix) |
|
102 |
+ samples_file <- lapply(samples_with_suffix, function(x) x$sample) |
|
103 |
+ suffix_vec <- lapply(samples_with_suffix, function(x) x$suffix) |
|
104 | 104 |
suffixes <- unlist(suffix_vec) |
105 | 105 |
samples_to_read <- unlist(samples_file) |
106 | 106 |
|
107 |
- if(length(samples_to_read)>0) |
|
107 |
+ if(length(samples_to_read)) |
|
108 | 108 |
samples_to_read <- gsub(".meta$", "", samples_to_read) |
109 | 109 |
else |
110 | 110 |
samples_to_read <- gsub(".meta$", "", gdm_meta_files) |
... | ... |
@@ -117,12 +117,12 @@ filter_and_extract <- function(data, metadata = NULL, |
117 | 117 |
samples_with_suffix <- .check_metadata_files(metadata,metadata_prefix, |
118 | 118 |
gtf_meta_files, suffix) |
119 | 119 |
|
120 |
- samples_file <- sapply(samples_with_suffix, function(x) x$sample) |
|
121 |
- suffix_vec <- sapply(samples_with_suffix, function(x) x$suffix) |
|
120 |
+ samples_file <- lapply(samples_with_suffix, function(x) x$sample) |
|
121 |
+ suffix_vec <- lapply(samples_with_suffix, function(x) x$suffix) |
|
122 | 122 |
suffixes <- unlist(suffix_vec) |
123 | 123 |
samples_to_read <- unlist(samples_file) |
124 | 124 |
|
125 |
- if(length(samples_to_read)>0) |
|
125 |
+ if(length(samples_to_read)) |
|
126 | 126 |
samples_to_read <- gsub(".meta$", "", samples_to_read) |
127 | 127 |
else |
128 | 128 |
samples_to_read <- gsub(".meta$", "", gtf_meta_files) |
... | ... |
@@ -137,13 +137,13 @@ filter_and_extract <- function(data, metadata = NULL, |
137 | 137 |
if(!is(rangesList,"GRangesList")) |
138 | 138 |
stop("only GrangesList admitted") |
139 | 139 |
|
140 |
- if(length(rangesList)<=0) |
|
140 |
+ if(!length(rangesList)) |
|
141 | 141 |
stop("rangesList empty") |
142 | 142 |
|
143 | 143 |
meta_list <- metadata(rangesList) |
144 | 144 |
samples <- .check_metadata_list(metadata, metadata_prefix, meta_list, |
145 | 145 |
suffix) |
146 |
- if(length(unlist(samples))<=0) |
|
146 |
+ if(!length(unlist(samples))) |
|
147 | 147 |
samples <- rangesList |
148 | 148 |
else |
149 | 149 |
{ |
... | ... |
@@ -174,8 +174,8 @@ filter_and_extract <- function(data, metadata = NULL, |
174 | 174 |
vec_meta <- paste0(metadata_prefix,metadata) |
175 | 175 |
list <- mapply(function(x,index){ |
176 | 176 |
vec_names <- names(x) |
177 |
- a <- sapply(vec_meta, function(y) { |
|
178 |
- which(y==vec_names) |
|
177 |
+ a <- lapply(vec_meta, function(y) { |
|
178 |
+ which(y == vec_names) |
|
179 | 179 |
}) |
180 | 180 |
## we would like that manage more index from grep |
181 | 181 |
found <- as.logical(length(unlist(a))) |
... | ... |
@@ -191,9 +191,7 @@ filter_and_extract <- function(data, metadata = NULL, |
191 | 191 |
meta_list <- lapply(meta_files, function(x){ |
192 | 192 |
list <- .add_metadata(x) |
193 | 193 |
vec_names <- names(list) |
194 |
- a <- sapply(vec_meta, function(y) { |
|
195 |
- grep(y,vec_names) |
|
196 |
- }) |
|
194 |
+ a <- lapply(vec_meta, function(y)grep(y,vec_names)) |
|
197 | 195 |
## we would like that manage more index from grep |
198 | 196 |
found <- as.logical(length(unlist(a))) |
199 | 197 |
index <- grep(suffix,vec_names) |
... | ... |
@@ -249,7 +247,7 @@ filter_and_extract <- function(data, metadata = NULL, |
249 | 247 |
if(!is.null(regions)) |
250 | 248 |
col_names <- col_names[col_names %in% regions] |
251 | 249 |
|
252 |
- if(length(col_names)!=0) |
|
250 |
+ if(length(col_names)) |
|
253 | 251 |
r <- subset(region_frame,TRUE,col_names) |
254 | 252 |
},regions,vector_field) |
255 | 253 |
|
... | ... |
@@ -48,8 +48,8 @@ |
48 | 48 |
#' \item{an expression built using PARAMETER object: (ALL() + N) / K or |
49 | 49 |
#' ALL() / K, with N and K integer values } |
50 | 50 |
#' } |
51 |
-#' @param groupBy \code{\link{condition_evaluation}} function to support |
|
52 |
-#' methods with groupBy or JoinBy input paramter |
|
51 |
+#' @param groupBy \code{\link{conds}} function to support methods with |
|
52 |
+#' groupBy or JoinBy input parameter |
|
53 | 53 |
#' |
54 | 54 |
#' @param ... a series of expressions separated by comma in the form |
55 | 55 |
#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of |
... | ... |
@@ -115,8 +115,7 @@ |
115 | 115 |
#' ## regions the minimum pvalue of the overlapping regions (min_pvalue) |
116 | 116 |
#' ## and their Jaccard indexes (JaccardIntersect and JaccardResult). |
117 | 117 |
#' |
118 |
-#' res = cover(exp, 2, 3, groupBy = condition_evaluation(c("cell")), |
|
119 |
-#' min_pValue = MIN("pvalue")) |
|
118 |
+#' res = cover(exp, 2, 3, groupBy = conds("cell"), min_pValue = MIN("pvalue")) |
|
120 | 119 |
#' |
121 | 120 |
#' @name cover |
122 | 121 |
#' @rdname cover |
... | ... |
@@ -141,9 +140,7 @@ setMethod("cover", "GMQLDataset", |
141 | 140 |
gmql_cover(val, q_min, q_max, groupBy, aggregates, flag) |
142 | 141 |
}) |
143 | 142 |
|
144 |
- |
|
145 |
- |
|
146 |
-gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag) |
|
143 |
+gmql_cover <- function(input_data, min_acc, max_acc, groupBy,aggregates,flag) |
|
147 | 144 |
{ |
148 | 145 |
if(!is.null(groupBy)) |
149 | 146 |
{ |
... | ... |
@@ -156,7 +153,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag) |
156 | 153 |
join_matrix <- .jarray(cond, dispatch = TRUE) |
157 | 154 |
} |
158 | 155 |
else |
159 |
- stop("use function condition_evaluation()") |
|
156 |
+ stop("use function conds()") |
|
160 | 157 |
} |
161 | 158 |
else |
162 | 159 |
join_matrix <- .jnull("java/lang/String") |
... | ... |
@@ -184,7 +181,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag) |
184 | 181 |
|
185 | 182 |
error <- strtoi(response[1]) |
186 | 183 |
val <- response[2] |
187 |
- if(error!=0) |
|
184 |
+ if(error) |
|
188 | 185 |
stop(val) |
189 | 186 |
else |
190 | 187 |
GMQLDataset(val) |
... | ... |
@@ -195,14 +192,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag) |
195 | 192 |
if(length(param) > 1) |
196 | 193 |
stop("length > 1") |
197 | 194 |
|
198 |
- if(is.numeric(param)) |
|
199 |
- { |
|
200 |
- if(param <= 0) |
|
201 |
- stop("No negative value") |
|
202 |
- else |
|
203 |
- return(as.character(param)) |
|
204 |
- } |
|
205 |
- else if(is.character(param)) |
|
195 |
+ if(is.character(param)) |
|
206 | 196 |
{ |
207 | 197 |
if(is_min && identical(param,"ANY")) |
208 | 198 |
stop("min cannot assume ANY as value") |
... | ... |
@@ -214,7 +204,7 @@ gmql_cover <- function(input_data, min_acc, max_acc, groupBy, aggregates, flag) |
214 | 204 |
|
215 | 205 |
} |
216 | 206 |
|
217 |
-.trasform_cover <- function(predicate=NULL) |
|
207 |
+.trasform_cover <- function(predicate) |
|
218 | 208 |
{ |
219 | 209 |
predicate <- gsub("\\(\\)","",predicate) |
220 | 210 |
} |
... | ... |
@@ -17,8 +17,8 @@ |
17 | 17 |
#' |
18 | 18 |
#' @param x GMQLDataset class object |
19 | 19 |
#' @param y GMQLDataset class object |
20 |
-#' @param joinBy \code{\link{condition_evaluation}} function to support |
|
21 |
-#' methods with groupBy or JoinBy input paramter |
|
20 |
+#' @param joinBy \code{\link{conds}} function to support methods with |
|
21 |
+#' groupBy or JoinBy input paramter |
|
22 | 22 |
#' |
23 | 23 |
#' @param is_exact single logical value: TRUE means that the region difference |
24 | 24 |
#' is executed only on regions in left_input_data with exactly the same |
... | ... |
@@ -30,7 +30,6 @@ |
30 | 30 |
#' @return GMQLDataset object. It contains the value to use as input |
31 | 31 |
#' for the subsequent GMQLDataset method |
32 | 32 |
#' |
33 |
-#' |
|
34 | 33 |
#' @examples |
35 | 34 |
#' ## This statement initializes and runs the GMQL server for local execution |
36 | 35 |
#' ## and creation of results on disk. Then, with system.file() it defines |
... | ... |
@@ -55,7 +54,7 @@ |
55 | 54 |
#' ## do not overlap any region in s2; |
56 | 55 |
#' ## metadata of the result are the same as the metadata of s1. |
57 | 56 |
#' |
58 |
-#' out_t = setdiff(data1, data2, condition_evaluation(c("cell"))) |
|
57 |
+#' out_t = setdiff(data1, data2, conds("cell")) |
|
59 | 58 |
#' |
60 | 59 |
#' @name setdiff |
61 | 60 |
#' @aliases setdiff,GMQLDataset,GMQLDataset-method |
... | ... |
@@ -87,7 +86,7 @@ gmql_difference <- function(left_data, right_data, is_exact, joinBy) |
87 | 86 |
is_exact) |
88 | 87 |
error <- strtoi(response[1]) |
89 | 88 |
val <- response[2] |
90 |
- if(error!=0) |
|
89 |
+ if(error) |
|
91 | 90 |
stop(val) |
92 | 91 |
else |
93 | 92 |
GMQLDataset(val) |
... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, |
2 | 2 |
groupBy_regions = NULL, region_aggregates = NULL, meta_aggregates = NULL) |
3 | 3 |
{ |
4 |
- ptr_data = .data@value |
|
4 |
+ ptr_data = value(.data) |
|
5 | 5 |
gmql_group(ptr_data, groupBy_meta, groupBy_regions, region_aggregates, |
6 | 6 |
meta_aggregates) |
7 | 7 |
} |
... | ... |
@@ -14,16 +14,9 @@ group_by.GMQLDateset <- function(.data, groupBy_meta = NULL, |
14 | 14 |
#' @importFrom dplyr group_by |
15 | 15 |
#' |
16 | 16 |
#' @param .data GMQLDataset object |
17 |
-#' @param groupBy_meta it define condition evaluation on metadata. |
|
18 |
-#' \itemize{ |
|
19 |
-#' \item{\code{\link{FN}}: Fullname evaluation, two attributes match |
|
20 |
-#' if they both end with value and, if they have a further prefixes, |
|
21 |
-#' the two prefix sequence are identical} |
|
22 |
-#' \item{\code{\link{EX}}: Exact evaluation, only attributes exactly |
|
23 |
-#' as value will match; no further prefixes are allowed. } |
|
24 |
-#' \item{\code{\link{DF}}: Default evaluation, the two attributes match |
|
25 |
-#' if both end with value.} |
|
26 |
-#' } |
|
17 |
+#' @param groupBy_meta \code{\link{conds}} function to support methods with |
|
18 |
+#' groupBy or JoinBy input parameter |
|
19 |
+#' |
|
27 | 20 |
#' @param groupBy_regions vector of string made up by schema field attribute |
28 | 21 |
#' @param region_aggregates It accept a series of aggregate function on |
29 | 22 |
#' region attribute. |
... | ... |
@@ -89,7 +82,7 @@ gmql_group <- function(input_data, group_meta, group_reg, region_aggregates, |
89 | 82 |
join_matrix <- .jarray(cond, dispatch = TRUE) |
90 | 83 |
} |
91 | 84 |
else |
92 |
- stop("use function condition_evaluation()") |
|
85 |
+ stop("use function conds()") |
|
93 | 86 |
} |
94 | 87 |
else |
95 | 88 |
join_matrix <- .jnull("java/lang/String") |
... | ... |
@@ -132,7 +125,7 @@ gmql_group <- function(input_data, group_meta, group_reg, region_aggregates, |
132 | 125 |
region_matrix, input_data) |
133 | 126 |
error <- strtoi(response[1]) |
134 | 127 |
val <- response[2] |
135 |
- if(error!=0) |
|
128 |
+ if(error) |
|
136 | 129 |
stop(val) |
137 | 130 |
else |
138 | 131 |
GMQLDataset(val) |
... | ... |
@@ -43,12 +43,12 @@ |
43 | 43 |
#' having left (right) coordinates equal to the minimum (maximum) of the |
44 | 44 |
#' corresponding coordinate values in the 'x' and 'y' regions satisfying |
45 | 45 |
#' the genometric predicate)} |
46 |
-#' \item{LEFT_DISTINCT: It outputs the duplicate elimination of "x" output |
|
46 |
+#' \item{LEFT_DIST: It outputs the duplicate elimination of "x" output |
|
47 | 47 |
#' regions with the same values, regardless the "y" paired region and its |
48 | 48 |
#' values. In this case, the output regions attributes and their values are |
49 | 49 |
#' all those of "x", and the output metadata are equal to the "x" metadata, |
50 | 50 |
#' without additional prefixes} |
51 |
-#' \item{RIGHT_DISTINCT: It outputs the duplicate elimination of "y" output |
|
51 |
+#' \item{RIGHT_DIST: It outputs the duplicate elimination of "y" output |
|
52 | 52 |
#' regions with the same values, regardless the "x" paired region and its |
53 | 53 |
#' values. In this case, the output regions attributes and their values are |
54 | 54 |
#' all those of "y", and the output metadata are equal to the "y" metadata, |
... | ... |
@@ -109,19 +109,20 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy, |
109 | 109 |
{ |
110 | 110 |
if(!is.null(genometric_predicate)) |
111 | 111 |
{ |
112 |
- if(length(genometric_predicate) >4) |
|
112 |
+ if(length(genometric_predicate) > 4) |
|
113 | 113 |
stop("genometric_predicate: only 4 DISTAL condition") |
114 | 114 |
|
115 | 115 |
if(!is.list(genometric_predicate)) |
116 | 116 |
stop("genometric_predicate must be a list") |
117 | 117 |
|
118 |
- if(!all(sapply(genometric_predicate, function(x) {is(x,"DISTAL")} ))) |
|
118 |
+ if(!all(vapply(genometric_predicate, function(x) {is(x,"DISTAL")}, |
|
119 |
+ logical(1)))) |
|
119 | 120 |
stop("All elements should be DISTAL object") |
120 | 121 |
|
121 |
- genomatrix <- t(sapply(genometric_predicate, function(x) { |
|
122 |
+ genomatrix <- t(vapply(genometric_predicate, function(x) { |
|
122 | 123 |
new_value = as.character(x) |
123 | 124 |
array <- c(new_value) |
124 |
- })) |
|
125 |
+ },character(2))) |
|
125 | 126 |
|
126 | 127 |
genomatrix <- .jarray(genomatrix, dispatch = TRUE) |
127 | 128 |
} |
... | ... |
@@ -166,7 +167,7 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy, |
166 | 167 |
left_data, right_data) |
167 | 168 |
error <- strtoi(response[1]) |
168 | 169 |
val <- response[2] |
169 |
- if(error!=0) |
|
170 |
+ if(error) |
|
170 | 171 |
stop(val) |
171 | 172 |
else |
172 | 173 |
GMQLDataset(val) |
... | ... |
@@ -39,16 +39,8 @@ |
39 | 39 |
#' } |
40 | 40 |
#' "mixed style" is not allowed |
41 | 41 |
#' |
42 |
-#' @param joinBy list of evalation functions to define evaluation on metadata: |
|
43 |
-#' \itemize{ |
|
44 |
-#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
45 |
-#' if they both end with \emph{value} and, if they have further prefixes, |
|
46 |
-#' the two prefix sequence are identical.} |
|
47 |
-#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
48 |
-#' as \emph{value} match; no further prefixes are allowed.} |
|
49 |
-#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
50 |
-#' if both end with \emph{value}.} |
|
51 |
-#' } |
|
42 |
+#' @param joinBy \code{\link{conds}} function to support methods with |
|
43 |
+#' groupBy or JoinBy input parameter |
|
52 | 44 |
#' @param count_name string defining the metadata count name; if it is |
53 | 45 |
#' not specifying the name is "count_left_right" |
54 | 46 |
#' |
... | ... |
@@ -78,15 +70,14 @@ |
78 | 70 |
#' # but with a different value from the one(s) of ref sample(s), |
79 | 71 |
#' # are disregarded. |
80 | 72 |
#' |
81 |
-#' out = map(ref, exp, minScore = MIN("score"), |
|
82 |
-#' joinBy = list(DF("cell_tissue"))) |
|
73 |
+#' out = map(ref, exp, minScore = MIN("score"), joinBy = conds("cell_tissue")) |
|
83 | 74 |
#' |
84 | 75 |
#' @name map |
85 | 76 |
#' @rdname map |
86 | 77 |
#' @aliases map-method |
87 | 78 |
#' @export |
88 | 79 |
setMethod("map", "GMQLDataset", |
89 |
- function(x, y, ..., joinBy = NULL, count_name = NULL) |
|
80 |
+ function(x, y, ..., joinBy = NULL, count_name = "") |
|
90 | 81 |
{ |
91 | 82 |
left_data <- value(x) |
92 | 83 |
right_data <- value(y) |
... | ... |
@@ -120,6 +111,9 @@ gmql_map <- function(left_data, right_data, aggregates, joinBy, count_name) |
120 | 111 |
{ |
121 | 112 |
if(!is.character(count_name)) |
122 | 113 |
stop("count_name: must be string") |
114 |
+ |
|
115 |
+ if(identical(count_name,"")) |
|
116 |
+ count_name <- .jnull("java/lang/String") |
|
123 | 117 |
} |
124 | 118 |
else |
125 | 119 |
count_name <- .jnull("java/lang/String") |
... | ... |
@@ -129,7 +123,7 @@ gmql_map <- function(left_data, right_data, aggregates, joinBy, count_name) |
129 | 123 |
right_data) |
130 | 124 |
error <- strtoi(response[1]) |
131 | 125 |
val <- response[2] |
132 |
- if(error!=0) |
|
126 |
+ if(error) |
|
133 | 127 |
stop(val) |
134 | 128 |
else |
135 | 129 |
GMQLDataset(val) |
... | ... |
@@ -40,7 +40,7 @@ execute <- function() |
40 | 40 |
response <- WrappeR$execute() |
41 | 41 |
error <- strtoi(response[1]) |
42 | 42 |
val <- response[2] |
43 |
- if(error!=0) |
|
43 |
+ if(error) |
|
44 | 44 |
stop(val) |
45 | 45 |
else |
46 | 46 |
{ |
... | ... |
@@ -62,13 +62,13 @@ execute <- function() |
62 | 62 |
remote <- WrappeR$is_remote_processing() |
63 | 63 |
if(remote) |
64 | 64 |
{ |
65 |
- sapply(data_list,function(x){ |
|
65 |
+ lapply(data_list,function(x){ |
|
66 | 66 |
if(!is.null(x[[1]]) && !is.na(x[[1]])) |
67 | 67 |
upload_dataset(url,x[[2]],x[[1]],x[[3]],FALSE)}) |
68 | 68 |
} |
69 | 69 |
else |
70 | 70 |
{ |
71 |
- sapply(data_list,function(x){ |
|
71 |
+ lapply(data_list,function(x){ |
|
72 | 72 |
if(!is.null(x[[2]]) && !is.na(x[[2]])) |
73 | 73 |
download_dataset(url,x[[2]],x[[1]])}) |
74 | 74 |
} |
... | ... |
@@ -147,7 +147,7 @@ gmql_materialize <- function(input_data, dir_out, name) |
147 | 147 |
response <- WrappeR$materialize(input_data, res_dir_out) |
148 | 148 |
error <- strtoi(response[1]) |
149 | 149 |
val <- response[2] |
150 |
- if(error!=0) |
|
150 |
+ if(error) |
|
151 | 151 |
stop(val) |
152 | 152 |
else |
153 | 153 |
invisible(NULL) |
... | ... |
@@ -204,7 +204,7 @@ setMethod("take", "GMQLDataset", |
204 | 204 |
gmql_take(ptr_data, rows) |
205 | 205 |
}) |
206 | 206 |
|
207 |
-gmql_take <- function(input_data, rows = 0L) |
|
207 |
+gmql_take <- function(input_data, rows) |
|
208 | 208 |
{ |
209 | 209 |
rows <- as.integer(rows[1]) |
210 | 210 |
if(rows<0) |
... | ... |
@@ -214,7 +214,7 @@ gmql_take <- function(input_data, rows = 0L) |
214 | 214 |
response <- WrappeR$take(input_data, rows) |
215 | 215 |
error <- strtoi(response[1]) |
216 | 216 |
data <- response[2] |
217 |
- if(error!=0) |
|
217 |
+ if(error) |
|
218 | 218 |
stop(data) |
219 | 219 |
|
220 | 220 |
reg <- .jevalArray(WrappeR$get_reg(),simplify = TRUE) |
... | ... |
@@ -40,7 +40,7 @@ |
40 | 40 |
#' ## antibody_target and cell metadata |
41 | 41 |
#' ## attributes. |
42 | 42 |
#' |
43 |
-#' merged = aggregate(exp, condition_evaluation(c("antibody_target","cell"))) |
|
43 |
+#' merged = aggregate(exp, conds(c("antibody_target","cell"))) |
|
44 | 44 |
#' |
45 | 45 |
#' @name aggregate |
46 | 46 |
#' @rdname aggregate |
... | ... |
@@ -72,7 +72,7 @@ gmql_merge <- function(input_data, groupBy) |
72 | 72 |
response <- WrappeR$merge(join_matrix, input_data) |
73 | 73 |
error <- strtoi(response[1]) |
74 | 74 |
val <- response[2] |
75 |
- if(error!=0) |
|
75 |
+ if(error) |
|
76 | 76 |
stop(val) |
77 | 77 |
else |
78 | 78 |
GMQLDataset(val) |
... | ... |
@@ -130,13 +130,12 @@ gmql_order <- function(input_data, metadata_ordering, regions_ordering, |
130 | 130 |
region_matrix, input_data) |
131 | 131 |
error <- strtoi(response[1]) |
132 | 132 |
val <- response[2] |
133 |
- if(error!=0) |
|
133 |
+ if(error) |
|
134 | 134 |
stop(val) |
135 | 135 |
else |
136 | 136 |
GMQLDataset(val) |
137 | 137 |
} |
138 | 138 |
|
139 |
- |
|
140 | 139 |
.ordering_meta <- function(ordering) |
141 | 140 |
{ |
142 | 141 |
order_matrix <- do.call(rbind, ordering) |
... | ... |
@@ -2,7 +2,7 @@ select.GMQLDataset <- function(.data, metadata = NULL, metadata_update = NULL, |
2 | 2 |
all_but_meta = FALSE, regions = NULL, |
3 | 3 |
regions_update = NULL, all_but_reg = FALSE) |
4 | 4 |
{ |
5 |
- data = .data@value |
|
5 |
+ data = value(.data) |
|
6 | 6 |
r_update <- substitute(regions_update) |
7 | 7 |
if(!is.null(r_update)) |
8 | 8 |
{ |
... | ... |
@@ -132,7 +132,7 @@ gmql_project <-function(input_data, metadata, metadata_update, all_but_meta, |
132 | 132 |
metadata <- metadata[!metadata %in% ""] |
133 | 133 |
metadata <- metadata[!duplicated(metadata)] |
134 | 134 |
|
135 |
- if(length(metadata)==0) |
|
135 |
+ if(!length(metadata)) |
|
136 | 136 |
metadata <- .jnull("java/lang/String") |
137 | 137 |
|
138 | 138 |
metadata <- .jarray(metadata) |
... | ... |
@@ -148,7 +148,7 @@ gmql_project <-function(input_data, metadata, metadata_update, all_but_meta, |
148 | 148 |
regions = regions[!regions %in% ""] |
149 | 149 |
regions = regions[!duplicated(regions)] |
150 | 150 |
|
151 |
- if(length(regions)==0) |
|
151 |
+ if(!length(regions)) |
|
152 | 152 |
regions <- .jnull("java/lang/String") |
153 | 153 |
|
154 | 154 |
regions <- .jarray(regions) |
... | ... |
@@ -171,13 +171,13 @@ gmql_project <-function(input_data, metadata, metadata_update, all_but_meta, |
171 | 171 |
regions, regions_update, all_but_reg, input_data) |
172 | 172 |
error <- strtoi(response[1]) |
173 | 173 |
val <- response[2] |
174 |
- if(error!=0) |
|
174 |
+ if(error) |
|
175 | 175 |
stop(val) |
176 | 176 |
else |
177 | 177 |
GMQLDataset(val) |
178 | 178 |
} |
179 | 179 |
|
180 |
-.trasform_update <- function(predicate=NULL) |
|
180 |
+.trasform_update <- function(predicate) |
|
181 | 181 |
{ |
182 | 182 |
predicate <- gsub("list\\(","",predicate) |
183 | 183 |
predicate <- gsub("\\)$","",predicate) |
... | ... |
@@ -83,7 +83,7 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
83 | 83 |
|
84 | 84 |
schema_XML <- list.files(dataset, pattern = "*.schema$", |
85 | 85 |
full.names = TRUE) |
86 |
- if(length(schema_XML) == 0) |
|
86 |
+ if(!length(schema_XML)) |
|
87 | 87 |
stop("schema must be present") |
88 | 88 |
|
89 | 89 |
schema_matrix <- .jnull("java/lang/String") |
... | ... |
@@ -99,12 +99,12 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
99 | 99 |
stop("You have to log on using login function") |
100 | 100 |
|
101 | 101 |
list <- show_schema(url,dataset) |
102 |
- schema_names <- sapply(list$fields, function(x){x$name}) |
|
103 |
- schema_type <- sapply(list$fields, function(x){x$type}) |
|
102 |
+ schema_names <- vapply(list$fields, function(x){x$name},character(1)) |
|
103 |
+ schema_type <- vapply(list$fields, function(x){x$type},character(1)) |
|
104 | 104 |
schema_matrix <- cbind(schema_type,schema_names) |
105 | 105 |
#schema_type <- list$type |
106 | 106 |
|
107 |
- if(is.null(schema_matrix) || length(schema_matrix)==0) |
|
107 |
+ if(is.null(schema_matrix) || !length(schema_matrix)) |
|
108 | 108 |
schema_matrix <- .jnull("java/lang/String") |
109 | 109 |
else |
110 | 110 |
schema_matrix <- .jarray(schema_matrix, dispatch = TRUE) |
... | ... |
@@ -116,7 +116,7 @@ read_dataset <- function(dataset, parser = "CustomParser", is_local=TRUE, |
116 | 116 |
schema_matrix, schema_XML) |
117 | 117 |
error <- strtoi(response[1]) |
118 | 118 |
data <- response[2] |
119 |
- if(error!=0) |
|
119 |
+ if(error) |
|
120 | 120 |
stop(data) |
121 | 121 |
else |
122 | 122 |
GMQLDataset(data) |
... | ... |
@@ -138,11 +138,12 @@ read <- function(samples) |
138 | 138 |
stop("only GrangesList") |
139 | 139 |
|
140 | 140 |
meta <- S4Vectors::metadata(samples) |
141 |
- if(is.null(meta) || length(meta)==0) |
|
141 |
+ if(is.null(meta) || !length(meta)) |
|
142 | 142 |
{ |
143 | 143 |
#repeat meta for each sample in samples list |
144 | 144 |
len <- length(samples) |
145 |
- warning("No metadata.\nWe provide two metadata for you") |
|
145 |
+ warning("No metadata.\nWe provide two metadata for you: |
|
146 |
+ \n1.provider = PoliMi\n2.application = RGMQL\n") |
|
146 | 147 |
index_meta <- rep(seq_len(len),each = len) |
147 | 148 |
rep_meta <- rep(c("provider","PoliMi", "application", "RGMQL"), |
148 | 149 |
times = len) |
... | ... |
@@ -160,10 +161,11 @@ read <- function(samples) |
160 | 161 |
|
161 | 162 |
df <- data.frame(samples) |
162 | 163 |
df <- df[-2] #delete group_name |
163 |
- region_matrix <- as.matrix(sapply(df, as.character)) |
|
164 |
+ len_df <- dim(df)[1] # number of rows |
|
165 |
+ region_matrix <- as.matrix(vapply(df, as.character,character(len_df))) |
|
164 | 166 |
region_matrix[is.na(region_matrix)] <- "NA" |
165 | 167 |
region_matrix <- region_matrix[,setdiff(colnames(region_matrix),"width")] |
166 |
- col_types <- sapply(df,class) |
|
168 |
+ col_types <- vapply(df,class,character(1)) |
|
167 | 169 |
col_names <- names(col_types) |
168 | 170 |
#re order the schema? |
169 | 171 |
if("phase" %in% col_names) # if GTF, change |
... | ... |
@@ -98,7 +98,7 @@ filter.GMQLDateset <- function(.data, m_predicate = NULL, r_predicate = NULL, |
98 | 98 |
#' ## less than 0.01 are conserved in output |
99 | 99 |
#' |
100 | 100 |
#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
101 |
-#' semijoin(join_data, TRUE, list(DF("cell")))) |
|
101 |
+#' semijoin(join_data, TRUE, conds("cell"))) |
|
102 | 102 |
#' |
103 | 103 |
#' |
104 | 104 |
#' @name filter |
... | ... |
@@ -125,7 +125,7 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
125 | 125 |
input_data) |
126 | 126 |
error <- strtoi(response[1]) |
127 | 127 |
data <- response[2] |
128 |
- if(error!=0) |
|
128 |
+ if(error) |
|
129 | 129 |
stop(data) |
130 | 130 |
else |
131 | 131 |
GMQLDataset(data) |
... | ... |
@@ -137,6 +137,7 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
137 | 137 |
#' This function is used as support to the filter method to define |
138 | 138 |
#' semijoin conditions on metadata |
139 | 139 |
#' |
140 |
+#' |
|
140 | 141 |
#' @param .data GMQLDataset class object |
141 | 142 |
#' |
142 | 143 |
#' @param not_in logical value: TRUE => for a given sample of input dataset |
... | ... |
@@ -184,39 +185,44 @@ gmql_select <- function(input_data, predicate, region_predicate, s_join) |
184 | 185 |
#' # less than 0.01 are conserved in output |
185 | 186 |
#' |
186 | 187 |
#' jun_tf <- filter(data, antibody_target == "JUN", pValue < 0.01, |
187 |
-#' semijoin(join_data, TRUE, list(DF("cell")))) |
|
188 |
+#' semijoin(join_data, TRUE, conds("cell"))) |
|
188 | 189 |
#' |
189 | 190 |
#' @return semijoin condition as list |
190 | 191 |
#' @export |
191 | 192 |
#' |
192 |
-semijoin <- function(.data, not_in = FALSE, groupBy = NULL) |
|
193 |
+semijoin <- function(.data, not_in = FALSE, groupBy) |
|
193 | 194 |
{ |
194 |
- if(!is.list(groupBy)) |
|
195 |
- stop("groupBy: must be a list") |
|
196 |
- |
|
197 |
- semij_cond = groupBy |
|
195 |
+ if(!is.null(groupBy)) |
|
196 |
+ { |
|
197 |
+ if("condition" %in% names(groupBy)) |
|
198 |
+ { |
|
199 |
+ cond <- .join_condition(groupBy) |
|
200 |
+ if(is.null(cond)) |
|
201 |
+ stop("groupBy cannot be NULL") |
|
202 |
+ } |
|
203 |
+ else |
|
204 |
+ stop("use function conds()") |
|
205 |
+ } |
|
206 |
+ else |
|
207 |
+ stop("groupBy cannot be NULL") |
|
198 | 208 |
|
199 |
- if(is.null(data)) |
|
200 |
- stop("data cannot be NULL") |
|
209 |
+ if(is.null(.data)) |
|
210 |
+ stop(".data cannot be NULL") |
|
201 | 211 |
|
202 |
- if(!isClass("GMQLDataset", data)) |
|
212 |
+ if(!isClass("GMQLDataset", .data)) |
|
203 | 213 |
stop("data: Must be a GMQLDataset object") |
204 | 214 |
|
205 | 215 |
.check_logical(not_in) |
206 | 216 |
ptr_data <- value(.data) |
207 |
- |
|
208 | 217 |
data_cond <- cbind(ptr_data,not_in) |
209 |
- cond <- .join_condition(semij_cond) |
|
210 |
- cond <- rbind(data_cond,cond) |
|
211 |
- join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
218 |
+ all_conds <- rbind(data_cond,cond) |
|
219 |
+ join_condition_matrix <- .jarray(all_conds, dispatch = TRUE) |
|
212 | 220 |
|
213 | 221 |
semijoin <- list("semijoin" = join_condition_matrix) |
214 | 222 |
} |
215 | 223 |
|
216 | 224 |
|
217 |
- |
|
218 |
- |
|
219 |
-.trasform <- function(predicate=NULL) |
|
225 |
+.trasform <- function(predicate) |
|
220 | 226 |
{ |
221 | 227 |
predicate <- gsub("&|&&","AND",predicate) |
222 | 228 |
predicate <- gsub("\\||\\|\\|","OR",predicate) |
... | ... |
@@ -4,13 +4,41 @@ |
4 | 4 |
.onLoad <- function(libname, pkgname) { |
5 | 5 |
.jpackage(pkgname, lib.loc = libname) |
6 | 6 |
# tools::vignetteEngine("knitr", pattern = "[.]Rmd$", package = "knitr") |
7 |
- .jinit(force.init = TRUE) |
|
7 |
+ initGMQLscalaAPI() |
|
8 | 8 |
} |
9 | 9 |
|
10 | 10 |
.onAttach <- function(libname, pkgname) { |
11 |
- #packageStartupMessage("GMQL successfully loaded") |
|
11 |
+ packageStartupMessage("GMQL successfully loaded") |
|
12 | 12 |
} |
13 | 13 |
|
14 | 14 |
.onUnload <- function(libpath) { |
15 | 15 |
#.rscalaPackageUnload() |
16 | 16 |
} |
17 |
+ |
|
18 |
+ |
|
19 |
+#' @importFrom utils download.file |
|
20 |
+#' @importFrom rJava .jinit .jaddClassPath |
|
21 |
+#' |
|
22 |
+initGMQLscalaAPI <- function(libLoc, mem = "12G") { |
|
23 |
+ # Check if library directory is missing |
|
24 |
+ |
|
25 |
+ # Starting the java engine |
|
26 |
+ .jinit(force.init = TRUE) |
|
27 |
+ if (missing(libLoc)) { |
|
28 |
+ libLoc = system.file("extdata", "java", package = "RGMQLScalaLibs") |
|
29 |
+ } |
|
30 |
+ |
|
31 |
+ path = Sys.glob(paste0(libLoc, "/*.jar")) |
|
32 |
+ available_local_files <- list.files(libLoc, full.names = FALSE, |
|
33 |
+ pattern = "\\.jar$") |
|
34 |
+ |
|
35 |
+ # Check if all the files are there |
|
36 |
+ if (!"GMQL.jar" %in% available_local_files) |
|
37 |
+ stop("GMQL jar not available") |
|
38 |
+ |
|
39 |
+ if (length(path) > 0) |
|
40 |
+ rJava::.jaddClassPath(path) |
|
41 |
+ |
|
42 |
+rJava::.jaddClassPath(dirname(path)) |
|
43 |
+} |
|
44 |
+ |
... | ... |
@@ -53,14 +53,14 @@ DESC <- function(...) |
53 | 53 |
ords <- c(...) |
54 | 54 |
ords = ords[!ords %in% ""] |
55 | 55 |
ords = ords[!duplicated(ords)] |
56 |
- if(length(ords)<=0) |
|
56 |
+ if(!length(ords)) |
|
57 | 57 |
order_matrix <- .jnull("java/lang/String") |
58 | 58 |
else |
59 | 59 |
{ |
60 |
- order_matrix <- t(sapply(ords, function(x) { |
|
60 |
+ order_matrix <- t(vapply(ords, function(x) { |
|
61 | 61 |
new_value = c("DESC",x) |
62 | 62 |
matrix <- matrix(new_value) |
63 |
- })) |
|
63 |
+ },character(2))) |
|
64 | 64 |
} |
65 | 65 |
order_matrix |
66 | 66 |
} |
... | ... |
@@ -75,14 +75,14 @@ ASC <- function(...) |
75 | 75 |
ords <- c(...) |
76 | 76 |
ords = ords[!ords %in% ""] |
77 | 77 |
ords = ords[!duplicated(ords)] |
78 |
- if(length(ords)<=0) |
|
78 |
+ if(!length(ords)) |
|
79 | 79 |
order_matrix <- .jnull("java/lang/String") |
80 | 80 |
else |
81 | 81 |
{ |
82 |
- order_matrix <- t(sapply(ords, function(x) { |
|
82 |
+ order_matrix <- t(vapply(ords, function(x) { |
|
83 | 83 |
new_value = c("ASC",x) |
84 | 84 |
matrix <- matrix(new_value) |
85 |
- })) |
|
85 |
+ },character(2))) |
|
86 | 86 |
} |
87 | 87 |
order_matrix |
88 | 88 |
} |
... | ... |
@@ -82,6 +82,7 @@ login_gmql <- function(url, username = NULL, password = NULL) |
82 | 82 |
{ |
83 | 83 |
assign("authToken",content$authToken,.GlobalEnv) |
84 | 84 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
85 |
+ url <- paste0(url,"/") |
|
85 | 86 |
WrappeR$save_tokenAndUrl(authToken,url) |
86 | 87 |
print(paste("your Token is",authToken)) |
87 | 88 |
} |
... | ... |
@@ -793,7 +794,7 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL, |
793 | 794 |
folderPath <- paste0(folderPath,"/files") |
794 | 795 |
|
795 | 796 |
files <- list.files(folderPath,full.names = TRUE) |
796 |
- if(length(files)==0) |
|
797 |
+ if(!length(files)) |
|
797 | 798 |
stop("no files present") |
798 | 799 |
count = .counter(0) |
799 | 800 |
|
... | ... |
@@ -801,9 +802,9 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL, |
801 | 802 |
file <- httr::upload_file(x) |
802 | 803 |
}) |
803 | 804 |
|
804 |
- list_files_names <- sapply(list_files, function(x) { |
|
805 |
+ list_files_names <- vapply(list_files, function(x) { |
|
805 | 806 |
paste0("file",count()) |
806 |
- }) |
|
807 |
+ },character(1)) |
|
807 | 808 |
|
808 | 809 |
names(list_files) <- list_files_names |
809 | 810 |
req <- httr::GET(url) |
... | ... |
@@ -817,7 +818,7 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL, |
817 | 818 |
{ |
818 | 819 |
schema_name <- list.files(folderPath, pattern = "*.schema$", |
819 | 820 |
full.names = TRUE) |
820 |
- if(length(schema_name)==0) |
|
821 |
+ if(!length(schema_name)) |
|
821 | 822 |
stop("schema must be present") |
822 | 823 |
|
823 | 824 |
list_files <- list(list("schema" = httr::upload_file(schema_name)), |
... | ... |
@@ -832,7 +833,7 @@ upload_dataset <- function(url, datasetName, folderPath, schemaName = NULL, |
832 | 833 |
{ |
833 | 834 |
schema_name <- list.files(folderPath, pattern = "*.schema$", |
834 | 835 |
full.names = TRUE) |
835 |
- if(length(schema_name)==0) |
|
836 |
+ if(!length(schema_name)) |
|
836 | 837 |
stop("schema must be present") |
837 | 838 |
|
838 | 839 |
list_files <- list(list("schema" = httr::upload_file(schema_name)), |
... | ... |
@@ -982,9 +983,7 @@ download_as_GRangesList <- function(url,datasetName) |
982 | 983 |
{ |
983 | 984 |
list <- show_samples_list(url,datasetName) |
984 | 985 |
samples <- list$samples |
985 |
- sample_list_name <- sapply(samples, function(x){ |
|
986 |
- name <- x$name |
|
987 |
- }) |
|
986 |
+ sample_list_name <- vapply(samples, function(x) x$name, character(1)) |
|
988 | 987 |
|
989 | 988 |
sampleList <- lapply(samples, function(x){ |
990 | 989 |
name <- x$name |
... | ... |
@@ -1050,7 +1049,7 @@ sample_metadata <- function(url, datasetName,sampleName) |
1050 | 1049 |
#trasform text to list |
1051 | 1050 |
metadata <- strsplit(content, "\n") |
1052 | 1051 |
metadata <- strsplit(unlist(metadata), "\t") |
1053 |
- names(metadata) <- sapply(metadata, `[[`, 1) |
|
1052 |
+ names(metadata) <- vapply(metadata, `[[`, character(1),1) |
|
1054 | 1053 |
listMeta <- lapply(metadata, `[`, -1) |
1055 | 1054 |
|
1056 | 1055 |
if(req$status_code !=200) |
... | ... |
@@ -1126,13 +1125,11 @@ sample_region <- function(url, datasetName,sampleName) |
1126 | 1125 |
temp <- tempfile("temp") #use temporary files |
1127 | 1126 |
write.table(content,temp,quote = FALSE,sep = '\t',col.names = FALSE, |
1128 | 1127 |
row.names = FALSE) |
1129 |
- if(schema_type=="gtf") |
|
1128 |
+ if(identical(schema_type, "gtf")) |
|
1130 | 1129 |
samples <- rtracklayer::import(temp,format = "gtf") |
1131 | 1130 |
else |
1132 | 1131 |
{ |
1133 |
- vector_field <- sapply(list$fields,function(x){ |
|
1134 |
- name <- x$name |
|
1135 |
- }) |
|
1132 |
+ vector_field <- vapply(list$fields,function(x)x$name,character(1)) |
|
1136 | 1133 |
df <- data.table::fread(temp,header = FALSE,sep = "\t") |
1137 | 1134 |
a <- df[1,2] |
1138 | 1135 |
if(is.na(as.numeric(a))) |
1141 | 1138 |
similarity index 97% |
1142 | 1139 |
rename from vignettes/my-vignette.Rmd |
1143 | 1140 |
rename to vignettes/RGMQL-vignette.Rmd |
... | ... |
@@ -7,7 +7,7 @@ date: "`r Sys.Date()`" |
7 | 7 |
bibliography: bibliography.bib |
8 | 8 |
output: BiocStyle::pdf_document |
9 | 9 |
vignette: > |
10 |
- %\VignetteIndexEntry{Vignette Title} |
|
10 |
+ %\VignetteIndexEntry{RGMQL: GenoMetric Query Language for R/Bioconductor} |
|
11 | 11 |
%\VignetteEngine{knitr::rmarkdown} |
12 | 12 |
%\VignetteEncoding{UTF-8} |
13 | 13 |
link-citations: true |
... | ... |
@@ -218,12 +218,12 @@ CPUs/system while managing datasets (both GMQL or generic text plain datasets). |
218 | 218 |
### Initialization |
219 | 219 |
|
220 | 220 |
Load and attach the RGMQL package in a R session using library function: |
221 |
-```{r, initialization, eval = TRUE} |
|
221 |
+```{r, initialization} |
|
222 | 222 |
library('RGMQL') |
223 | 223 |
``` |
224 | 224 |
Before starting using any GMQL operation we need to initialise the GMQL |
225 | 225 |
context with the following code: |
226 |
-```{r, init, eval = TRUE} |
|
226 |
+```{r, init} |
|
227 | 227 |
init_gmql() |
228 | 228 |
``` |
229 | 229 |
The function *init_gmql()* initializes the context of scalable data management |
... | ... |
@@ -245,7 +245,7 @@ A local dataset is a folder with sample files (region files and correspondent |
245 | 245 |
metadata files) on the user computer. |
246 | 246 |
As data are already in the user computer, we simply execute: |
247 | 247 |
|
248 |
-```{r, read GMQL dataset, eval = TRUE} |
|
248 |
+```{r, read GMQL dataset} |
|
249 | 249 |
gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL") |
250 | 250 |
data_out = read_dataset(gmql_dataset_path) |
251 | 251 |
``` |
... | ... |
@@ -260,7 +260,7 @@ For better integration in the R environment and with other R packages, |
260 | 260 |
we provide the *read()* function to read directly from R memory/environment |
261 | 261 |
using GRangesList as input. |
262 | 262 |
|
263 |
-```{r, read GRangesList, eval = TRUE} |
|
263 |
+```{r, read GRangesList} |
|
264 | 264 |
library("GenomicRanges") |
265 | 265 |
gr1 <- GRanges(seqnames = "chr2", |
266 | 266 |
ranges = IRanges(103, 106), strand = "+", score = 5L, GC = 0.45) |
... | ... |
@@ -294,7 +294,7 @@ Consider mutation data samples of human breast cancer cases. |
294 | 294 |
For each sample, quantify the mutations in each exon and select the exons |
295 | 295 |
with at least one mutation. Return the list of samples ordered by |
296 | 296 |
the number of such exons. |
297 |
-```{r, query, eval = TRUE} |
|
297 |
+```{r, query} |
|
298 | 298 |
|
299 | 299 |
# These statements define the paths to the folders "EXON" and "MUT" in the |
300 | 300 |
# subdirectory "example" of the package "RGMQL" |
... | ... |
@@ -341,7 +341,7 @@ exon_res = arrange(exon3, list(DESC("exon_count"))) |
341 | 341 |
|
342 | 342 |
If you want to store persistently the result, you can materialize it into |
343 | 343 |
specific path defined as input parameter. |
344 |
-```{r, materialize, eval = TRUE} |
|
344 |
+```{r, materialize} |
|
345 | 345 |
# Materialize the result dataset on disk |
346 | 346 |
collect(exon_res) |
347 | 347 |
``` |
... | ... |
@@ -363,7 +363,7 @@ After the execution, the context of scalable data management engine is stopped |
363 | 363 |
and a new invocation of *init_gmql()* is needed. |
364 | 364 |
|
365 | 365 |
Beside *execute()* we can use: |
366 |
-```{r, take, eval = TRUE} |
|
366 |
+```{r, take} |
|
367 | 367 |
g <- take(exon_res, rows = 45) |
368 | 368 |
``` |
369 | 369 |
to execute all *collect()* commands in the RGMQL query and extract data |
... | ... |
@@ -405,7 +405,7 @@ with user and password, or as guest. |
405 | 405 |
Upon successful logon, you get a request token that you must use |
406 | 406 |
in every subsequent REST call. |
407 | 407 |
Login can be performed using the function: |
408 |
-```{r, eval = TRUE} |
|
408 |
+```{r, init with login} |
|
409 | 409 |
test_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
410 | 410 |
login_gmql(test_url) |
411 | 411 |
``` |
... | ... |
@@ -455,7 +455,7 @@ download_dataset(test_url, name_dataset) |
455 | 455 |
``` |
456 | 456 |
|
457 | 457 |
Once download is done, we can logout from remote repository using: |
458 |
-```{r, logout, eval = TRUE} |
|
458 |
+```{r, logout} |
|
459 | 459 |
logout_gmql(test_url) |
460 | 460 |
``` |
461 | 461 |
*logout_gmql()* deletes the $authToken$ from R environment. |
... | ... |
@@ -473,13 +473,13 @@ infrastructure with login function: |
473 | 473 |
login_gmql(test_url) |
474 | 474 |
``` |
475 | 475 |
Otherwise, we can initialize the data engine with a remote url: |
476 |
-```{r, initialize remote, eval = TRUE} |
|
476 |
+```{r, initialize remote} |
|
477 | 477 |
init_gmql(url = test_url) |
478 | 478 |
``` |
479 | 479 |
in this way login is automatically performed as specified above |
480 | 480 |
|
481 | 481 |
After initialization, we can start building our query: |
482 |
-```{r, remote query, eval = TRUE} |
|
482 |
+```{r, remote query} |
|
483 | 483 |
|
484 | 484 |
## Read the remote dataset HG19_TCGA_dnaseq |
485 | 485 |
## Read the remote dataset HG19_BED_ANNOTATION |
... | ... |
@@ -530,7 +530,7 @@ data management engine is not stopped, and can be used for further queries. |
530 | 530 |
## Mixed Processing |
531 | 531 |
|
532 | 532 |
The processing flavour can be switched using the function: |
533 |
-```{r, switch mode, eval = TRUE} |
|
533 |
+```{r, switch mode} |
|
534 | 534 |
remote_processing(TRUE) |
535 | 535 |
``` |
536 | 536 |
An user can switch processing mode until the first *collect()* has been performed. |
... | ... |
@@ -538,7 +538,7 @@ An user can switch processing mode until the first *collect()* has been performe |
538 | 538 |
This kind of processing comes from the fact that the *read()* function can |
539 | 539 |
accept either a local dataset or a remote repository dataset, |
540 | 540 |
even in the same query as in the following example: |
541 |
-```{r, mixed query, eval = TRUE} |
|
541 |
+```{r, mixed query} |
|
542 | 542 |
|
543 | 543 |
|
544 | 544 |
# This statement defines the path to the folder "MUT" in the subdirectory |
... | ... |
@@ -613,7 +613,7 @@ packages. |
613 | 613 |
## Import/Export |
614 | 614 |
|
615 | 615 |
We can import a GMQL dataset into R environment as follows: |
616 |
-```{r, import, eval = TRUE} |
|
616 |
+```{r, import} |
|
617 | 617 |
# This statement defines the path to the folder "EXON" in the subdirectory |
618 | 618 |
# "example" of the package "RGMQL" |
619 | 619 |
|
... | ... |
@@ -627,7 +627,7 @@ data |
627 | 627 |
the second parameter *is_gtf* must specify the file format: .GTF or .GDM. |
628 | 628 |
|
629 | 629 |
We can export a GRangesList as GMQL dataset as follows: |
630 |
-```{r, export, eval = TRUE} |
|
630 |
+```{r, export} |
|
631 | 631 |
# This statement defines the path to the subdirectory "example" of the |
632 | 632 |
# package "RGMQL" |
633 | 633 |
|
... | ... |
@@ -644,7 +644,7 @@ the second parameter *is_gtf* specifies the file format: .GTF or .GDM. |
644 | 644 |
|
645 | 645 |
We can also import only a part of a GMQL dataset into R environment, |
646 | 646 |
by filtering its content as follows: |
647 |
-```{r, filter_extract, eval = TRUE} |
|
647 |
+```{r, filter_extract} |
|
648 | 648 |
# This statement defines the path to the folder "TEAD" in the subdirectory |
649 | 649 |
# "example" of the package "RGMQL" |
650 | 650 |
|