43d9e4a1 |
#' GMQL Function: EXECUTE
#'
|
83eb0624 |
#' It executes GMQL query.
|
5b358a31 |
#' The function works only after invoking at least one collect
|
43d9e4a1 |
#'
#' @importFrom rJava J
#'
#' @return None
#'
#' @examples
|
18697071 |
#' ## This statement initializes and runs the GMQL server for local execution
|
83eb0624 |
#' ## and creation of results on disk. Then, with system.file() it defines
|
18697071 |
#' ## the path to the folder "DATASET" in the subdirectory "example"
|
83eb0624 |
#' ## of the package "RGMQL" and opens such folder as a GMQL dataset
#' ## named "data"
#'
|
94a33e57 |
#' init_gmql()
|
18697071 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' data = read_gmql(test_path)
|
83eb0624 |
#'
|
18697071 |
#' ## The following statement materializes the dataset "data", previoulsy read,
#' ## at the specific destination test_path into local folder "ds1" opportunely
#' ## created
|
83eb0624 |
#'
#' collect(data, dir_out = test_path)
|
43d9e4a1 |
#'
|
83eb0624 |
#' ## This statement executes GMQL query.
|
43d9e4a1 |
#' \dontrun{
|
83eb0624 |
#'
|
43d9e4a1 |
#' execute()
#' }
#' @export
#'
|
73652f4d |
execute <- function() {
WrappeR <- J("it/polimi/genomics/r/Wrapper")
remote_proc <- WrappeR$is_remote_processing()
datasets <- .jevalArray(WrappeR$get_dataset_list(), simplify = TRUE)
if(!remote_proc)
.download_or_upload(datasets)
response <- WrappeR$execute()
error <- strtoi(response[1])
val <- response[2]
if(error)
stop(val)
else {
if(remote_proc) {
isGTF <- FALSE
outformat <- WrappeR$outputMaterialize()
if(identical(outformat, "gtf"))
isGTF <- TRUE
url <- WrappeR$get_url()
.download_or_upload(datasets)
res <- serialize_query(url,isGTF,val)
|
43d9e4a1 |
}
|
73652f4d |
}
|
43d9e4a1 |
}
|
c9b073a5 |
.download_or_upload <- function(datasets) {
|
73652f4d |
WrappeR <- J("it/polimi/genomics/r/Wrapper")
data_list <- apply(datasets, 1, as.list)
url <- WrappeR$get_url()
remote <- WrappeR$is_remote_processing()
if(remote) {
lapply(data_list,function(x){
if(!is.null(x[[1]]) && !is.na(x[[1]]))
|
709330ce |
upload_dataset(url,x[[2]],x[[1]],x[[3]])})
|
73652f4d |
} else {
lapply(data_list,function(x){
if(!is.null(x[[2]]) && !is.na(x[[2]]))
download_dataset(url,x[[2]],x[[1]])})
}
|
43d9e4a1 |
}
|
4ad24353 |
collect.GMQLDataset <- function(x, dir_out = getwd(), name = "ds1") {
|
73652f4d |
ptr_data <- value(x)
gmql_materialize(ptr_data, dir_out, name)
|
e5131ba8 |
}
|
07f76a54 |
|
5b358a31 |
#' Method collect
|
43d9e4a1 |
#'
|
e5131ba8 |
#' @description Wrapper to GMQL MATERIALIZE operator
#'
#' @description It saves the content of a dataset that contains samples
#' metadata and regions. It is normally used to persist the content of any
#' dataset generated during a GMQL query.
|
94a33e57 |
#' Any dataset can be materialized, but the operation can be time-consuming.
|
43d9e4a1 |
#' For best performance, materialize the relevant data only.
#'
#' @importFrom rJava J
|
e5131ba8 |
#' @importFrom dplyr collect
|
43d9e4a1 |
#'
|
fce655ee |
#' @param x GMQLDataset class object
|
73652f4d |
#' @param name name of the result dataset. By default it is the string "ds1"
|
18697071 |
#' @param dir_out destination folder path. By default it is the current
#' working directory of the R process
|
65260a4d |
#'
|
e5131ba8 |
#' @details
#'
#' An error occures if the directory already exist at the destination
#' folder path
|
65260a4d |
#'
|
43d9e4a1 |
#' @return None
#'
#' @examples
|
e5131ba8 |
#'
#' ## This statement initializes and runs the GMQL server for local execution
#' ## and creation of results on disk. Then, with system.file() it defines
#' ## the path to the folder "DATASET" in the subdirectory "example"
#' ## of the package "RGMQL" and opens such file as a GMQL dataset named
|
dda0cfa0 |
#' ## "data" using CustomParser
|
43d9e4a1 |
#'
|
94a33e57 |
#' init_gmql()
|
e5131ba8 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
|
18697071 |
#' data = read_gmql(test_path)
|
e5131ba8 |
#'
|
18697071 |
#' ## The following statement materializes the dataset 'data', previoulsy read,
#' ## at the specific destination test_path into local folder "ds1" opportunely
#' ## created
|
e5131ba8 |
#'
#' collect(data, dir_out = test_path)
|
43d9e4a1 |
#'
|
e5131ba8 |
#' @name collect
#' @rdname collect
#' @aliases collect,GMQLDataset-method
|
fce655ee |
#' @aliases collect-method
|
43d9e4a1 |
#' @export
|
e5131ba8 |
setMethod("collect", "GMQLDataset",collect.GMQLDataset)
|
07f76a54 |
|
73652f4d |
gmql_materialize <- function(input_data, name, dir_out) {
WrappeR <- J("it/polimi/genomics/r/Wrapper")
remote_proc <- WrappeR$is_remote_processing()
if(grepl("\\.",name))
stop("dataset name cannot contains dot")
if(!remote_proc) {
dir_out <- sub("/*[/]$","",dir_out)
res_dir_out <- file.path(dir_out,name)
if(!dir.exists(res_dir_out))
dir.create(res_dir_out)
}
else
res_dir_out <- name
response <- WrappeR$materialize(input_data, res_dir_out)
error <- strtoi(response[1])
val <- response[2]
if(error)
stop(val)
else
invisible(NULL)
|
43d9e4a1 |
}
|
5b358a31 |
#' Method take
|
43d9e4a1 |
#'
|
83eb0624 |
#' It saves the content of a dataset that contains samples metadata
|
dda0cfa0 |
#' and regions as GRangesList.
|
83eb0624 |
#' It is normally used to store in memory the content of any dataset
|
5b358a31 |
#' generated during a GMQL query. The operation can be very time-consuming.
|
dda0cfa0 |
#' If you invoked any materialization before take function,
|
83eb0624 |
#' all those datasets are materialized as folders.
|
43d9e4a1 |
#'
|
83eb0624 |
#' @importFrom GenomicRanges makeGRangesFromDataFrame
#' @importFrom S4Vectors metadata
|
43d9e4a1 |
#' @importFrom stats setNames
|
83eb0624 |
#' @importFrom rJava J .jevalArray
#' @importFrom GenomicRanges GRangesList
|
43d9e4a1 |
#'
|
f345b6e2 |
#' @param .data returned object from any GMQL function
|
18697071 |
#' @param rows number of regions rows for each sample that you want to
|
83eb0624 |
#' retrieve and store in memory.
|
18697071 |
#' By default it is 0, that means take all rows for each sample
|
65260a4d |
#'
|
83eb0624 |
#' @param ... Additional arguments for use in other specific methods of the
#' generic take function
|
65260a4d |
#'
|
83eb0624 |
#' @return GRangesList with associated metadata
|
43d9e4a1 |
#'
#' @examples
|
83eb0624 |
#' ## This statement initializes and runs the GMQL server for local execution
#' ## and creation of results on disk. Then, with system.file() it defines
#' ## the path to the folder "DATASET" in the subdirectory "example"
#' ## of the package "RGMQL" and opens such folder as a GMQL dataset
|
dda0cfa0 |
#' ## named "rd" using CustomParser
|
83eb0624 |
#'
|
94a33e57 |
#' init_gmql()
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
|
18697071 |
#' rd = read_gmql(test_path)
#'
|
dda0cfa0 |
#' ## This statement creates a dataset called 'aggr' which contains one
|
18697071 |
#' ## sample for each antibody_target and cell value found within the metadata
|
dda0cfa0 |
#' ## of the 'rd' dataset sample; each created sample contains all regions
#' ## from all 'rd' samples with a specific value for their
|
18697071 |
#' ## antibody_target and cell metadata attributes.
#'
#' aggr = aggregate(rd, conds(c("antibody_target", "cell")))
#'
|
dda0cfa0 |
#' ## This statement performs the query and returns the resulted dataset as
|
18697071 |
#' ## GRangesList named 'taken'. It returns only the first 45 regions of
|
dda0cfa0 |
#' ## each sample present into GRangesList and all the medatata associated
#' ## with each sample
|
83eb0624 |
#'
|
65260a4d |
#' taken <- take(aggr, rows = 45)
|
43d9e4a1 |
#'
|
e5131ba8 |
#' @name take
#' @rdname take
|
65260a4d |
#' @aliases take-method
|
43d9e4a1 |
#' @export
|
65260a4d |
setMethod("take", "GMQLDataset",
|
73652f4d |
function(.data, rows = 0L)
{
ptr_data <- value(.data)
gmql_take(ptr_data, rows)
})
|
65260a4d |
|
4ad24353 |
gmql_take <- function(input_data, rows) {
|
73652f4d |
rows <- as.integer(rows[1])
if(rows<0)
stop("rows cannot be negative")
WrappeR <- J("it/polimi/genomics/r/Wrapper")
response <- WrappeR$take(input_data, rows)
error <- strtoi(response[1])
data <- response[2]
if(error)
stop(data)
reg <- .jevalArray(WrappeR$get_reg(),simplify = TRUE)
if(is.null(reg))
stop("no regions defined")
meta <- .jevalArray(WrappeR$get_meta(),simplify = TRUE)
if(is.null(meta))
stop("no metadata defined")
schema <- .jevalArray(WrappeR$get_schema(),simplify = TRUE)
if(is.null(schema))
stop("no schema defined")
reg_data_frame <- as.data.frame(reg)
if (!length(reg_data_frame)){
return(GRangesList())
}
list <- split(reg_data_frame, reg_data_frame[1])
seq_name <- c("seqname","start","end","strand",schema)
sampleList <- lapply(list, function(x){
x <- x[-1]
names(x) <- seq_name
|
4ad24353 |
# start_numeric = as.numeric(levels(x$start))[x$start]
|
73652f4d |
start_numeric = as.numeric(x$start)
start_numeric = start_numeric + 1
x$start = start_numeric
#levels(x$start)[x$start] = start_numeric
g <- GenomicRanges::makeGRangesFromDataFrame(
x,
keep.extra.columns = TRUE,
start.field = "start",
end.field = "end")
})
gRange_list <- GRangesList(sampleList)
len = length(gRange_list)
names(gRange_list) <- paste0("S_",seq_len(len))
meta_list <- .metadata_from_frame_to_list(meta)
names(meta_list) <- paste0("S_",seq_len(len))
S4Vectors::metadata(gRange_list) <- meta_list
return(gRange_list)
|
43d9e4a1 |
}
|
4ad24353 |
.metadata_from_frame_to_list <- function(metadata_frame) {
|
73652f4d |
meta_frame <- as.data.frame(metadata_frame)
list <- split(meta_frame, meta_frame[1])
name_value_list <- lapply(list, function(x){x <- x[-1]})
meta_list <- lapply(name_value_list, function(x){
stats::setNames(as.list(as.character(x[[2]])), x[[1]])
})
|
43d9e4a1 |
}
|