#' Method cover
#'
#' It takes as input a dataset containing one or more samples and returns 
#' another dataset (with a single sample, if no \emph{groupBy} option is 
#' specified) by “collapsing” the input dataset samples and their regions 
#' according to certain rules specified by the input parameters.
#' The attributes of the output genomic regions are only the region 
#' coordinates, and Jaccard indexes (\emph{JaccardIntersect} and 
#' \emph{JaccardResult}).
#' Jaccard Indexes are standard measures of similarity of the contributing 
#' regions, added as default region attributes.
#' The JaccardIntersect index is calculated as the ratio between the lengths 
#' of the intersection and of the union of the contributing regions; 
#' the JaccardResult index is calculated as the ratio between the lengths 
#' of the result and the union of the contributing regions.
#' If aggregate functions are specified, a new region attribute is added for 
#' each aggregate function specified.
#' Output metadata are the union of the input ones.
#' If \emph{groupBy} clause is specified, the input samples are partitioned 
#' in groups, each with distinct values of the grouping metadata attributes, 
#' and the \emph{cover} operation is separately applied to each group, 
#' yielding to one sample in the result for each group.
#' Input samples that do not satisfy the \emph{groupBy} condition 
#' are disregarded.
#' 
#' @include AllClasses.R
#' @importFrom methods is
#' @importFrom rJava J .jnull .jarray
#' 
#' @param .data GMQLDataset class object
#' @param min_acc minimum number of overlapping regions to be considered 
#' during execution. It is an integer number, declared also as string.
#' minAcc accepts also:
#' \itemize{
#' \item{PARAMETER class object: \code{\link{ALL}}, that represents the number 
#' of samples in the input dataset}
#' \item{an expression built using PARAMETER object: (ALL() + N) / K or
#' ALL() / K, with N and K integer values }
#' }
#' @param max_acc maximum number of overlapping regions to be considered 
#' during execution. It is an integer number, declared also as string.
#' maxAcc accept also:
#' \itemize{
#' \item{PARAMETER class object: \code{\link{ALL}}, that represents the number 
#' of samples in the input dataset}
#' \item{PARAMETER class object: \code{\link{ANY}}}, that acts as a wildcard, 
#' considering any amount of overlapping regions.
#' \item{an expression built using PARAMETER object: (ALL() + N) / K or
#' ALL() / K, with N and K integer values  }
#' }
#' @param groupBy \code{\link{conds}} function to support methods with 
#' groupBy or JoinBy input parameter
#' 
#' @param ... a series of expressions separated by comma in the form 
#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of 
#' class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, 
#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, 
#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, 
#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, 
#' \code{\link{Q2}}, \code{\link{Q3}}.
#' Every aggregate accepts a string value, except for COUNT, which does not 
#' have any value.
#' Argument of 'aggregate function' must exist in schema, i.e. among region 
#' attributes. Two styles are allowed:
#' \itemize{
#' \item list of key-value pairs: e.g. sum = SUM("pvalue")
#' \item list of values: e.g. SUM("pvalue")
#' }
#' "mixed style" is not allowed
#'
#' @param variation string identifying the cover GMQL operator variation.
#' The admissible strings are:
#' \itemize{
#' \item{FLAT: It returns the regions that start from the first end and stop 
#' at the last end of the regions which would contribute to each region 
#' of the \emph{cover}.}
#' \item{SUMMIT: It returns regions that start from a position
#' where the number of intersecting regions is not increasing afterwards and
#' stop at a position where either the number of intersecting regions 
#' decreases, or it violates the max accumulation index.}
#' \item{HISTOGRAM: It returns the non-overlapping regions contributing to 
#' the \emph{cover}, each with its accumulation index value, which is assigned 
#' to the \emph{AccIndex} region attribute.}
#' \item{COVER: default value.}
#' }
#' It can be all caps or lowercase
#' 
#' @return GMQLDataset object. It contains the value to use as input 
#' for the subsequent GMQLDataset method
#' 
#' @examples
#' 
#' ## This statement initializes and runs the GMQL server for local execution 
#' ## and creation of results on disk. Then, with system.file() it defines 
#' ## the path to the folder "DATASET" in the subdirectory "example"
#' ## of the package "RGMQL" and opens such file as a GMQL dataset named "exp" 
#' ## using CustomParser
#' 
#' init_gmql()
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' exp = read_gmql(test_path)
#'   
#' ## The following statement produces an output dataset with a single output 
#' ## sample. The COVER operation considers all areas defined by a minimum 
#' ## of two overlapping regions in the input samples, up to any amount of 
#' ## overlapping regions.
#' 
#' res = cover(exp, 2, ANY())
#'
#' ## The following GMQL statement computes the result grouping the input 
#' ## exp samples by the values of their cell metadata attribute, 
#' ## thus one output res sample is generated for each cell value; 
#' ## output regions are produced where at least 2 and at most 3 regions 
#' ## of grouped exp samples overlap, setting as attributes of the resulting 
#' ## regions the minimum pvalue of the overlapping regions (min_pvalue) 
#' ## and their Jaccard indexes (JaccardIntersect and JaccardResult).
#' 
#' res = cover(exp, 2, 3, groupBy = conds("cell"), min_pValue = MIN("pvalue"))
#' 
#' @name cover
#' @rdname cover
#' @aliases cover,GMQLDataset-method
#' @aliases cover-method
#' @export
setMethod("cover","GMQLDataset", function(
  .data, min_acc, max_acc, groupBy = conds(), variation = "cover", ...
  ) {
  val <- value(.data)
  s_min <- substitute(min_acc)
  s_min <- .trasform_cover(deparse(s_min))                
  s_max <- substitute(max_acc)
  s_max <- .trasform_cover(deparse(s_max))
  
  q_max <- .check_cover_param(s_max,FALSE)
  q_min <- .check_cover_param(s_min,TRUE)
  
  flag = toupper(variation)
  aggregates = list(...)
  gmql_cover(val, q_min, q_max, groupBy, aggregates, flag)
})

gmql_cover <- function(
  input_data, 
  min_acc, 
  max_acc, 
  groupBy,
  aggregates,
  flag
) {
  if(!is.null(groupBy)) {
    if("condition" %in% names(groupBy)) {
      cond <- .join_condition(groupBy)
      
      if(is.null(cond))
        join_matrix <- .jnull("java/lang/String")
      else
        join_matrix <- .jarray(cond, dispatch = TRUE)
      
    } else
      stop("use function conds()")
    
  } else
    join_matrix <- .jnull("java/lang/String")
  
  if(!is.null(aggregates) && length(aggregates)) {
    aggr <- .aggregates(aggregates,"AGGREGATES")
    metadata_matrix <- .jarray(aggr, dispatch = TRUE)
  } else
    metadata_matrix <- .jnull("java/lang/String")
  
  WrappeR <- J("it/polimi/genomics/r/Wrapper")
  response <- switch(
    flag,
    "COVER" = WrappeR$cover(
      min_acc, max_acc, join_matrix, metadata_matrix, input_data),
    "FLAT" = WrappeR$flat(
      min_acc, max_acc, join_matrix,metadata_matrix, input_data),
    "SUMMIT" = WrappeR$summit(
      min_acc,max_acc, join_matrix, metadata_matrix, input_data),
    "HISTOGRAM" = WrappeR$histogram(
      min_acc, max_acc, join_matrix, metadata_matrix, input_data)
  )
  if(is.null(response))
    stop("no admissible variation: cover, flat, summit, histogram")
  
  error <- strtoi(response[1])
  val <- response[2]
  if(error)
    stop(val)
  else
    GMQLDataset(val)
}

.check_cover_param <- function(param, is_min) {
  if(length(param) > 1)
    stop("length > 1")
  
  if(is.character(param)) {
    if(is_min && identical(param,"ANY"))
      stop("min cannot assume ANY as value")
    
    return(param)
    
  } else
    stop("invalid input data")
}

.trasform_cover <- function(predicate) {
    predicate <- gsub("\\(\\)","",predicate)
}