% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/gmql_cover.R
\docType{methods}
\name{cover}
\alias{cover}
\alias{cover,GMQLDataset-method}
\title{Method cover}
\usage{
cover(.data, ...)

\S4method{cover}{GMQLDataset}(.data, min_acc, max_acc, groupBy = conds(),
  variation = "cover", ...)
}
\arguments{
\item{.data}{GMQLDataset class object}

\item{...}{a series of expressions separated by comma in the form 
\emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of 
class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, 
\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, 
\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, 
\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, 
\code{\link{Q2}}, \code{\link{Q3}}.
Every aggregate accepts a string value, except for COUNT, which does not 
take any argument.
The argument of an aggregate function must exist in the schema, i.e. among 
the region attributes. Two styles are allowed:
\itemize{
\item list of key-value pairs: e.g. sum = SUM("pvalue")
\item list of values: e.g. SUM("pvalue")
}
Mixing the two styles in the same call is not allowed.}

\item{min_acc}{minimum number of overlapping regions to be considered 
during execution. It is an integer number, declared also as string.
min_acc also accepts:
\itemize{
\item{PARAMETER class object: \code{\link{ALL}}, that represents the number 
of samples in the input dataset}
\item{an expression built using PARAMETER object: (ALL() + N) / K or
ALL() / K, with N and K integer values }
}}

\item{max_acc}{maximum number of overlapping regions to be considered 
during execution. It is an integer number, declared also as string.
max_acc also accepts:
\itemize{
\item{PARAMETER class object: \code{\link{ALL}}, that represents the number 
of samples in the input dataset}
\item{PARAMETER class object: \code{\link{ANY}}, that acts as a wildcard, 
considering any amount of overlapping regions.}
\item{an expression built using PARAMETER object: (ALL() + N) / K or
ALL() / K, with N and K integer values  }
}}

\item{groupBy}{\code{\link{conds}} function to support methods with 
groupBy or JoinBy input parameter}

\item{variation}{string identifying the cover GMQL operator variation.
The admissible strings are:
\itemize{
\item{FLAT: It returns the regions that start from the first end and stop 
at the last end of the regions which would contribute to each region 
of the \emph{cover}.}
\item{SUMMIT: It returns regions that start from a position
where the number of intersecting regions is not increasing afterwards and
stop at a position where either the number of intersecting regions 
decreases, or it violates the max accumulation index.}
\item{HISTOGRAM: It returns the non-overlapping regions contributing to 
the \emph{cover}, each with its accumulation index value, which is assigned 
to the \emph{AccIndex} region attribute.}
\item{COVER: default value.}
}
It can be all caps or lowercase}
}
\value{
GMQLDataset object. It contains the value to use as input 
for the subsequent GMQLDataset method
}
\description{
Wrapper to GMQL COVER operator

It takes as input a dataset containing one or more samples and returns 
another dataset (with a single sample, if no \emph{groupBy} option is 
specified) by \dQuote{collapsing} the input dataset samples and their regions 
according to certain rules specified by the input parameters.
The attributes of the output genomic regions are only the region 
coordinates, and Jaccard indexes (\emph{JaccardIntersect} and 
\emph{JaccardResult}).
Jaccard Indexes are standard measures of similarity of the contributing 
regions, added as default region attributes.
The JaccardIntersect index is calculated as the ratio between the lengths 
of the intersection and of the union of the contributing regions; 
the JaccardResult index is calculated as the ratio between the lengths 
of the result and the union of the contributing regions.
If aggregate functions are specified, a new region attribute is added for 
each aggregate function specified.
Output metadata are the union of the input ones.
If \emph{groupBy} clause is specified, the input samples are partitioned 
in groups, each with distinct values of the grouping metadata attributes, 
and the \emph{cover} operation is separately applied to each group, 
yielding one sample in the result for each group.
Input samples that do not satisfy the \emph{groupBy} condition 
are disregarded.
}
\examples{

## This statement initializes and runs the GMQL server for local execution 
## and creation of results on disk. Then, with system.file() it defines 
## the path to the folder "DATASET" in the subdirectory "example"
## of the package "RGMQL" and opens such folder as a GMQL dataset named "exp" 
## using CustomParser

init_gmql()
test_path <- system.file("example", "DATASET", package = "RGMQL")
exp = read_gmql(test_path)
  
## The following statement produces an output dataset with a single output 
## sample. The COVER operation considers all areas defined by a minimum 
## of two overlapping regions in the input samples, up to any amount of 
## overlapping regions.

res = cover(exp, 2, ANY())

## The following GMQL statement computes the result grouping the input 
## exp samples by the values of their cell metadata attribute, 
## thus one output res sample is generated for each cell value; 
## output regions are produced where at least 2 and at most 3 regions 
## of grouped exp samples overlap, setting as attributes of the resulting 
## regions the minimum pvalue of the overlapping regions (min_pValue) 
## and their Jaccard indexes (JaccardIntersect and JaccardResult).

res = cover(exp, 2, 3, groupBy = conds("cell"), min_pValue = MIN("pvalue"))

}