% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gmql_join.R
\docType{methods}
\name{merge}
\alias{merge}
\alias{merge,GMQLDataset,GMQLDataset-method}
\title{Method merge}
\usage{
\S4method{merge}{GMQLDataset,GMQLDataset}(x, y,
  genometric_predicate = NULL, region_output = "CAT",
  joinBy = conds(), reg_attr = c(""))
}
\arguments{
\item{x}{GMQLDataset class object}

\item{y}{GMQLDataset class object}

\item{genometric_predicate}{it is a list of DISTAL objects.
For details of DISTAL objects see:
\code{\link{DLE}}, \code{\link{DGE}}, \code{\link{DL}}, \code{\link{DG}},
\code{\link{MD}}, \code{\link{UP}}, \code{\link{DOWN}}}

\item{region_output}{single string that declares which region is given in 
output for each input pair of left dataset and right dataset regions 
satisfying the genometric predicate and/or the region attribute predicate:
\itemize{
\item{LEFT: It outputs the anchor regions from 'x' that satisfy the 
genometric and/or region attribute predicate}
\item{RIGHT: It outputs the experiment regions from 'y' that satisfy the 
genometric and/or region attribute predicate}
\item{INT (intersection): It outputs the overlapping part (intersection) 
of the 'x' and 'y' regions that satisfy the genometric  and/or region 
attribute predicate; if the intersection is empty, no output is produced}
\item{CAT: It outputs the concatenation between the 'x' and 'y' regions 
that satisfy the genometric  and/or region attribute predicate, 
(i.e. the output regions defined as having left (right) coordinates equal 
to the minimum (maximum) of the corresponding coordinate values in the 
'x' and 'y' regions satisfying the genometric  and/or region attribute 
predicate)}
\item{LEFT_DIST: It outputs the duplicate elimination of 'x' output 
regions with the same coordinates and values, regardless of the 'y' paired 
region and its values. In this case, the output region attributes and their 
values are all and only those of 'x', and the output metadata are equal 
to the 'x' metadata, without additional prefixes}
\item{RIGHT_DIST: It outputs the duplicate elimination of 'y' output 
regions with the same coordinates and values, regardless of the 'x' paired 
region and its values. In this case, the output region attributes and their 
values are all and only those of 'y', and the output metadata are equal 
to the 'y' metadata, without additional prefixes}
\item{BOTH: It outputs the same regions as LEFT, but it adds in the output 
region attributes the coordinates of the 'y' paired region that, 
together with the 'x' output region, satisfies the genometric  and/or 
region attribute predicate}
}}

\item{joinBy}{\code{\link{condition_evaluation}} function to support 
methods with groupBy or joinBy input parameter}

\item{reg_attr}{vector of strings made up by region field attribute names, 
whose values in the paired left and right dataset regions must be equal in 
order to consider the two paired regions.
If specified, \emph{region_output} cannot be INT or CAT.}
}
\value{
GMQLDataset object. It contains the value to use as input 
for the subsequent GMQLDataset method
}
\description{
Wrapper to GMQL JOIN operator

It takes in input two datasets, respectively known as anchor 
(left) and experiment (right) and returns a dataset of samples consisting 
of regions extracted from the operands according to the specified conditions
(a.k.a. \emph{genometric_predicate} and \emph{region_attribute} predicate).
The number of generated output samples is the Cartesian product 
of the number of samples in the anchor and in the experiment dataset 
(if \emph{joinBy} is not specified).
The output metadata are the union of the input metadata, 
with their attribute names prefixed with left or right dataset name, 
respectively.
}
\examples{

## This statement initializes and runs the GMQL server for local execution 
## and creation of results on disk. Then, with system.file() it defines 
## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory 
## "example" of the package "RGMQL" and opens such folders as GMQL 
## datasets named TSS and HM, respectively, using CustomParser

init_gmql()
test_path <- system.file("example", "DATASET", package = "RGMQL")
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
TSS = read_gmql(test_path)
HM = read_gmql(test_path2)

## Given a dataset HM and one called TSS with a sample including 
## Transcription Start Site annotations, this statement searches for those 
## regions of HM that are at a minimal distance from a transcription start 
## site (TSS) and takes the first/closest one for each TSS, provided that 
## such distance is less than 120K bases and joined TSS and HM 
## samples are obtained from the same provider (joinby clause).

join_data = merge(TSS, HM, genometric_predicate = list(MD(1), DLE(120000)), 
    conds("provider"), region_output = "RIGHT")


}