... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: RGMQL |
2 | 2 |
Type: Package |
3 | 3 |
Title: GenoMetric Query Language for R/Bioconductor |
4 |
-Version: 0.99.30 |
|
4 |
+Version: 0.99.31 |
|
5 | 5 |
Author: Simone Pallotta, Marco Masseroli |
6 | 6 |
Maintainer: Simone Pallotta <simonepallotta@hotmail.com> |
7 | 7 |
Description: This RGMQL package brings the GenoMetric Query Language (GMQL) |
... | ... |
@@ -1,7 +1,3 @@ |
1 |
- |
|
2 |
- |
|
3 |
- |
|
4 |
- |
|
5 | 1 |
#' GMQL Operation: COVER |
6 | 2 |
#' |
7 | 3 |
#' It takes as input a dataset containing one or more samples and returns |
... | ... |
@@ -134,11 +130,10 @@ |
134 | 130 |
#' } |
135 | 131 |
#' |
136 | 132 |
#' |
137 |
-#' @name cover |
|
138 |
-#' @rdname cover-methods |
|
139 |
-#' @aliases cover, cover-methods |
|
133 |
+#' @rdname GMQLDataset-class |
|
134 |
+#' @aliases cover, GMQLDataset--method |
|
140 | 135 |
#' |
141 |
-#' @exportMethod cover |
|
136 |
+#' @export |
|
142 | 137 |
#' |
143 | 138 |
setGeneric("cover", function(data, minAcc, maxAcc, ...) |
144 | 139 |
{ |
... | ... |
@@ -148,12 +143,11 @@ setGeneric("cover", function(data, minAcc, maxAcc, ...) |
148 | 143 |
min <- .check_cover_param(minAcc,TRUE) |
149 | 144 |
max <- .check_cover_param(maxAcc,FALSE) |
150 | 145 |
|
151 |
- gmql_cover(data,minAcc,maxAcc) |
|
146 |
+ gmql_cover(data,min,max,NULL,NULL,"COVER") |
|
152 | 147 |
}) |
153 | 148 |
|
154 |
-#' @name cover |
|
155 |
-#' @rdname cover-methods |
|
156 |
-#' @aliases cover, cover-methods |
|
149 |
+#' @rdname GMQLDataset-class |
|
150 |
+#' @aliases cover, GMQLDataset--method |
|
157 | 151 |
#' @export |
158 | 152 |
setMethod("cover", "GMQLDataset", |
159 | 153 |
function(data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL, |
... | ... |
@@ -166,12 +160,12 @@ setMethod("cover", "GMQLDataset", |
166 | 160 |
max <- .check_cover_param(maxAcc,FALSE) |
167 | 161 |
flag = toupper(variation) |
168 | 162 |
|
169 |
- gmql_cover(data@value, minAcc, maxAcc, groupBy, aggregates, |
|
170 |
- flag) |
|
163 |
+ gmql_cover(data@value, min, max, groupBy, aggregates, |
|
164 |
+ flag) |
|
171 | 165 |
}) |
172 | 166 |
|
173 |
-gmql_cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
|
174 |
- aggregates = NULL, flag = "cover") |
|
167 |
+gmql_cover <- function(data, minAcc, maxAcc, groupBy = NULL, |
|
168 |
+ aggregates = NULL, flag) |
|
175 | 169 |
{ |
176 | 170 |
if(!is.null(groupBy)) |
177 | 171 |
join_condition_matrix <- .jarray(.join_condition(groupBy), |
... | ... |
@@ -188,14 +182,13 @@ gmql_cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
188 | 182 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
189 | 183 |
response <- switch(flag, |
190 | 184 |
"COVER" = WrappeR$cover(minAcc, maxAcc, join_condition_matrix, |
191 |
- metadata_matrix, input_data), |
|
185 |
+ metadata_matrix, data), |
|
192 | 186 |
"FLAT" = WrappeR$flat(minAcc, maxAcc, join_condition_matrix, |
193 |
- metadata_matrix,input_data), |
|
187 |
+ metadata_matrix, data), |
|
194 | 188 |
"SUMMIT" = WrappeR$summit(minAcc,maxAcc, join_condition_matrix, |
195 |
- metadata_matrix, input_data), |
|
189 |
+ metadata_matrix, data), |
|
196 | 190 |
"HISTOGRAM" = WrappeR$histogram(minAcc, maxAcc, |
197 |
- join_condition_matrix, metadata_matrix, |
|
198 |
- input_data)) |
|
191 |
+ join_condition_matrix, metadata_matrix, data)) |
|
199 | 192 |
if(is.null(response)) |
200 | 193 |
stop("no admissible variation: cover, flat, summit, histogram") |
201 | 194 |
|
... | ... |
@@ -42,9 +42,10 @@ |
42 | 42 |
#' |
43 | 43 |
#' res <- union(data1, data2) |
44 | 44 |
#' |
45 |
-#' @rdname GMQLDataset-class |
|
46 |
-#' @aliases union, GMQLDataset-method |
|
45 |
+#' @rdname union-method |
|
46 |
+#' @aliases union, union-method |
|
47 | 47 |
#' @export |
48 |
+#' |
|
48 | 49 |
setMethod("union", c("GMQLDataset","GMQLDataset"), |
49 | 50 |
function(x, y) |
50 | 51 |
{ |
... | ... |
@@ -1,25 +1,107 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/Dataset-class.R, R/Select.R, R/Union.R |
|
3 |
-\docType{class} |
|
4 |
-\name{GMQLDataset-class} |
|
2 |
+% Please edit documentation in R/Cover.R, R/Dataset-class.R, R/Select.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{cover} |
|
5 |
+\alias{cover} |
|
6 |
+\alias{cover,} |
|
7 |
+\alias{GMQLDataset--method} |
|
8 |
+\alias{cover,GMQLDataset-method} |
|
9 |
+\alias{cover,} |
|
10 |
+\alias{GMQLDataset--method} |
|
5 | 11 |
\alias{GMQLDataset-class} |
6 | 12 |
\alias{GMQLDataset} |
7 | 13 |
\alias{filter} |
8 | 14 |
\alias{filter,} |
9 | 15 |
\alias{filter-methods} |
10 |
-\alias{union} |
|
11 |
-\alias{union,} |
|
12 |
-\alias{GMQLDataset-method} |
|
13 |
-\title{Class GMQLDataset} |
|
16 |
+\title{GMQL Operation: COVER} |
|
14 | 17 |
\usage{ |
18 |
+cover(data, minAcc, maxAcc, ...) |
|
19 |
+ |
|
20 |
+\S4method{cover}{GMQLDataset}(data, minAcc, maxAcc, groupBy = NULL, |
|
21 |
+ aggregates = NULL, variation = "cover") |
|
22 |
+ |
|
15 | 23 |
GMQLDataset(value) |
16 | 24 |
|
17 | 25 |
filter(data, m_predicate = NULL, r_predicate = NULL, semi_join = NULL, |
18 | 26 |
semi_join_negation = FALSE, semi_join_dataset = NULL) |
19 |
- |
|
20 |
-\S4method{union}{GMQLDataset,GMQLDataset}(x, y) |
|
21 | 27 |
} |
22 | 28 |
\arguments{ |
29 |
+\item{data}{GMQLDataset class object} |
|
30 |
+ |
|
31 |
+\item{minAcc}{minimum number of overlapping regions to be considered |
|
32 |
+during execution |
|
33 |
+Is an integer number, declared also as string. |
|
34 |
+minAcc also accepts: |
|
35 |
+\itemize{ |
|
36 |
+\item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
37 |
+of samples in the input dataset} |
|
38 |
+\item{an expression built using PARAMETER object: (ALL() + N) / K or |
|
39 |
+ALL() / K } |
|
40 |
+}} |
|
41 |
+ |
|
42 |
+\item{maxAcc}{maximum number of overlapping regions to be considered |
|
43 |
+during execution |
|
44 |
+Is an integer number, declared also as string. |
|
45 |
+maxAcc also accepts: |
|
46 |
+\itemize{ |
|
47 |
+\item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
48 |
+of samples in the input dataset} |
|
49 |
+\item{PARAMETER class object: \code{\link{ANY}} that acts as a wildcard, |
|
50 |
+considering any amount of overlapping.} |
|
51 |
+\item{an expression built using PARAMETER object: (ALL() + N) / K or |
|
52 |
+ALL() / K } |
|
53 |
+}} |
|
54 |
+ |
|
55 |
+\item{groupBy}{list of CONDITION objects where every object contains |
|
56 |
+the name of metadata to be used in semijoin, or simple string concatenation |
|
57 |
+of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
|
58 |
+without declaring condition. |
|
59 |
+The CONDITION's available are: |
|
60 |
+\itemize{ |
|
61 |
+\item{\code{\link{FULL}}: Fullname evaluation, two attributes match |
|
62 |
+if they both end with value and, if they have a further prefixes, |
|
63 |
+the two prefix sequence are identical} |
|
64 |
+\item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly |
|
65 |
+as value will match; no further prefixes are allowed. } |
|
66 |
+} |
|
67 |
+Every condition accepts only one string value. (e.g. FULL("cell_type") ) |
|
68 |
+In case of single concatenation with no CONDITION or list with some value |
|
69 |
+without condition, the metadata are considered having default |
|
70 |
+evaluation: the two attributes match if both end with value.} |
|
71 |
+ |
|
72 |
+\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
|
73 |
+The \emph{aggregate} is an object of class AGGREGATES |
|
74 |
+The aggregate functions available are: \code{\link{SUM}}, |
|
75 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
76 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
77 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
78 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
79 |
+Every aggregate accepts a string value, except for COUNT, which does not |
|
80 |
+have any value. |
|
81 |
+Argument of 'aggregate function' must exist in schema, i.e. among region |
|
82 |
+attributes. Two styles are allowed: |
|
83 |
+\itemize{ |
|
84 |
+\item list of key-value pairs: e.g. sum = SUM("pvalue") |
|
85 |
+\item list of values: e.g. SUM("pvalue") |
|
86 |
+} |
|
87 |
+"mixed style" is not allowed} |
|
88 |
+ |
|
89 |
+\item{variation}{string identifying the cover GMQL function variation. |
|
90 |
+The admissible strings are: |
|
91 |
+\itemize{ |
|
92 |
+\item{flat: returns the contiguous region that starts from the first end |
|
93 |
+and stops at the last end of the regions which would contribute |
|
94 |
+to each region of the \emph{cover}.} |
|
95 |
+\item{summit: returns regions that start from a position |
|
96 |
+where the number of intersecting regions is not increasing afterwards and |
|
97 |
+stops at a position where either the number of intersecting regions |
|
98 |
+decreases, or it violates the max accumulation index.} |
|
99 |
+\item{histogram: returns the non-overlapping regions contributing to |
|
100 |
+the cover, each with its accumulation index value, which is assigned to |
|
101 |
+the AccIndex region attribute.} |
|
102 |
+\item{cover: default value.} |
|
103 |
+}} |
|
104 |
+ |
|
23 | 105 |
\item{value}{value associated to GMQL dataset} |
24 | 106 |
|
25 | 107 |
\item{m_predicate}{logical predicate made up by R logical operation |
... | ... |
@@ -53,10 +135,6 @@ considering semi_join IN semi_join_dataset} |
53 | 135 |
|
54 | 136 |
\item{semi_join_dataset}{GMQLDataset class object} |
55 | 137 |
|
56 |
-\item{x}{GMQLDataset class object} |
|
57 |
- |
|
58 |
-\item{y}{GMQLDataset class object} |
|
59 |
- |
|
60 | 138 |
\item{x}{GMQLDataset class object} |
61 | 139 |
} |
62 | 140 |
\value{ |
... | ... |
@@ -67,6 +145,28 @@ GMQLDataset class object. It contains the value to use as input |
67 | 145 |
for the subsequent GMQL function |
68 | 146 |
} |
69 | 147 |
\description{ |
148 |
+It takes as input a dataset containing one or more samples and returns |
|
149 |
+another dataset (with a single sample, if no \emph{groupby} option is |
|
150 |
+specified) by “collapsing” the input dataset samples and their regions |
|
151 |
+according to certain rules specified by the input parameters. |
|
152 |
+The attributes of the output genomic regions are only the region |
|
153 |
+coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult). |
|
154 |
+Jaccard Indexes are standard measures of similarity of the contributing |
|
155 |
+regions, added as default region attributes. |
|
156 |
+The JaccardIntersect index is calculated as the ratio between the lengths |
|
157 |
+of the intersection and of the union of the contributing regions; |
|
158 |
+the JaccardResult index is calculated as the ratio between the lengths |
|
159 |
+of the result and the union of the contributing regions. |
|
160 |
+If aggregate functions are specified, a new attribute is added for |
|
161 |
+each aggregate function specified. |
|
162 |
+Output metadata are the union of the input ones. |
|
163 |
+If \emph{groupby} clause is specified, the input samples are partitioned |
|
164 |
+in groups, each with distinct values of the grouping metadata attributes, |
|
165 |
+and the \emph{cover} operation is separately applied to each group, |
|
166 |
+yielding one sample in the result for each group. |
|
167 |
+Input samples that do not satisfy the \emph{groupby} condition |
|
168 |
+are disregarded. |
|
169 |
+ |
|
70 | 170 |
Abstract class representing GMQL dataset |
71 | 171 |
|
72 | 172 |
It returns all the samples satisfying the predicate on metadata. |
... | ... |
@@ -78,25 +178,6 @@ attribute defined in semijoin clause with at least one metadata value |
78 | 178 |
in common with semi join dataset. |
79 | 179 |
If no metadata in common between input dataset and semi join dataset, |
80 | 180 |
no sample is extracted. |
81 |
- |
|
82 |
-It is used to integrate homogeneous or heterogeneous samples of two datasets |
|
83 |
-within a single dataset; for each sample of either input dataset, |
|
84 |
-a result sample is created as follows: |
|
85 |
-\itemize{ |
|
86 |
-\item {Metadata are the same as in the original sample.} |
|
87 |
-\item {Resulting schema is obtained by projecting the schema |
|
88 |
-of the right dataset over the schema of the left one |
|
89 |
-(more properly, it will be performed by adding to the schema of the |
|
90 |
-left dataset the region attributes of the right dataset which are not |
|
91 |
-identical to those of the left dataset)} |
|
92 |
-\item {Regions are the same (in coordinates and attribute values) |
|
93 |
-as in the original sample. |
|
94 |
-Region attributes which are missing in an input dataset sample |
|
95 |
-w.r.t. the merged schema are set to null.} |
|
96 |
-} |
|
97 |
-For what concerns metadata, attributes of samples from the left (right) |
|
98 |
-input dataset are prefixed with the strings LEFT (RIGHT), so as to trace |
|
99 |
-the dataset to which they originally belonged. |
|
100 | 181 |
} |
101 | 182 |
\section{Slots}{ |
102 | 183 |
|
... | ... |
@@ -106,6 +187,32 @@ the dataset to which they originally belonged. |
106 | 187 |
|
107 | 188 |
\examples{ |
108 | 189 |
|
190 |
+## This statement produces an output dataset with a single output sample. |
|
191 |
+## The COVER operation considers all areas defined by a minimum |
|
192 |
+## of two overlapping regions in the input samples, |
|
193 |
+## up to any amount of overlapping regions. |
|
194 |
+ |
|
195 |
+init_gmql() |
|
196 |
+test_path <- system.file("example","DATASET",package = "RGMQL") |
|
197 |
+exp = read_dataset(test_path) |
|
198 |
+res = cover(exp, 2, ANY()) |
|
199 |
+ |
|
200 |
+\dontrun{ |
|
201 |
+## This GMQL statement computes the result grouping the input exp samples |
|
202 |
+## by the values of their cell metadata attribute, |
|
203 |
+## thus one output res sample is generated for each cell type; |
|
204 |
+## output regions are produced where at least 2 and at most 3 regions |
|
205 |
+## of grouped exp samples overlap, setting as attributes of the resulting |
|
206 |
+## regions the minimum pvalue of the overlapping regions (min_pvalue) |
|
207 |
+## and their Jaccard indexes (JaccardIntersect and JaccardResult). |
|
208 |
+ |
|
209 |
+test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
210 |
+exp = read_dataset(test_path) |
|
211 |
+res = cover(exp, 2, 3, c("cell"), list(min_pValue = MIN("pvalue"))) |
|
212 |
+} |
|
213 |
+ |
|
214 |
+ |
|
215 |
+ |
|
109 | 216 |
## It selects from input data samples of patients younger than 70 years old, |
110 | 217 |
## based on filtering on sample metadata attribute Patient_age |
111 | 218 |
|
... | ... |
@@ -140,18 +247,4 @@ TRUE, semi_join_dataset = join_data ) |
140 | 247 |
|
141 | 248 |
} |
142 | 249 |
|
143 |
- |
|
144 |
-## It creates a dataset called full which contains all samples from the |
|
145 |
-## datasets data1 and data2 whose schema is defined by merging the two |
|
146 |
-## dataset schemas. |
|
147 |
-## (union of all the attributes present in the two input datasets). |
|
148 |
- |
|
149 |
-init_gmql() |
|
150 |
-test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
151 |
-test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
152 |
-data1 <- read_dataset(test_path) |
|
153 |
-data2 <- read_dataset(test_path2) |
|
154 |
- |
|
155 |
-res <- union(data1, data2) |
|
156 |
- |
|
157 | 250 |
} |
158 | 251 |
deleted file mode 100644 |
... | ... |
@@ -1,149 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/Cover.R |
|
3 |
-\docType{methods} |
|
4 |
-\name{cover} |
|
5 |
-\alias{cover} |
|
6 |
-\alias{cover,} |
|
7 |
-\alias{cover-methods} |
|
8 |
-\alias{cover} |
|
9 |
-\alias{cover,} |
|
10 |
-\alias{cover-methods} |
|
11 |
-\title{GMQL Operation: COVER} |
|
12 |
-\usage{ |
|
13 |
-cover(data, minAcc, maxAcc, ...) |
|
14 |
- |
|
15 |
-\S4method{cover}{GMQLDataset}(data, minAcc, maxAcc, groupBy = NULL, |
|
16 |
- aggregates = NULL, variation = "cover") |
|
17 |
-} |
|
18 |
-\arguments{ |
|
19 |
-\item{data}{GMQLDataset class object} |
|
20 |
- |
|
21 |
-\item{minAcc}{minimum number of overlapping regions to be considered |
|
22 |
-during execution |
|
23 |
-Is a integer number, declared also as string. |
|
24 |
-minAcc accept also: |
|
25 |
-\itemize{ |
|
26 |
-\item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
27 |
-of samples in the input dataset} |
|
28 |
-\item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
29 |
-ALL() / K } |
|
30 |
-}} |
|
31 |
- |
|
32 |
-\item{maxAcc}{maximum number of overlapping regions to be considered |
|
33 |
-during execution |
|
34 |
-Is a integer number, declared also as string. |
|
35 |
-maxAcc accept also: |
|
36 |
-\itemize{ |
|
37 |
-\item{PARAMETER class object: \code{\link{ALL}} that represents the number |
|
38 |
-of samples in the input dataset} |
|
39 |
-\item{PARAMETER calss object: \code{\link{ANY}}} that acts as a wildcard, |
|
40 |
-considering any amount of overlapping. |
|
41 |
-\item{and expression built using PARAMETER object: (ALL() + N) / K or |
|
42 |
-ALL() / K } |
|
43 |
-}} |
|
44 |
- |
|
45 |
-\item{groupBy}{list of CONDITION objects where every object contains |
|
46 |
-the name of metadata to be used in semijoin, or simple string concatenation |
|
47 |
-of name of metadata, e.g. c("cell_type", "attribute_tag", "size") |
|
48 |
-without declaring condition. |
|
49 |
-The CONDITION's available are: |
|
50 |
-\itemize{ |
|
51 |
-\item{\code{\link{FULL}}: Fullname evaluation, two attributes match |
|
52 |
-if they both end with value and, if they have a further prefixes, |
|
53 |
-the two prefix sequence are identical} |
|
54 |
-\item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly |
|
55 |
-as value will match; no further prefixes are allowed. } |
|
56 |
-} |
|
57 |
-Every condition accepts only one string value. (e.g. FULL("cell_type") ) |
|
58 |
-In case of single concatenation with no CONDITION or list with some value |
|
59 |
-without conditon, the metadata are considered having default |
|
60 |
-evaluation: the two attributes match if both end with value.} |
|
61 |
- |
|
62 |
-\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
|
63 |
-The \emph{aggregate} is an object of class AGGREGATES |
|
64 |
-The aggregate functions available are: \code{\link{SUM}}, |
|
65 |
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
66 |
-\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
67 |
-\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
68 |
-\code{\link{Q2}}, \code{\link{Q3}}. |
|
69 |
-Every aggregate accepts a string value, execet for COUNT, which does not |
|
70 |
-have any value. |
|
71 |
-Argument of 'aggregate function' must exist in schema, i.e. among region |
|
72 |
-attributes. Two style are allowed: |
|
73 |
-\itemize{ |
|
74 |
-\item list of key-value pairs: e.g. sum = SUM("pvalue") |
|
75 |
-\item list of values: e.g. SUM("pvalue") |
|
76 |
-} |
|
77 |
-"mixed style" is not allowed} |
|
78 |
- |
|
79 |
-\item{variation}{string identifying the cover GMQL function variation. |
|
80 |
-The admissible string are: |
|
81 |
-\itemize{ |
|
82 |
-\item{flat: returns the contiguous region that starts from the first end |
|
83 |
-and stops at the last end of the regions which would contribute |
|
84 |
-to each region of the \emph{cover}.} |
|
85 |
-\item{summit: returns regions that start from a position |
|
86 |
-where the number of intersecting regions is not increasing afterwards and |
|
87 |
-stops at a position where either the number of intersecting regions |
|
88 |
-decreases, or it violates the max accumulation index.} |
|
89 |
-\item{histogram: returns the non-overlapping regions contributing to |
|
90 |
-the cover, each with its accumulation index value, which is assigned to |
|
91 |
-the AccIndex region attribute.} |
|
92 |
-\item{cover: default value.} |
|
93 |
-}} |
|
94 |
-} |
|
95 |
-\value{ |
|
96 |
-GMQLDataset class object. It contains the value to use as input |
|
97 |
-for the subsequent GMQL function |
|
98 |
-} |
|
99 |
-\description{ |
|
100 |
-It takes as input a dataset containing one or more samples and returns |
|
101 |
-another dataset (with a single sample, if no \emph{groupby} option is |
|
102 |
-specified) by “collapsing” the input dataset samples and their regions |
|
103 |
-according to certain rules specified by the input parameters. |
|
104 |
-The attributes of the output genomic regions are only the region |
|
105 |
-coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult). |
|
106 |
-Jaccard Indexes are standard measures of similarity of the contributing |
|
107 |
-regions, added as default region attributes. |
|
108 |
-The JaccardIntersect index is calculated as the ratio between the lengths |
|
109 |
-of the intersection and of the union of the contributing regions; |
|
110 |
-the JaccardResult index is calculated as the ratio between the lengths |
|
111 |
-of the result and the union of the contributing regions. |
|
112 |
-If aggregate functions are specified, a new attributes is added for |
|
113 |
-each aggregate function specified. |
|
114 |
-Output metadata are the union of the input ones. |
|
115 |
-If \emph{groupby} clause is specified, the input samples are partitioned |
|
116 |
-in groups, each with distinct values of the grouping metadata attributes, |
|
117 |
-and the \emph{cover} operation is separately applied to each group, |
|
118 |
-yielding to one sample in the result for each group. |
|
119 |
-Input samples that do not satisfy the \emph{groupby} condition |
|
120 |
-are disregarded. |
|
121 |
-} |
|
122 |
-\examples{ |
|
123 |
- |
|
124 |
-## This statement produces an output dataset with a single output sample. |
|
125 |
-## The COVER operation considers all areas defined by a minimum |
|
126 |
-## of two overlapping regions in the input samples, |
|
127 |
-## up to any amount of overlapping regions. |
|
128 |
- |
|
129 |
-init_gmql() |
|
130 |
-test_path <- system.file("example","DATASET",package = "RGMQL") |
|
131 |
-exp = read_dataset(test_path) |
|
132 |
-res = cover(exp, 2, ANY()) |
|
133 |
- |
|
134 |
-\dontrun{ |
|
135 |
-## This GMQL statement computes the result grouping the input exp samples |
|
136 |
-## by the values of their cell metadata attribute, |
|
137 |
-## thus one output res sample is generated for each cell type; |
|
138 |
-## output regions are produced where at least 2 and at most 3 regions |
|
139 |
-## of grouped exp samples overlap, setting as attributes of the resulting |
|
140 |
-## regions the minimum pvalue of the overlapping regions (min_pvalue) |
|
141 |
-## and their Jaccard indexes (JaccardIntersect and JaccardResult). |
|
142 |
- |
|
143 |
-test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
144 |
-exp = read_dataset(test_path) |
|
145 |
-res = cover(exp, 2, 3, c("cell"), list(min_pValue = MIN("pvalue"))) |
|
146 |
-} |
|
147 |
- |
|
148 |
- |
|
149 |
-} |
150 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,56 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/Union.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{union,GMQLDataset,GMQLDataset-method} |
|
5 |
+\alias{union,GMQLDataset,GMQLDataset-method} |
|
6 |
+\alias{union,} |
|
7 |
+\alias{union-method} |
|
8 |
+\title{GMQL Operation: UNION} |
|
9 |
+\usage{ |
|
10 |
+\S4method{union}{GMQLDataset,GMQLDataset}(x, y) |
|
11 |
+} |
|
12 |
+\arguments{ |
|
13 |
+\item{x}{GMQLDataset class object} |
|
14 |
+ |
|
15 |
+\item{y}{GMQLDataset class object} |
|
16 |
+} |
|
17 |
+\value{ |
|
18 |
+GMQLDataset class object. It contains the value to use as input |
|
19 |
+for the subsequent GMQL function |
|
20 |
+} |
|
21 |
+\description{ |
|
22 |
+It is used to integrate homogeneous or heterogeneous samples of two datasets |
|
23 |
+within a single dataset; for each sample of either input dataset, |
|
24 |
+a result sample is created as follows: |
|
25 |
+\itemize{ |
|
26 |
+\item {Metadata are the same as in the original sample.} |
|
27 |
+\item {Resulting schema is obtained by projecting the schema |
|
28 |
+of the right dataset over the schema of the left one |
|
29 |
+(more properly, it will be performed by adding to the schema of the |
|
30 |
+left dataset the region attributes of the right dataset which are not |
|
31 |
+identical to those of the left dataset)} |
|
32 |
+\item {Regions are the same (in coordinates and attribute values) |
|
33 |
+as in the original sample. |
|
34 |
+Region attributes which are missing in an input dataset sample |
|
35 |
+w.r.t. the merged schema are set to null.} |
|
36 |
+} |
|
37 |
+For what concerns metadata, attributes of samples from the left (right) |
|
38 |
+input dataset are prefixed with the strings LEFT (RIGHT), so as to trace |
|
39 |
+the dataset to which they originally belonged. |
|
40 |
+} |
|
41 |
+\examples{ |
|
42 |
+ |
|
43 |
+## It creates a dataset called full which contains all samples from the |
|
44 |
+## datasets data1 and data2 whose schema is defined by merging the two |
|
45 |
+## dataset schemas. |
|
46 |
+## (union of all the attributes present in the two input datasets). |
|
47 |
+ |
|
48 |
+init_gmql() |
|
49 |
+test_path <- system.file("example", "DATASET", package = "RGMQL") |
|
50 |
+test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
|
51 |
+data1 <- read_dataset(test_path) |
|
52 |
+data2 <- read_dataset(test_path2) |
|
53 |
+ |
|
54 |
+res <- union(data1, data2) |
|
55 |
+ |
|
56 |
+} |