Browse code

start new build

Simone authored on 15/11/2017 07:36:20
Showing 7 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: RGMQL
2 2
 Type: Package
3 3
 Title: GenoMetric Query Language for R/Bioconductor
4
-Version: 0.99.30
4
+Version: 0.99.31
5 5
 Author: Simone Pallotta, Marco Masseroli
6 6
 Maintainer: Simone Pallotta <simonepallotta@hotmail.com>
7 7
 Description: This RGMQL package brings the GenoMetric Query Language (GMQL)
... ...
@@ -30,6 +30,7 @@ export(SUM)
30 30
 export(UP)
31 31
 export(compile_query)
32 32
 export(compile_query_fromfile)
33
+export(cover)
33 34
 export(delete_dataset)
34 35
 export(download_as_GRangesList)
35 36
 export(download_dataset)
... ...
@@ -1,7 +1,3 @@
1
-
2
-
3
-
4
-
5 1
 #' GMQL Operation: COVER
6 2
 #'
7 3
 #' It takes as input a dataset containing one or more samples and returns 
... ...
@@ -134,11 +130,10 @@
134 130
 #' }
135 131
 #' 
136 132
 #' 
137
-#' @name cover
138
-#' @rdname cover-methods
139
-#' @aliases cover, cover-methods
133
+#' @rdname GMQLDataset-class
134
+#' @aliases cover,GMQLDataset-method
140 135
 #' 
141
-#' @exportMethod cover
136
+#' @export
142 137
 #' 
143 138
 setGeneric("cover", function(data, minAcc, maxAcc, ...)
144 139
 {
... ...
@@ -148,12 +143,11 @@ setGeneric("cover", function(data, minAcc, maxAcc, ...)
148 143
     min <- .check_cover_param(minAcc,TRUE)
149 144
     max <- .check_cover_param(maxAcc,FALSE)
150 145
     
151
-    gmql_cover(data,minAcc,maxAcc)
146
+    gmql_cover(data,min,max,NULL,NULL,"COVER")
152 147
 })
153 148
 
154
-#' @name cover
155
-#' @rdname cover-methods
156
-#' @aliases cover, cover-methods
149
+#' @rdname GMQLDataset-class
150
+#' @aliases cover,GMQLDataset-method
157 151
 #' @export
158 152
 setMethod("cover", "GMQLDataset",
159 153
             function(data, minAcc, maxAcc, groupBy = NULL, aggregates = NULL, 
... ...
@@ -166,12 +160,12 @@ setMethod("cover", "GMQLDataset",
166 160
                 max <- .check_cover_param(maxAcc,FALSE)
167 161
                 flag = toupper(variation)
168 162
                 
169
-                gmql_cover(data@value, minAcc, maxAcc, groupBy, aggregates, 
170
-                                flag)
163
+                gmql_cover(data@value, min, max, groupBy, aggregates, 
164
+                           flag)
171 165
             })
172 166
 
173
-gmql_cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, 
174
-                        aggregates = NULL, flag = "cover")
167
+gmql_cover <- function(data, minAcc, maxAcc, groupBy = NULL, 
168
+                        aggregates = NULL, flag)
175 169
 {
176 170
     if(!is.null(groupBy))
177 171
         join_condition_matrix <- .jarray(.join_condition(groupBy),
... ...
@@ -188,14 +182,13 @@ gmql_cover <- function(input_data, minAcc, maxAcc, groupBy = NULL,
188 182
     WrappeR <- J("it/polimi/genomics/r/Wrapper")
189 183
     response <- switch(flag,
190 184
                 "COVER" = WrappeR$cover(minAcc, maxAcc, join_condition_matrix,
191
-                                    metadata_matrix, input_data),
185
+                                    metadata_matrix, data),
192 186
                 "FLAT" = WrappeR$flat(minAcc, maxAcc, join_condition_matrix,
193
-                                    metadata_matrix,input_data),
187
+                                    metadata_matrix, data),
194 188
                 "SUMMIT" = WrappeR$summit(minAcc,maxAcc, join_condition_matrix,
195
-                                    metadata_matrix, input_data),
189
+                                    metadata_matrix, data),
196 190
                 "HISTOGRAM" = WrappeR$histogram(minAcc, maxAcc, 
197
-                                join_condition_matrix, metadata_matrix,
198
-                                input_data))
191
+                                join_condition_matrix, metadata_matrix, data))
199 192
     if(is.null(response))
200 193
         stop("no admissible variation: cover, flat, summit, histogram")
201 194
     
... ...
@@ -42,9 +42,10 @@
42 42
 #' 
43 43
 #' res <- union(data1, data2)
44 44
 #' 
45
-#' @rdname GMQLDataset-class
46
-#' @aliases union, GMQLDataset-method
45
+#' @rdname union-method
46
+#' @aliases union, union-method
47 47
 #' @export
48
+#' 
48 49
 setMethod("union", c("GMQLDataset","GMQLDataset"),
49 50
             function(x, y)
50 51
             {
... ...
@@ -1,25 +1,107 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/Dataset-class.R, R/Select.R, R/Union.R
3
-\docType{class}
4
-\name{GMQLDataset-class}
2
+% Please edit documentation in R/Cover.R, R/Dataset-class.R, R/Select.R
3
+\docType{methods}
4
+\name{cover}
5
+\alias{cover}
6
+\alias{cover,}
7
+\alias{GMQLDataset--method}
8
+\alias{cover,GMQLDataset-method}
9
+\alias{cover,}
10
+\alias{GMQLDataset--method}
5 11
 \alias{GMQLDataset-class}
6 12
 \alias{GMQLDataset}
7 13
 \alias{filter}
8 14
 \alias{filter,}
9 15
 \alias{filter-methods}
10
-\alias{union}
11
-\alias{union,}
12
-\alias{GMQLDataset-method}
13
-\title{Class GMQLDataset}
16
+\title{GMQL Operation: COVER}
14 17
 \usage{
18
+cover(data, minAcc, maxAcc, ...)
19
+
20
+\S4method{cover}{GMQLDataset}(data, minAcc, maxAcc, groupBy = NULL,
21
+  aggregates = NULL, variation = "cover")
22
+
15 23
 GMQLDataset(value)
16 24
 
17 25
 filter(data, m_predicate = NULL, r_predicate = NULL, semi_join = NULL,
18 26
   semi_join_negation = FALSE, semi_join_dataset = NULL)
19
-
20
-\S4method{union}{GMQLDataset,GMQLDataset}(x, y)
21 27
 }
22 28
 \arguments{
29
+\item{data}{GMQLDataset class object}
30
+
31
+\item{minAcc}{minimum number of overlapping regions to be considered 
32
+during execution
33
+It is an integer number, which can also be given as a string.
34
+minAcc also accepts:
35
+\itemize{
36
+\item{PARAMETER class object: \code{\link{ALL}} that represents the number 
37
+of samples in the input dataset}
38
+\item{an expression built using PARAMETER objects: (ALL() + N) / K or
39
+ALL() / K }
40
+}}
41
+
42
+\item{maxAcc}{maximum number of overlapping regions to be considered 
43
+during execution
44
+It is an integer number, which can also be given as a string.
45
+maxAcc also accepts:
46
+\itemize{
47
+\item{PARAMETER class object: \code{\link{ALL}} that represents the number 
48
+of samples in the input dataset}
49
+\item{PARAMETER class object: \code{\link{ANY}} that acts as a wildcard, 
50
+considering any amount of overlapping.}
51
+\item{an expression built using PARAMETER objects: (ALL() + N) / K or
52
+ALL() / K }
53
+}}
54
+
55
+\item{groupBy}{list of CONDITION objects where every object contains 
56
+the name of metadata to be used in semijoin, or simple string concatenation 
57
+of name of metadata, e.g. c("cell_type", "attribute_tag", "size") 
58
+without declaring condition.
59
+The CONDITION's available are:
60
+\itemize{
61
+\item{\code{\link{FULL}}: Fullname evaluation, two attributes match 
62
+if they both end with value and, if they have a further prefixes,
63
+the two prefix sequence are identical}
64
+\item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly 
65
+as value will match; no further prefixes are allowed. }
66
+}
67
+Every condition accepts only one string value. (e.g. FULL("cell_type") )
68
+In case of single concatenation with no CONDITION or list with some value 
69
+without condition, the metadata are considered having default 
70
+evaluation: the two attributes match if both end with value.}
71
+
72
+\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}.
73
+The \emph{aggregate} is an object of class AGGREGATES
74
+The aggregate functions available are: \code{\link{SUM}}, 
75
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, 
76
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, 
77
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, 
78
+\code{\link{Q2}}, \code{\link{Q3}}.
79
+Every aggregate accepts a string value, except for COUNT, which does not 
80
+have any value.
81
+Argument of 'aggregate function' must exist in schema, i.e. among region 
82
+attributes. Two styles are allowed:
83
+\itemize{
84
+\item list of key-value pairs: e.g. sum = SUM("pvalue")
85
+\item list of values: e.g. SUM("pvalue")
86
+}
87
+"mixed style" is not allowed}
88
+
89
+\item{variation}{string identifying the cover GMQL function variation.
90
+The admissible strings are:
91
+\itemize{
92
+\item{flat: returns the contiguous region that starts from the first end 
93
+and stops at the last end of the regions which would contribute 
94
+to each region of the \emph{cover}.}
95
+\item{summit: returns regions that start from a position
96
+where the number of intersecting regions is not increasing afterwards and
97
+stops at a position where either the number of intersecting regions 
98
+decreases, or it violates the max accumulation index.}
99
+\item{histogram: returns the non-overlapping regions contributing to 
100
+the cover, each with its accumulation index value, which is assigned to 
101
+the AccIndex region attribute.}
102
+\item{cover: default value.}
103
+}}
104
+
23 105
 \item{value}{value associated to GMQL dataset}
24 106
 
25 107
 \item{m_predicate}{logical predicate made up by R logical operation 
... ...
@@ -53,10 +135,6 @@ considering semi_join IN semi_join_dataset}
53 135
 
54 136
 \item{semi_join_dataset}{GMQLDataset class object}
55 137
 
56
-\item{x}{GMQLDataset class object}
57
-
58
-\item{y}{GMQLDataset class object}
59
-
60 138
 \item{x}{GMQLDataset class object}
61 139
 }
62 140
 \value{
... ...
@@ -67,6 +145,28 @@ GMQLDataset class object. It contains the value to use as input
67 145
 for the subsequent GMQL function
68 146
 }
69 147
 \description{
148
+It takes as input a dataset containing one or more samples and returns 
149
+another dataset (with a single sample, if no \emph{groupby} option is 
150
+specified) by “collapsing” the input dataset samples and their regions 
151
+according to certain rules specified by the input parameters.
152
+The attributes of the output genomic regions are only the region 
153
+coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult).
154
+Jaccard Indexes are standard measures of similarity of the contributing 
155
+regions, added as default region attributes.
156
+The JaccardIntersect index is calculated as the ratio between the lengths 
157
+of the intersection and of the union of the contributing regions; 
158
+the JaccardResult index is calculated as the ratio between the lengths 
159
+of the result and the union of the contributing regions.
160
+If aggregate functions are specified, a new attribute is added for 
161
+each aggregate function specified.
162
+Output metadata are the union of the input ones.
163
+If \emph{groupby} clause is specified, the input samples are partitioned 
164
+in groups, each with distinct values of the grouping metadata attributes, 
165
+and the \emph{cover} operation is separately applied to each group, 
166
+yielding one sample in the result for each group.
167
+Input samples that do not satisfy the \emph{groupby} condition 
168
+are disregarded.
169
+
70 170
 Abstract class representing GMQL dataset
71 171
 
72 172
 It returns all the samples satisfying the predicate on metadata.
... ...
@@ -78,25 +178,6 @@ attribute defined in semijoin clause with at least one metadata value
78 178
 in common with semi join dataset.
79 179
 If no metadata in common between input dataset and semi join dataset, 
80 180
 no sample is extracted.
81
-
82
-It is used to integrate homogeneous or heterogeneous samples of two datasets 
83
-within a single dataset; for each sample of either input dataset, 
84
-a result sample is created as follows:
85
-\itemize{
86
-\item {Metadata are the same as in the original sample.}
87
-\item {Resulting schema is obtained by projecting the schema 
88
-of the right dataset over the schema of the left one
89
-(more properly, it will be performed by adding to the schema of the 
90
-left dataset the region attributes of the right dataset which are not 
91
-identical to those of the left dataset)}
92
-\item {Regions are the same (in coordinates and attribute values) 
93
-as in the original sample.
94
-Region attributes which are missing in an input dataset sample 
95
-w.r.t. the merged schema are set to null.}
96
-}
97
-For what concerns metadata, attributes of samples from the left (right) 
98
-input dataset are prefixed with the strings LEFT (RIGHT), so as to trace 
99
-the dataset to which they originally belonged.
100 181
 }
101 182
 \section{Slots}{
102 183
 
... ...
@@ -106,6 +187,32 @@ the dataset to which they originally belonged.
106 187
 
107 188
 \examples{
108 189
 
190
+## This statement produces an output dataset with a single output sample. 
191
+## The COVER operation considers all areas defined by a minimum 
192
+## of two overlapping regions in the input samples, 
193
+## up to any amount of overlapping regions.
194
+
195
+init_gmql()
196
+test_path <- system.file("example","DATASET",package = "RGMQL")
197
+exp = read_dataset(test_path)
198
+res = cover(exp, 2, ANY())
199
+
200
+\dontrun{
201
+## This GMQL statement computes the result grouping the input exp samples 
202
+## by the values of their cell metadata attribute, 
203
+## thus one output res sample is generated for each cell type; 
204
+## output regions are produced where at least 2 and at most 3 regions 
205
+## of grouped exp samples overlap, setting as attributes of the resulting 
206
+## regions the minimum pvalue of the overlapping regions (min_pvalue) 
207
+## and their Jaccard indexes (JaccardIntersect and JaccardResult).
208
+
209
+test_path <- system.file("example", "DATASET", package = "RGMQL")
210
+exp = read_dataset(test_path)
211
+res = cover(exp, 2, 3, c("cell"), list(min_pValue = MIN("pvalue")))
212
+}
213
+
214
+
215
+
109 216
 ## It selects from input data samples of patients younger than 70 years old, 
110 217
 ## based on filtering on sample metadata attribute Patient_age
111 218
 
... ...
@@ -140,18 +247,4 @@ TRUE, semi_join_dataset = join_data )
140 247
 
141 248
 }
142 249
 
143
-
144
-## It creates a dataset called full which contains all samples from the 
145
-## datasets data1 and data2 whose schema is defined by merging the two 
146
-## dataset schemas.
147
-## (union of all the attributes present in the two input datasets).
148
-
149
-init_gmql()
150
-test_path <- system.file("example", "DATASET", package = "RGMQL")
151
-test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
152
-data1 <- read_dataset(test_path)
153
-data2 <- read_dataset(test_path2)
154
-
155
-res <- union(data1, data2)
156
-
157 250
 }
158 251
deleted file mode 100644
... ...
@@ -1,149 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/Cover.R
3
-\docType{methods}
4
-\name{cover}
5
-\alias{cover}
6
-\alias{cover,}
7
-\alias{cover-methods}
8
-\alias{cover}
9
-\alias{cover,}
10
-\alias{cover-methods}
11
-\title{GMQL Operation: COVER}
12
-\usage{
13
-cover(data, minAcc, maxAcc, ...)
14
-
15
-\S4method{cover}{GMQLDataset}(data, minAcc, maxAcc, groupBy = NULL,
16
-  aggregates = NULL, variation = "cover")
17
-}
18
-\arguments{
19
-\item{data}{GMQLDataset class object}
20
-
21
-\item{minAcc}{minimum number of overlapping regions to be considered 
22
-during execution
23
-Is a integer number, declared also as string.
24
-minAcc accept also:
25
-\itemize{
26
-\item{PARAMETER class object: \code{\link{ALL}} that represents the number 
27
-of samples in the input dataset}
28
-\item{and expression built using PARAMETER object: (ALL() + N) / K or
29
-ALL() / K }
30
-}}
31
-
32
-\item{maxAcc}{maximum number of overlapping regions to be considered 
33
-during execution
34
-Is a integer number, declared also as string.
35
-maxAcc accept also:
36
-\itemize{
37
-\item{PARAMETER class object: \code{\link{ALL}} that represents the number 
38
-of samples in the input dataset}
39
-\item{PARAMETER calss object: \code{\link{ANY}}} that acts as a wildcard, 
40
-considering any amount of overlapping.
41
-\item{and expression built using PARAMETER object: (ALL() + N) / K or
42
-ALL() / K }
43
-}}
44
-
45
-\item{groupBy}{list of CONDITION objects where every object contains 
46
-the name of metadata to be used in semijoin, or simple string concatenation 
47
-of name of metadata, e.g. c("cell_type", "attribute_tag", "size") 
48
-without declaring condition.
49
-The CONDITION's available are:
50
-\itemize{
51
-\item{\code{\link{FULL}}: Fullname evaluation, two attributes match 
52
-if they both end with value and, if they have a further prefixes,
53
-the two prefix sequence are identical}
54
-\item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly 
55
-as value will match; no further prefixes are allowed. }
56
-}
57
-Every condition accepts only one string value. (e.g. FULL("cell_type") )
58
-In case of single concatenation with no CONDITION or list with some value 
59
-without conditon, the metadata are considered having default 
60
-evaluation: the two attributes match if both end with value.}
61
-
62
-\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}.
63
-The \emph{aggregate} is an object of class AGGREGATES
64
-The aggregate functions available are: \code{\link{SUM}}, 
65
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, 
66
-\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, 
67
-\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, 
68
-\code{\link{Q2}}, \code{\link{Q3}}.
69
-Every aggregate accepts a string value, execet for COUNT, which does not 
70
-have any value.
71
-Argument of 'aggregate function' must exist in schema, i.e. among region 
72
-attributes. Two style are allowed:
73
-\itemize{
74
-\item list of key-value pairs: e.g. sum = SUM("pvalue")
75
-\item list of values: e.g. SUM("pvalue")
76
-}
77
-"mixed style" is not allowed}
78
-
79
-\item{variation}{string identifying the cover GMQL function variation.
80
-The admissible string are:
81
-\itemize{
82
-\item{flat: returns the contiguous region that starts from the first end 
83
-and stops at the last end of the regions which would contribute 
84
-to each region of the \emph{cover}.}
85
-\item{summit: returns regions that start from a position
86
-where the number of intersecting regions is not increasing afterwards and
87
-stops at a position where either the number of intersecting regions 
88
-decreases, or it violates the max accumulation index.}
89
-\item{histogram: returns the non-overlapping regions contributing to 
90
-the cover, each with its accumulation index value, which is assigned to 
91
-the AccIndex region attribute.}
92
-\item{cover: default value.}
93
-}}
94
-}
95
-\value{
96
-GMQLDataset class object. It contains the value to use as input 
97
-for the subsequent GMQL function
98
-}
99
-\description{
100
-It takes as input a dataset containing one or more samples and returns 
101
-another dataset (with a single sample, if no \emph{groupby} option is 
102
-specified) by “collapsing” the input dataset samples and their regions 
103
-according to certain rules specified by the input parameters.
104
-The attributes of the output genomic regions are only the region 
105
-coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult).
106
-Jaccard Indexes are standard measures of similarity of the contributing 
107
-regions, added as default region attributes.
108
-The JaccardIntersect index is calculated as the ratio between the lengths 
109
-of the intersection and of the union of the contributing regions; 
110
-the JaccardResult index is calculated as the ratio between the lengths 
111
-of the result and the union of the contributing regions.
112
-If aggregate functions are specified, a new attributes is added for 
113
-each aggregate function specified.
114
-Output metadata are the union of the input ones.
115
-If \emph{groupby} clause is specified, the input samples are partitioned 
116
-in groups, each with distinct values of the grouping metadata attributes, 
117
-and the \emph{cover} operation is separately applied to each group, 
118
-yielding to one sample in the result for each group.
119
-Input samples that do not satisfy the \emph{groupby} condition 
120
-are disregarded.
121
-}
122
-\examples{
123
-
124
-## This statement produces an output dataset with a single output sample. 
125
-## The COVER operation considers all areas defined by a minimum 
126
-## of two overlapping regions in the input samples, 
127
-## up to any amount of overlapping regions.
128
-
129
-init_gmql()
130
-test_path <- system.file("example","DATASET",package = "RGMQL")
131
-exp = read_dataset(test_path)
132
-res = cover(exp, 2, ANY())
133
-
134
-\dontrun{
135
-## This GMQL statement computes the result grouping the input exp samples 
136
-## by the values of their cell metadata attribute, 
137
-## thus one output res sample is generated for each cell type; 
138
-## output regions are produced where at least 2 and at most 3 regions 
139
-## of grouped exp samples overlap, setting as attributes of the resulting 
140
-## regions the minimum pvalue of the overlapping regions (min_pvalue) 
141
-## and their Jaccard indexes (JaccardIntersect and JaccardResult).
142
-
143
-test_path <- system.file("example", "DATASET", package = "RGMQL")
144
-exp = read_dataset(test_path)
145
-res = cover(exp, 2, 3, c("cell"), list(min_pValue = MIN("pvalue")))
146
-}
147
-
148
-
149
-}
150 0
new file mode 100644
... ...
@@ -0,0 +1,56 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/Union.R
3
+\docType{methods}
4
+\name{union,GMQLDataset,GMQLDataset-method}
5
+\alias{union,GMQLDataset,GMQLDataset-method}
6
+\alias{union,}
7
+\alias{union-method}
8
+\title{GMQL Operation: UNION}
9
+\usage{
10
+\S4method{union}{GMQLDataset,GMQLDataset}(x, y)
11
+}
12
+\arguments{
13
+\item{x}{GMQLDataset class object}
14
+
15
+\item{y}{GMQLDataset class object}
16
+}
17
+\value{
18
+GMQLDataset class object. It contains the value to use as input 
19
+for the subsequent GMQL function
20
+}
21
+\description{
22
+It is used to integrate homogeneous or heterogeneous samples of two datasets 
23
+within a single dataset; for each sample of either input dataset, 
24
+a result sample is created as follows:
25
+\itemize{
26
+\item {Metadata are the same as in the original sample.}
27
+\item {Resulting schema is obtained by projecting the schema 
28
+of the right dataset over the schema of the left one
29
+(more properly, it will be performed by adding to the schema of the 
30
+left dataset the region attributes of the right dataset which are not 
31
+identical to those of the left dataset)}
32
+\item {Regions are the same (in coordinates and attribute values) 
33
+as in the original sample.
34
+Region attributes which are missing in an input dataset sample 
35
+w.r.t. the merged schema are set to null.}
36
+}
37
+For what concerns metadata, attributes of samples from the left (right) 
38
+input dataset are prefixed with the strings LEFT (RIGHT), so as to trace 
39
+the dataset to which they originally belonged.
40
+}
41
+\examples{
42
+
43
+## It creates a dataset called full which contains all samples from the 
44
+## datasets data1 and data2 whose schema is defined by merging the two 
45
+## dataset schemas.
46
+## (union of all the attributes present in the two input datasets).
47
+
48
+init_gmql()
49
+test_path <- system.file("example", "DATASET", package = "RGMQL")
50
+test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL")
51
+data1 <- read_dataset(test_path)
52
+data2 <- read_dataset(test_path2)
53
+
54
+res <- union(data1, data2)
55
+
56
+}