... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
Package: RGMQL |
2 | 2 |
Type: Package |
3 | 3 |
Title: GenoMetric Query Language for R/Bioconductor |
4 |
-Version: 0.99.23 |
|
4 |
+Version: 0.99.24 |
|
5 | 5 |
Author: Simone Pallotta, Marco Masseroli |
6 | 6 |
Maintainer: Simone Pallotta <simonepallotta@hotmail.com> |
7 |
-Description: This RGMQL package brings the GenoMetric Query Language (GMQL) functionalities |
|
8 |
- into the R environment. |
|
7 |
+Description: This RGMQL package brings the GenoMetric Query Language (GMQL) |
|
8 |
+ functionalities into the R environment. |
|
9 | 9 |
GMQL is a high-level, declarative language to query and compare multiple and heterogeneous genomic |
10 | 10 |
datasets for biomedical knowledge discovery. It allows expressing easily queries |
11 | 11 |
and processing over genomic regions and their metadata, in a way similar to |
... | ... |
@@ -55,7 +55,7 @@ LazyData: true |
55 | 55 |
RoxygenNote: 6.0.1 |
56 | 56 |
Imports: httr, rJava,GenomicRanges, rtracklayer, data.table, utils, plyr, xml2, |
57 | 57 |
methods, S4Vectors, dplyr, stats |
58 |
-Depends: R(>= 3.4.2) |
|
58 |
+Depends: R (>= 3.4.2) |
|
59 | 59 |
VignetteBuilder: knitr |
60 | 60 |
Suggests: BiocStyle, knitr, rmarkdown |
61 | 61 |
biocViews: Software,Infrastructure,DataImport,Network |
... | ... |
@@ -4,8 +4,8 @@ |
4 | 4 |
#' another dataset (with a single sample, if no \emph{groupby} option is |
5 | 5 |
#' specified) by “collapsing” the input dataset samples and their regions |
6 | 6 |
#' according to certain rules specified by the input parameters. |
7 |
-#' The attributes of the output genomic regions are only the region coordinates, |
|
8 |
-#' and Jaccard indexes (JaccardIntersect and JaccardResult). |
|
7 |
+#' The attributes of the output genomic regions are only the region |
|
8 |
+#' coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult). |
|
9 | 9 |
#' Jaccard Indexes are standard measures of similarity of the contributing |
10 | 10 |
#' regions, added as default region attributes. |
11 | 11 |
#' The JaccardIntersect index is calculated as the ratio between the lengths |
... | ... |
@@ -70,9 +70,10 @@ |
70 | 70 |
#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
71 | 71 |
#' The \emph{aggregate} is an object of class AGGREGATES |
72 | 72 |
#' The aggregate functions available are: \code{\link{SUM}}, |
73 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
74 |
-#' \code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
75 |
-#' \code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
73 |
+#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
74 |
+#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
75 |
+#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
76 |
+#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
76 | 77 |
#' Every aggregate accepts a string value, except for COUNT, which does not |
77 | 78 |
#' have any value. |
78 | 79 |
#' Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -181,9 +182,10 @@ cover <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
181 | 182 |
#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
182 | 183 |
#' The \emph{aggregate} is an object of class AGGREGATES |
183 | 184 |
#' The aggregate functions available are: \code{\link{SUM}}, |
184 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
185 |
-#' \code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
186 |
-#' \code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
185 |
+#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
186 |
+#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
187 |
+#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
188 |
+#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
187 | 189 |
#' Every aggregate accepts a string value, except for COUNT, which does not |
188 | 190 |
#' have any value. |
189 | 191 |
#' Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -282,9 +284,10 @@ histogram <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
282 | 284 |
#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
283 | 285 |
#' The \emph{aggregate} is an object of class AGGREGATES |
284 | 286 |
#' The aggregate functions available are: \code{\link{SUM}}, |
285 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
286 |
-#' \code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
287 |
-#' \code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
287 |
+#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
288 |
+#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
289 |
+#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
290 |
+#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
288 | 291 |
#' Every aggregate accepts a string value, except for COUNT, which does not |
289 | 292 |
#' have any value. |
290 | 293 |
#' Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -380,9 +383,10 @@ summit <- function(input_data, minAcc, maxAcc, groupBy = NULL, |
380 | 383 |
#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
381 | 384 |
#' The \emph{aggregate} is an object of class AGGREGATES |
382 | 385 |
#' The aggregate functions available are: \code{\link{SUM}}, |
383 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
384 |
-#' \code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
385 |
-#' \code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
386 |
+#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
387 |
+#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
388 |
+#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
389 |
+#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
386 | 390 |
#' Every aggregate accepts a string value, except for COUNT, which does not |
387 | 391 |
#' have any value. |
388 | 392 |
#' Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -1,9 +1,9 @@ |
1 | 1 |
#' GMQL Operation: DIFFERENCE |
2 | 2 |
#' |
3 | 3 |
#' It produces one sample in the result for each sample of the left operand, |
4 |
-#' by keeping the same metadata of the left input sample and only those regions |
|
5 |
-#' (with their schema and values) of the left input sample which do not |
|
6 |
-#' intersect with any region in the right operand sample. |
|
4 |
+#' by keeping the same metadata of the left input sample and only those |
|
5 |
+#' regions (with their schema and values) of the left input sample which |
|
6 |
+#' do not intersect with any region in the right operand sample. |
|
7 | 7 |
#' The optional \emph{joinby} clause is used to extract a subset of couples |
8 | 8 |
#' from the Cartesian product of two datasets |
9 | 9 |
#' \emph{left_input_data} x \emph{right_input_data} on which to apply |
... | ... |
@@ -13,9 +13,10 @@ |
13 | 13 |
#' @param metadata list of element in the form \emph{key} = \emph{aggregate}. |
14 | 14 |
#' The \emph{aggregate} is an object of class AGGREGATES |
15 | 15 |
#' The aggregate functions available are: \code{\link{SUM}}, |
16 |
-#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
17 |
-#' \code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
18 |
-#' \code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
16 |
+#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
17 |
+#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
18 |
+#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
19 |
+#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
19 | 20 |
#' Every aggregate accepts a string value, except for COUNT, which does not |
20 | 21 |
#' have any value. |
21 | 22 |
#' Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -108,7 +108,7 @@ filter_and_extract <- function(data, metadata = NULL, |
108 | 108 |
} |
109 | 109 |
|
110 | 110 |
.extract_from_GRangesList <- function(rangesList, metadata = NULL, |
111 |
- metadata_prefix = NULL, regions = NULL) |
|
111 |
+ metadata_prefix = NULL, regions = NULL) |
|
112 | 112 |
{ |
113 | 113 |
if(!is(rangesList,"GRangesList")) |
114 | 114 |
stop("only GrangesList admitted") |
... | ... |
@@ -146,7 +146,7 @@ export_gmql <- function(samples, dir_out, is_gtf) |
146 | 146 |
names(columns) <- plyr::revalue(names(columns),c(type = "feature", |
147 | 147 |
phase = "frame")) |
148 | 148 |
fixed_element = c(seqname = "character", source = "character", |
149 |
- feature = "character",start = "long", end = "long", |
|
149 |
+ feature = "character",start = "long", end = "long", |
|
150 | 150 |
score = "numeric", strand = "character", |
151 | 151 |
frame = "character") |
152 | 152 |
node_list <- c(fixed_element, columns) |
... | ... |
@@ -54,8 +54,8 @@ |
54 | 54 |
#' \item{contig: outputs the concatenation between the left_input_data and |
55 | 55 |
#' right_input_data regions that satisfy the genometric predicate, |
56 | 56 |
#' (i.e. the output region is defined as having left (right) coordinates |
57 |
-#' equal to the minimum (maximum) of the corresponding coordinate values in the |
|
58 |
-#' left_input_data and right_input_data regions satisfying |
|
57 |
+#' equal to the minimum (maximum) of the corresponding coordinate values |
|
58 |
+#' in the left_input_data and right_input_data regions satisfying |
|
59 | 59 |
#' the genometric predicate)} |
60 | 60 |
#' } |
61 | 61 |
#' |
... | ... |
@@ -65,12 +65,12 @@ |
65 | 65 |
#' |
66 | 66 |
#' @examples |
67 | 67 |
#' |
68 |
-#' ## Given a dataset 'hm' and one called 'tss' with a sample including |
|
69 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
70 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
71 |
-#' ## and takes the first/closest one for each TSS, provided that such distance |
|
72 |
-#' ## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
73 |
-#' ## from the same provider (joinby clause). |
|
68 |
+#' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
69 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
70 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
71 |
+#' # and takes the first/closest one for each TSS, provided that such distance |
|
72 |
+#' # is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
73 |
+#' # from the same provider (joinby clause). |
|
74 | 74 |
#' |
75 | 75 |
#' init_gmql() |
76 | 76 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -83,8 +83,9 @@ |
83 | 83 |
#' |
84 | 84 |
#' @export |
85 | 85 |
#' |
86 |
-join <- function(right_input_data, left_input_data, genometric_predicate = NULL, |
|
87 |
- joinBy = NULL, region_output="contig") |
|
86 |
+join <- function(right_input_data, left_input_data, |
|
87 |
+ genometric_predicate = NULL, joinBy = NULL, |
|
88 |
+ region_output="contig") |
|
88 | 89 |
{ |
89 | 90 |
if(!is.null(genometric_predicate)) |
90 | 91 |
{ |
... | ... |
@@ -6,8 +6,8 @@ |
6 | 6 |
#' The number of generated output samples is the Cartesian product |
7 | 7 |
#' of the samples in the two input datasets; |
8 | 8 |
#' each output sample has the same regions as the related input left sample, |
9 |
-#' with their attributes and values, plus the attributes computed as aggregates |
|
10 |
-#' over right region values. |
|
9 |
+#' with their attributes and values, plus the attributes computed as |
|
10 |
+#' aggregates over right region values. |
|
11 | 11 |
#' Output sample metadata are the union of the related input sample metadata, |
12 | 12 |
#' whose attribute names are prefixed with "left" or "right" respectively. |
13 | 13 |
#' |
... | ... |
@@ -24,9 +24,10 @@ |
24 | 24 |
#' @param aggregates list of element in the form \emph{key} = \emph{aggregate}. |
25 | 25 |
#' The \emph{aggregate} is an object of class AGGREGATES |
26 | 26 |
#' The aggregate functions available are: \code{\link{SUM}}, |
27 |
-#' \code{\link{COUNT}},\code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
28 |
-#' \code{\link{STD}}, \code{\link{MEDIAN}},\code{\link{BAG}}, |
|
29 |
-#' \code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
27 |
+#' \code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
28 |
+#' \code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
29 |
+#' \code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
30 |
+#' \code{\link{Q2}}, \code{\link{Q3}}. |
|
30 | 31 |
#' Every aggregate accepts a string value, except for COUNT |
31 | 32 |
#' Argument of 'aggregate' must exist in schema |
32 | 33 |
#' Two styles are allowed: |
... | ... |
@@ -59,21 +60,22 @@ |
59 | 60 |
#' |
60 | 61 |
#' @examples |
61 | 62 |
#' |
62 |
-#' ## It counts the number of regions in each sample from exp that overlap with |
|
63 |
-#' ## a ref region, and for each ref region it computes the minimum score |
|
64 |
-#' ## of all the regions in each exp sample that overlap with it. |
|
65 |
-#' ## The MAP joinby option ensures that only the exp samples referring to |
|
66 |
-#' ## the same 'cell_tissue' of a ref sample are mapped on such ref sample; |
|
67 |
-#' ## exp samples with no cell_tissue metadata attribute, or with such metadata |
|
68 |
-#' ## but with a different value from the one(s) of ref sample(s), |
|
69 |
-#' ## are disregarded. |
|
63 |
+#' # It counts the number of regions in each sample from exp that overlap with |
|
64 |
+#' # a ref region, and for each ref region it computes the minimum score |
|
65 |
+#' # of all the regions in each exp sample that overlap with it. |
|
66 |
+#' # The MAP joinby option ensures that only the exp samples referring to |
|
67 |
+#' # the same 'cell_tissue' of a ref sample are mapped on such ref sample; |
|
68 |
+#' # exp samples with no cell_tissue metadata attribute, or with such metadata |
|
69 |
+#' # but with a different value from the one(s) of ref sample(s), |
|
70 |
+#' # are disregarded. |
|
70 | 71 |
#' |
71 | 72 |
#' init_gmql() |
72 | 73 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
73 | 74 |
#' test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
74 | 75 |
#' exp = read_dataset(test_path) |
75 | 76 |
#' ref = read_dataset(test_path2) |
76 |
-#' out = map(ref,exp, list(minScore = MIN("score")), joinBy = c("cell_tissue")) |
|
77 |
+#' out = map(ref,exp, list(minScore = MIN("score")), |
|
78 |
+#' joinBy = c("cell_tissue")) |
|
77 | 79 |
#' |
78 | 80 |
#' |
79 | 81 |
#' @export |
... | ... |
@@ -38,10 +38,10 @@ |
38 | 38 |
#' |
39 | 39 |
#' @examples |
40 | 40 |
#' |
41 |
-#' ## It creates a dataset called merged which contains one sample for each |
|
42 |
-#' ## antibody_target value found within the metadata of the exp dataset sample; |
|
43 |
-#' ## each created sample contains all regions from all 'exp' samples |
|
44 |
-#' ## with a specific value for their antibody_target metadata attribute. |
|
41 |
+#' # It creates a dataset called merged which contains one sample for each |
|
42 |
+#' # antibody_target value found within the metadata of the exp dataset sample; |
|
43 |
+#' # each created sample contains all regions from all 'exp' samples |
|
44 |
+#' # with a specific value for their antibody_target metadata attribute. |
|
45 | 45 |
#' |
46 | 46 |
#' init_gmql() |
47 | 47 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
... | ... |
@@ -73,7 +73,7 @@ order <- function(input_data, metadata_ordering = NULL, mtop = 0, mtopg = 0, |
73 | 73 |
|
74 | 74 |
if(length(mtop)>1 || length(mtopg)>1 || length(rtop)>1 || length(rtopg)>1 |
75 | 75 |
|| length(mtopp)>1 || length(rtopp)>1) |
76 |
- warning("only first element is taken by rtop, mtop, mtopg, rtopg, rtopp, mtopp") |
|
76 |
+ warning("only first element: rtop, mtop, mtopg, rtopg, rtopp, mtopp") |
|
77 | 77 |
|
78 | 78 |
# we consider only the first element even if input is a vector of Int |
79 | 79 |
# we cut the other arguments |
... | ... |
@@ -234,7 +234,7 @@ We provide two metadata for you") |
234 | 234 |
if("phase" %in% col_names) # if GTF, change |
235 | 235 |
{ |
236 | 236 |
col_names <- plyr::revalue(col_names,c(type = "feature", |
237 |
- phase = "frame", seqnames = "seqname")) |
|
237 |
+ phase = "frame", seqnames = "seqname")) |
|
238 | 238 |
schema_matrix <- cbind(toupper(col_types),col_names) |
239 | 239 |
schema_matrix<- schema_matrix[setdiff(rownames(schema_matrix), |
240 | 240 |
c("group","width")),] |
... | ... |
@@ -264,9 +264,11 @@ We provide two metadata for you") |
264 | 264 |
{ |
265 | 265 |
parser <- toupper(parser) |
266 | 266 |
if(!identical(parser,"BEDPARSER") && !identical(parser,"ANNPARSER") && |
267 |
- !identical(parser,"BROADPROJPARSER") && !identical(parser,"BASICPARSER") |
|
268 |
- && !identical(parser,"NARROWPEAKPARSER") && |
|
269 |
- !identical(parser,"RNASEQPARSER") && !identical(parser,"CUSTOMPARSER")) |
|
267 |
+ !identical(parser,"BROADPROJPARSER") && |
|
268 |
+ !identical(parser,"BASICPARSER") && |
|
269 |
+ !identical(parser,"NARROWPEAKPARSER") && |
|
270 |
+ !identical(parser,"RNASEQPARSER") && |
|
271 |
+ !identical(parser,"CUSTOMPARSER")) |
|
270 | 272 |
stop("parser not defined") |
271 | 273 |
|
272 | 274 |
parser |
... | ... |
@@ -132,8 +132,9 @@ Function will be invoked with these parameters as NULL") |
132 | 132 |
dispatch = TRUE) |
133 | 133 |
} |
134 | 134 |
WrappeR <- J("it/polimi/genomics/r/Wrapper") |
135 |
- response <- WrappeR$select(predicate,region_predicate,join_condition_matrix, |
|
136 |
- semi_join_dataset, semi_join_negation, input_data$value) |
|
135 |
+ response <- WrappeR$select(predicate,region_predicate, |
|
136 |
+ join_condition_matrix, semi_join_dataset, |
|
137 |
+ semi_join_negation, input_data$value) |
|
137 | 138 |
error <- strtoi(response[1]) |
138 | 139 |
data <- response[2] |
139 | 140 |
if(error!=0) |
... | ... |
@@ -40,7 +40,7 @@ |
40 | 40 |
names <- names(meta_data) |
41 | 41 |
if(is.null(names)) |
42 | 42 |
{ |
43 |
- warning("You did not assign a names to a list.\nWe build names for you") |
|
43 |
+ warning("You did not assign a names to a list.\nWe build it for you") |
|
44 | 44 |
names <- sapply(meta_data, take_value.META_AGGREGATES) |
45 | 45 |
} |
46 | 46 |
else |
... | ... |
@@ -42,10 +42,10 @@ if(getRversion() >= "3.1.0") |
42 | 42 |
login_gmql <- function(url, username = NULL, password = NULL) |
43 | 43 |
{ |
44 | 44 |
as_guest <- TRUE |
45 |
- |
|
45 |
+ |
|
46 | 46 |
if(!is.null(username) || !is.null(password)) |
47 | 47 |
as_guest <- FALSE |
48 |
- |
|
48 |
+ |
|
49 | 49 |
if(as_guest) |
50 | 50 |
{ |
51 | 51 |
h <- c('Accept' = "Application/json") |
... | ... |
@@ -55,8 +55,8 @@ check.CONDITION <- function(value) |
55 | 55 |
#' |
56 | 56 |
#' ## select with condition |
57 | 57 |
#' ## the first and the third attribute are DEF the second one is EXACT |
58 |
-#' s = select(r, semi_join = list("cell_type", EXACT("cell"), "attribute_tag"), |
|
59 |
-#' semi_join_dataset = r) |
|
58 |
+#' s = select(r, semi_join = list("cell_type", EXACT("cell"), |
|
59 |
+#' "attribute_tag"), semi_join_dataset = r) |
|
60 | 60 |
#' |
61 | 61 |
#' \dontrun{ |
62 | 62 |
#' |
... | ... |
@@ -413,8 +413,8 @@ download_as_GRangesList <- function(url,datasetName) |
413 | 413 |
|
414 | 414 |
#' Shows metadata list from dataset sample |
415 | 415 |
#' |
416 |
-#' It retrieves metadata for a specific sample in dataset using the proper GMQL |
|
417 |
-#' web service available on a remote server |
|
416 |
+#' It retrieves metadata for a specific sample in dataset using the proper |
|
417 |
+#' GMQL web service available on a remote server |
|
418 | 418 |
#' |
419 | 419 |
#' @import httr |
420 | 420 |
#' |
... | ... |
@@ -463,7 +463,8 @@ sample_metadata <- function(url, datasetName,sampleName) |
463 | 463 |
#' |
464 | 464 |
#' It retrieves regions for a specific sample |
465 | 465 |
#' (whose name is specified in the parameter "sampleName") |
466 |
-#' in a specific dataset (whose name is specified in the paramter "datasetName") |
|
466 |
+#' in a specific dataset |
|
467 |
+#' (whose name is specified in the parameter "datasetName") |
|
467 | 468 |
#' using the proper GMQL web service available on a remote server |
468 | 469 |
#' |
469 | 470 |
#' @import httr |
... | ... |
@@ -25,7 +25,7 @@ check.DISTAL <- function(value) |
25 | 25 |
{ |
26 | 26 |
if(!is.numeric(value)) |
27 | 27 |
stop("value: is not a numeric") |
28 |
- |
|
28 |
+ |
|
29 | 29 |
if(is.numeric(value) && length(value)>1) |
30 | 30 |
stop("value: no multiple string") |
31 | 31 |
} |
... | ... |
@@ -49,12 +49,12 @@ check.DISTAL <- function(value) |
49 | 49 |
#' |
50 | 50 |
#' @examples |
51 | 51 |
#' |
52 |
-#' ### Given a dataset HM and one called TSS with a sample including |
|
53 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
54 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
55 |
-#' ## and takes the first/closest one for each TSS, |
|
56 |
-#' ## provided that such distance is lesser than 1200 bases and joined TSS |
|
57 |
-#' ## and HM samples are obtained from the same provider (joinby clause). |
|
52 |
+#' ## Given a dataset HM and one called TSS with a sample including |
|
53 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
54 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
55 |
+#' # and takes the first/closest one for each TSS, |
|
56 |
+#' # provided that such distance is lesser than 1200 bases and joined TSS |
|
57 |
+#' # and HM samples are obtained from the same provider (joinby clause). |
|
58 | 58 |
#' |
59 | 59 |
#' init_gmql() |
60 | 60 |
#' test_path <- system.file("example","DATASET",package = "RGMQL") |
... | ... |
@@ -65,13 +65,11 @@ check.DISTAL <- function(value) |
65 | 65 |
#' genometric_predicate = list(list(MD(1), DL(1200))), c("provider"), |
66 | 66 |
#' region_output = "RIGHT") |
67 | 67 |
#' |
68 |
-#' |
|
69 | 68 |
#' @export |
70 | 69 |
#' |
71 | 70 |
DL <- function(value) |
72 | 71 |
{ |
73 | 72 |
check.DISTAL(value) |
74 |
- |
|
75 | 73 |
list <- list(value = as.integer(value)) |
76 | 74 |
## Set the name for the class |
77 | 75 |
class(list) <- c("DL","DISTAL") |
... | ... |
@@ -96,12 +94,12 @@ DL <- function(value) |
96 | 94 |
#' |
97 | 95 |
#' @examples |
98 | 96 |
#' |
99 |
-#' ### Given a dataset HM and one called TSS with a sample including |
|
100 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
101 |
-#' ## that are at a minimal distance from a transcription start site 'TSS' |
|
102 |
-#' ## and takes the first/closest one for each TSS, provided that such distance |
|
103 |
-#' ## is greater than 12 bases and joined TSS and HM samples are obtained |
|
104 |
-#' ## from the same provider 'joinby clause'. |
|
97 |
+#' ## Given a dataset HM and one called TSS with a sample including |
|
98 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
99 |
+#' # that are at a minimal distance from a transcription start site 'TSS' |
|
100 |
+#' # and takes the first/closest one for each TSS, provided that such distance |
|
101 |
+#' # is greater than 12 bases and joined TSS and HM samples are obtained |
|
102 |
+#' # from the same provider 'joinby clause'. |
|
105 | 103 |
#' |
106 | 104 |
#' init_gmql() |
107 | 105 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -118,7 +116,6 @@ DL <- function(value) |
118 | 116 |
DG <- function(value) |
119 | 117 |
{ |
120 | 118 |
check.DISTAL(value) |
121 |
- |
|
122 | 119 |
list <- list(value = as.integer(value)) |
123 | 120 |
## Set the name for the class |
124 | 121 |
class(list) <- c("DG","DISTAL") |
... | ... |
@@ -149,12 +146,12 @@ DG <- function(value) |
149 | 146 |
#' |
150 | 147 |
#' @examples |
151 | 148 |
#' |
152 |
-#' ### Given a dataset HM and one called TSS with a sample including |
|
153 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
154 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
155 |
-#' ## and takes the first/closest one for each TSS, provided that such distance |
|
156 |
-#' ## is lesser than 120K bases and joined TSS and HM samples are obtained |
|
157 |
-#' ## from the same provider (joinby clause). |
|
149 |
+#' ## Given a dataset HM and one called TSS with a sample including |
|
150 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
151 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
152 |
+#' # and takes the first/closest one for each TSS, provided that such distance |
|
153 |
+#' # is lesser than 120K bases and joined TSS and HM samples are obtained |
|
154 |
+#' # from the same provider (joinby clause). |
|
158 | 155 |
#' |
159 | 156 |
#' init_gmql() |
160 | 157 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -171,7 +168,6 @@ DG <- function(value) |
171 | 168 |
DLE <- function(value) |
172 | 169 |
{ |
173 | 170 |
check.DISTAL(value) |
174 |
- |
|
175 | 171 |
list <- list(value = as.integer(value)) |
176 | 172 |
## Set the name for the class |
177 | 173 |
class(list) <- c("DLE","DISTAL") |
... | ... |
@@ -198,12 +194,12 @@ DLE <- function(value) |
198 | 194 |
#' |
199 | 195 |
#' @examples |
200 | 196 |
#' |
201 |
-#' ## Given a dataset 'hm' and one called 'tss' with a sample including |
|
202 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
203 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
204 |
-#' ## and takes the first/closest one for each TSS, provided that such distance |
|
205 |
-#' ## is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
206 |
-#' ## from the same provider (joinby clause). |
|
197 |
+#' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
198 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
199 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
200 |
+#' # and takes the first/closest one for each TSS, provided that such distance |
|
201 |
+#' # is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
202 |
+#' # from the same provider (joinby clause). |
|
207 | 203 |
#' |
208 | 204 |
#' init_gmql() |
209 | 205 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -219,7 +215,6 @@ DLE <- function(value) |
219 | 215 |
DGE <- function(value) |
220 | 216 |
{ |
221 | 217 |
check.DISTAL(value) |
222 |
- |
|
223 | 218 |
list <- list(value = as.integer(value)) |
224 | 219 |
## Set the name for the class |
225 | 220 |
class(list) <- c("DGE","DISTAL") |
... | ... |
@@ -249,12 +244,12 @@ DGE <- function(value) |
249 | 244 |
#' @examples |
250 | 245 |
#' |
251 | 246 |
#' |
252 |
-#' ### Given a dataset 'hm' and one called 'tss' with a sample including |
|
253 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
254 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
255 |
-#' ## and takes the first/closest one for each TSS, |
|
256 |
-#' ## provided that such distance is greater than 120K bases and joined 'tss' |
|
257 |
-#' ## and 'hm' samples are obtained from the same provider (joinby clause). |
|
247 |
+#' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
248 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
249 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
250 |
+#' # and takes the first/closest one for each TSS, |
|
251 |
+#' # provided that such distance is greater than 120K bases and joined 'tss' |
|
252 |
+#' # and 'hm' samples are obtained from the same provider (joinby clause). |
|
258 | 253 |
#' |
259 | 254 |
#' init_gmql() |
260 | 255 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -270,7 +265,6 @@ DGE <- function(value) |
270 | 265 |
MD <- function(value) |
271 | 266 |
{ |
272 | 267 |
check.DISTAL(value) |
273 |
- |
|
274 | 268 |
list <- list(value = as.integer(value)) |
275 | 269 |
## Set the name for the class |
276 | 270 |
class(list) <- c("MD","DISTAL") |
... | ... |
@@ -300,12 +294,12 @@ MD <- function(value) |
300 | 294 |
#' @examples |
301 | 295 |
#' |
302 | 296 |
#' |
303 |
-#' ### Given a dataset 'hm' and one called 'tss' with a sample including |
|
304 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
305 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
306 |
-#' ## and takes the first/closest one for each TSS, provided that such distance |
|
307 |
-#' ## is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
308 |
-#' ## from the same provider (joinby clause). |
|
297 |
+#' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
298 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
299 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
300 |
+#' # and takes the first/closest one for each TSS, provided that such distance |
|
301 |
+#' # is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
302 |
+#' # from the same provider (joinby clause). |
|
309 | 303 |
#' |
310 | 304 |
#' init_gmql() |
311 | 305 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -352,12 +346,12 @@ as.character.UP <- function(obj) { |
352 | 346 |
#' @examples |
353 | 347 |
#' |
354 | 348 |
#' |
355 |
-#' ### Given a dataset 'hm' and one called 'tss' with a sample including |
|
356 |
-#' ## Transcription Start Site annotations, it searches for those regions of hm |
|
357 |
-#' ## that are at a minimal distance from a transcription start site (TSS) |
|
358 |
-#' ## and takes the first/closest one for each TSS, provided that such distance |
|
359 |
-#' ## is greater than 12K bases and joined 'tss' and 'hm' samples are obtained |
|
360 |
-#' ## from the same provider (joinby clause). |
|
349 |
+#' # Given a dataset 'hm' and one called 'tss' with a sample including |
|
350 |
+#' # Transcription Start Site annotations, it searches for those regions of hm |
|
351 |
+#' # that are at a minimal distance from a transcription start site (TSS) |
|
352 |
+#' # and takes the first/closest one for each TSS, provided that such distance |
|
353 |
+#' # is greater than 12K bases and joined 'tss' and 'hm' samples are obtained |
|
354 |
+#' # from the same provider (joinby clause). |
|
361 | 355 |
#' |
362 | 356 |
#' init_gmql() |
363 | 357 |
#' test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -365,8 +359,8 @@ as.character.UP <- function(obj) { |
365 | 359 |
#' TSS = read_dataset(test_path) |
366 | 360 |
#' HM = read_dataset(test_path2) |
367 | 361 |
#' join_data = join(TSS, HM, |
368 |
-#' genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), c("provider"), |
|
369 |
-#' region_output = "RIGHT") |
|
362 |
+#' genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), |
|
363 |
+#' c("provider"), region_output = "RIGHT") |
|
370 | 364 |
#' |
371 | 365 |
#' |
372 | 366 |
#' @export |
... | ... |
@@ -23,12 +23,12 @@ region is less than, or equal to, 'value' bases. |
23 | 23 |
} |
24 | 24 |
\examples{ |
25 | 25 |
|
26 |
-### Given a dataset HM and one called TSS with a sample including |
|
27 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
28 |
-## that are at a minimal distance from a transcription start site 'TSS' |
|
29 |
-## and takes the first/closest one for each TSS, provided that such distance |
|
30 |
-## is greater than 12 bases and joined TSS and HM samples are obtained |
|
31 |
-## from the same provider 'joinby clause'. |
|
26 |
+## Given a dataset HM and one called TSS with a sample including |
|
27 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
28 |
+# that are at a minimal distance from a transcription start site 'TSS' |
|
29 |
+# and takes the first/closest one for each TSS, provided that such distance |
|
30 |
+# is greater than 12 bases and joined TSS and HM samples are obtained |
|
31 |
+# from the same provider 'joinby clause'. |
|
32 | 32 |
|
33 | 33 |
init_gmql() |
34 | 34 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -24,12 +24,12 @@ or equal to, 'value' bases. |
24 | 24 |
} |
25 | 25 |
\examples{ |
26 | 26 |
|
27 |
-## Given a dataset 'hm' and one called 'tss' with a sample including |
|
28 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
30 |
-## and takes the first/closest one for each TSS, provided that such distance |
|
31 |
-## is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
32 |
-## from the same provider (joinby clause). |
|
27 |
+# Given a dataset 'hm' and one called 'tss' with a sample including |
|
28 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
30 |
+# and takes the first/closest one for each TSS, provided that such distance |
|
31 |
+# is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
32 |
+# from the same provider (joinby clause). |
|
33 | 33 |
|
34 | 34 |
init_gmql() |
35 | 35 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -24,12 +24,12 @@ that their distance from the anchor region is less than, or equal to, |
24 | 24 |
} |
25 | 25 |
\examples{ |
26 | 26 |
|
27 |
-### Given a dataset HM and one called TSS with a sample including |
|
28 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
30 |
-## and takes the first/closest one for each TSS, |
|
31 |
-## provided that such distance is lesser than 1200 bases and joined TSS |
|
32 |
-## and HM samples are obtained from the same provider (joinby clause). |
|
27 |
+## Given a dataset HM and one called TSS with a sample including |
|
28 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
30 |
+# and takes the first/closest one for each TSS, |
|
31 |
+# provided that such distance is less than 1200 bases and joined TSS |
|
32 |
+# and HM samples are obtained from the same provider (joinby clause). |
|
33 | 33 |
|
34 | 34 |
init_gmql() |
35 | 35 |
test_path <- system.file("example","DATASET",package = "RGMQL") |
... | ... |
@@ -40,7 +40,6 @@ join_data = join(TSS, HM, |
40 | 40 |
genometric_predicate = list(list(MD(1), DL(1200))), c("provider"), |
41 | 41 |
region_output = "RIGHT") |
42 | 42 |
|
43 |
- |
|
44 | 43 |
} |
45 | 44 |
\seealso{ |
46 | 45 |
\code{\link{DGE}} \code{\link{DLE}} \code{\link{DG}} |
... | ... |
@@ -28,12 +28,12 @@ the anchor region |
28 | 28 |
} |
29 | 29 |
\examples{ |
30 | 30 |
|
31 |
-### Given a dataset HM and one called TSS with a sample including |
|
32 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
33 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
34 |
-## and takes the first/closest one for each TSS, provided that such distance |
|
35 |
-## is lesser than 120K bases and joined TSS and HM samples are obtained |
|
36 |
-## from the same provider (joinby clause). |
|
31 |
+## Given a dataset HM and one called TSS with a sample including |
|
32 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
33 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
34 |
+# and takes the first/closest one for each TSS, provided that such distance |
|
35 |
+# is less than 120K bases and joined TSS and HM samples are obtained |
|
36 |
+# from the same provider (joinby clause). |
|
37 | 37 |
|
38 | 38 |
init_gmql() |
39 | 39 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -24,12 +24,12 @@ directions of the genome. |
24 | 24 |
\examples{ |
25 | 25 |
|
26 | 26 |
|
27 |
-### Given a dataset 'hm' and one called 'tss' with a sample including |
|
28 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
30 |
-## and takes the first/closest one for each TSS, provided that such distance |
|
31 |
-## is greater than 12K bases and joined 'tss' and 'hm' samples are obtained |
|
32 |
-## from the same provider (joinby clause). |
|
27 |
+# Given a dataset 'hm' and one called 'tss' with a sample including |
|
28 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
30 |
+# and takes the first/closest one for each TSS, provided that such distance |
|
31 |
+# is greater than 12K bases and joined 'tss' and 'hm' samples are obtained |
|
32 |
+# from the same provider (joinby clause). |
|
33 | 33 |
|
34 | 34 |
init_gmql() |
35 | 35 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -37,8 +37,8 @@ test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
37 | 37 |
TSS = read_dataset(test_path) |
38 | 38 |
HM = read_dataset(test_path2) |
39 | 39 |
join_data = join(TSS, HM, |
40 |
-genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), c("provider"), |
|
41 |
-region_output = "RIGHT") |
|
40 |
+genometric_predicate = list(list(MD(1), DGE(12000), DOWN())), |
|
41 |
+c("provider"), region_output = "RIGHT") |
|
42 | 42 |
|
43 | 43 |
|
44 | 44 |
} |
... | ... |
@@ -28,8 +28,8 @@ r = read_dataset(test_path) |
28 | 28 |
|
29 | 29 |
## select with condition |
30 | 30 |
## the first and the third attribute are DEF the second one is EXACT |
31 |
-s = select(r, semi_join = list("cell_type", EXACT("cell"), "attribute_tag"), |
|
32 |
-semi_join_dataset = r) |
|
31 |
+s = select(r, semi_join = list("cell_type", EXACT("cell"), |
|
32 |
+"attribute_tag"), semi_join_dataset = r) |
|
33 | 33 |
|
34 | 34 |
\dontrun{ |
35 | 35 |
|
... | ... |
@@ -27,12 +27,12 @@ if they exceed the 'value' limit. |
27 | 27 |
\examples{ |
28 | 28 |
|
29 | 29 |
|
30 |
-### Given a dataset 'hm' and one called 'tss' with a sample including |
|
31 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
32 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
33 |
-## and takes the first/closest one for each TSS, |
|
34 |
-## provided that such distance is greater than 120K bases and joined 'tss' |
|
35 |
-## and 'hm' samples are obtained from the same provider (joinby clause). |
|
30 |
+# Given a dataset 'hm' and one called 'tss' with a sample including |
|
31 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
32 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
33 |
+# and takes the first/closest one for each TSS, |
|
34 |
+# provided that such distance is greater than 120K bases and joined 'tss' |
|
35 |
+# and 'hm' samples are obtained from the same provider (joinby clause). |
|
36 | 36 |
|
37 | 37 |
init_gmql() |
38 | 38 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -24,12 +24,12 @@ the directions of the genome. |
24 | 24 |
\examples{ |
25 | 25 |
|
26 | 26 |
|
27 |
-### Given a dataset 'hm' and one called 'tss' with a sample including |
|
28 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
30 |
-## and takes the first/closest one for each TSS, provided that such distance |
|
31 |
-## is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
32 |
-## from the same provider (joinby clause). |
|
27 |
+# Given a dataset 'hm' and one called 'tss' with a sample including |
|
28 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
29 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
30 |
+# and takes the first/closest one for each TSS, provided that such distance |
|
31 |
+# is greater than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
32 |
+# from the same provider (joinby clause). |
|
33 | 33 |
|
34 | 34 |
init_gmql() |
35 | 35 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -53,9 +53,10 @@ evaluation: the two attributes match if both end with value.} |
53 | 53 |
\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
54 | 54 |
The \emph{aggregate} is an object of class AGGREGATES |
55 | 55 |
The aggregate functions available are: \code{\link{SUM}}, |
56 |
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
57 |
-\code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
58 |
-\code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
56 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
57 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
58 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
59 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
59 | 60 |
Every aggregate accepts a string value, except for COUNT, which does not |
60 | 61 |
have any value. |
61 | 62 |
Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -75,8 +76,8 @@ It takes as input a dataset containing one or more samples and returns |
75 | 76 |
another dataset (with a single sample, if no \emph{groupby} option is |
76 | 77 |
specified) by “collapsing” the input dataset samples and their regions |
77 | 78 |
according to certain rules specified by the input parameters. |
78 |
-The attributes of the output genomic regions are only the region coordinates, |
|
79 |
-and Jaccard indexes (JaccardIntersect and JaccardResult). |
|
79 |
+The attributes of the output genomic regions are only the region |
|
80 |
+coordinates, and Jaccard indexes (JaccardIntersect and JaccardResult). |
|
80 | 81 |
Jaccard Indexes are standard measures of similarity of the contributing |
81 | 82 |
regions, added as default region attributes. |
82 | 83 |
The JaccardIntersect index is calculated as the ratio between the lengths |
... | ... |
@@ -42,9 +42,9 @@ for the subsequent GMQL function |
42 | 42 |
} |
43 | 43 |
\description{ |
44 | 44 |
It produces one sample in the result for each sample of the left operand, |
45 |
-by keeping the same metadata of the left input sample and only those regions |
|
46 |
-(with their schema and values) of the left input sample which do not |
|
47 |
-intersect with any region in the right operand sample. |
|
45 |
+by keeping the same metadata of the left input sample and only those |
|
46 |
+regions (with their schema and values) of the left input sample which |
|
47 |
+do not intersect with any region in the right operand sample. |
|
48 | 48 |
The optional \emph{joinby} clause is used to extract a subset of couples |
49 | 49 |
from the cartesian product of two dataset |
50 | 50 |
\emph{left_input_data} x \emph{right_input_data} on which to apply |
... | ... |
@@ -12,9 +12,10 @@ extend(input_data, metadata = NULL) |
12 | 12 |
\item{metadata}{list of element in the form \emph{key} = \emph{aggregate}. |
13 | 13 |
The \emph{aggregate} is an object of class AGGREGATES |
14 | 14 |
The aggregate functions available are: \code{\link{SUM}}, |
15 |
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
16 |
-\code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
17 |
-\code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
15 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
16 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
17 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
18 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
18 | 19 |
Every aggregate accepts a string value, except for COUNT, which does not |
19 | 20 |
have any value. |
20 | 21 |
Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -51,9 +51,10 @@ evaluation: the two attributes match if both end with value.} |
51 | 51 |
\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
52 | 52 |
The \emph{aggregate} is an object of class AGGREGATES |
53 | 53 |
The aggregate functions available are: \code{\link{SUM}}, |
54 |
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
55 |
-\code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
56 |
-\code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
54 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
55 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
56 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
57 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
57 | 58 |
Every aggregate accepts a string value, except for COUNT, which does not |
58 | 59 |
have any value. |
59 | 60 |
Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -53,9 +53,10 @@ evaluation: the two attributes match if both end with value.} |
53 | 53 |
\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
54 | 54 |
The \emph{aggregate} is an object of class AGGREGATES |
55 | 55 |
The aggregate functions available are: \code{\link{SUM}}, |
56 |
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
57 |
-\code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
58 |
-\code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
56 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
57 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
58 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
59 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
59 | 60 |
Every aggregate accepts a string value, except for COUNT, which does not |
60 | 61 |
have any value. |
61 | 62 |
Argument of 'aggregate function' must exist in schema, i.e. among region |
... | ... |
@@ -49,8 +49,8 @@ no output is produced} |
49 | 49 |
\item{contig: outputs the concatenation between the left_input_data and |
50 | 50 |
right_input_data regions that satisfy the genometric predicate, |
51 | 51 |
(i.e. the output region is defined as having left (right) coordinates |
52 |
-equal to the minimum (maximum) of the corresponding coordinate values in the |
|
53 |
-left_input_data and right_input_data regions satisfying |
|
52 |
+equal to the minimum (maximum) of the corresponding coordinate values |
|
53 |
+in the left_input_data and right_input_data regions satisfying |
|
54 | 54 |
the genometric predicate)} |
55 | 55 |
}} |
56 | 56 |
} |
... | ... |
@@ -71,12 +71,12 @@ with their attribute names prefixed with left or right respectively. |
71 | 71 |
} |
72 | 72 |
\examples{ |
73 | 73 |
|
74 |
-## Given a dataset 'hm' and one called 'tss' with a sample including |
|
75 |
-## Transcription Start Site annotations, it searches for those regions of hm |
|
76 |
-## that are at a minimal distance from a transcription start site (TSS) |
|
77 |
-## and takes the first/closest one for each TSS, provided that such distance |
|
78 |
-## is lesser than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
79 |
-## from the same provider (joinby clause). |
|
74 |
+# Given a dataset 'hm' and one called 'tss' with a sample including |
|
75 |
+# Transcription Start Site annotations, it searches for those regions of hm |
|
76 |
+# that are at a minimal distance from a transcription start site (TSS) |
|
77 |
+# and takes the first/closest one for each TSS, provided that such distance |
|
78 |
+# is less than 120K bases and joined 'tss' and 'hm' samples are obtained |
|
79 |
+# from the same provider (joinby clause). |
|
80 | 80 |
|
81 | 81 |
init_gmql() |
82 | 82 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
... | ... |
@@ -14,9 +14,10 @@ map(left_input_data, right_input_data, aggregates = NULL, joinBy = NULL) |
14 | 14 |
\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
15 | 15 |
The \emph{aggregate} is an object of class AGGREGATES |
16 | 16 |
The aggregate functions available are: \code{\link{SUM}}, |
17 |
-\code{\link{COUNT}},\code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
18 |
-\code{\link{STD}}, \code{\link{MEDIAN}},\code{\link{BAG}}, |
|
19 |
-\code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
17 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
18 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
19 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
20 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
20 | 21 |
Every aggregate accepts a string value, except for COUNT |
21 | 22 |
Argument of 'aggregate' must exist in schema |
22 | 23 |
Two styles are allowed: |
... | ... |
@@ -54,8 +55,8 @@ for each region of each sample in the left dataset; |
54 | 55 |
The number of generated output samples is the Cartesian product |
55 | 56 |
of the samples in the two input datasets; |
56 | 57 |
each output sample has the same regions as the related input left sample, |
57 |
-with their attributes and values, plus the attributes computed as aggregates |
|
58 |
-over right region values. |
|
58 |
+with their attributes and values, plus the attributes computed as |
|
59 |
+aggregates over right region values. |
|
59 | 60 |
Output sample metadata are the union of the related input sample metadata, |
60 | 61 |
whose attribute names are prefixed with "left" or "right" respectively. |
61 | 62 |
} |
... | ... |
@@ -69,21 +70,22 @@ present with equal values in both M1 and M2 |
69 | 70 |
} |
70 | 71 |
\examples{ |
71 | 72 |
|
72 |
-## It counts the number of regions in each sample from exp that overlap with |
|
73 |
-## a ref region, and for each ref region it computes the minimum score |
|
74 |
-## of all the regions in each exp sample that overlap with it. |
|
75 |
-## The MAP joinby option ensures that only the exp samples referring to |
|
76 |
-## the same 'cell_tissue' of a ref sample are mapped on such ref sample; |
|
77 |
-## exp samples with no cell_tissue metadata attribute, or with such metadata |
|
78 |
-## but with a different value from the one(s) of ref sample(s), |
|
79 |
-## are disregarded. |
|
73 |
+# It counts the number of regions in each sample from exp that overlap with |
|
74 |
+# a ref region, and for each ref region it computes the minimum score |
|
75 |
+# of all the regions in each exp sample that overlap with it. |
|
76 |
+# The MAP joinby option ensures that only the exp samples referring to |
|
77 |
+# the same 'cell_tissue' of a ref sample are mapped on such ref sample; |
|
78 |
+# exp samples with no cell_tissue metadata attribute, or with such metadata |
|
79 |
+# but with a different value from the one(s) of ref sample(s), |
|
80 |
+# are disregarded. |
|
80 | 81 |
|
81 | 82 |
init_gmql() |
82 | 83 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
83 | 84 |
test_path2 <- system.file("example", "DATASET_GDM", package = "RGMQL") |
84 | 85 |
exp = read_dataset(test_path) |
85 | 86 |
ref = read_dataset(test_path2) |
86 |
-out = map(ref,exp, list(minScore = MIN("score")), joinBy = c("cell_tissue")) |
|
87 |
+out = map(ref,exp, list(minScore = MIN("score")), |
|
88 |
+joinBy = c("cell_tissue")) |
|
87 | 89 |
|
88 | 90 |
|
89 | 91 |
} |
... | ... |
@@ -44,10 +44,10 @@ are disregarded. |
44 | 44 |
} |
45 | 45 |
\examples{ |
46 | 46 |
|
47 |
-## It creates a dataset called merged which contains one sample for each |
|
48 |
-## antibody_target value found within the metadata of the exp dataset sample; |
|
49 |
-## each created sample contains all regions from all 'exp' samples |
|
50 |
-## with a specific value for their antibody_target metadata attribute. |
|
47 |
+# It creates a dataset called merged which contains one sample for each |
|
48 |
+# antibody_target value found within the metadata of the exp dataset sample; |
|
49 |
+# each created sample contains all regions from all 'exp' samples |
|
50 |
+# with a specific value for their antibody_target metadata attribute. |
|
51 | 51 |
|
52 | 52 |
init_gmql() |
53 | 53 |
test_path <- system.file("example","DATASET",package = "RGMQL") |
... | ... |
@@ -18,8 +18,8 @@ and base url; service name is added automatically} |
18 | 18 |
list of metadata in the form 'key = value' |
19 | 19 |
} |
20 | 20 |
\description{ |
21 |
-It retrieves metadata for a specific sample in dataset using the proper GMQL |
|
22 |
-web service available on a remote server |
|
21 |
+It retrieves metadata for a specific sample in dataset using the proper |
|
22 |
+GMQL web service available on a remote server |
|
23 | 23 |
} |
24 | 24 |
\details{ |
25 | 25 |
If an error occurs, a specific error is printed |
... | ... |
@@ -20,7 +20,8 @@ Granges data containing regions of sample |
20 | 20 |
\description{ |
21 | 21 |
It retrieves regions for a specific sample |
22 | 22 |
(whose name is specified in the parameter "sampleName") |
23 |
-in a specific dataset (whose name is specified in the paramter "datasetName") |
|
23 |
+in a specific dataset |
|
24 |
+(whose name is specified in the parameter "datasetName") |
|
24 | 25 |
using the proper GMQL web service available on a remote server |
25 | 26 |
} |
26 | 27 |
\details{ |
... | ... |
@@ -53,9 +53,10 @@ evaluation: the two attributes match if both end with value.} |
53 | 53 |
\item{aggregates}{list of element in the form \emph{key} = \emph{aggregate}. |
54 | 54 |
The \emph{aggregate} is an object of class AGGREGATES |
55 | 55 |
The aggregate functions available are: \code{\link{SUM}}, |
56 |
-\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, \code{\link{AVG}}, |
|
57 |
-\code{\link{MEDIAN}}, \code{\link{STD}}, \code{\link{BAG}}, |
|
58 |
-\code{\link{BAGD}}, \code{\link{Q1}}, \code{\link{Q2}}, \code{\link{Q3}}. |
|
56 |
+\code{\link{COUNT}}, \code{\link{MIN}}, \code{\link{MAX}}, |
|
57 |
+\code{\link{AVG}}, \code{\link{MEDIAN}}, \code{\link{STD}}, |
|
58 |
+\code{\link{BAG}}, \code{\link{BAGD}}, \code{\link{Q1}}, |
|
59 |
+\code{\link{Q2}}, \code{\link{Q3}}. |
|
59 | 60 |
Every aggregate accepts a string value, except for COUNT, which does not |
60 | 61 |
have any value. |
61 | 62 |
Argument of 'aggregate function' must exist in schema, i.e. among region |