... | ... |
@@ -8,14 +8,11 @@ export(BAG) |
8 | 8 |
export(BAGD) |
9 | 9 |
export(COUNT) |
10 | 10 |
export(DESC) |
11 |
-export(DF) |
|
12 | 11 |
export(DG) |
13 | 12 |
export(DGE) |
14 | 13 |
export(DL) |
15 | 14 |
export(DLE) |
16 | 15 |
export(DOWN) |
17 |
-export(EX) |
|
18 |
-export(FN) |
|
19 | 16 |
export(MAX) |
20 | 17 |
export(MD) |
21 | 18 |
export(MEDIAN) |
... | ... |
@@ -31,6 +28,7 @@ export(SUM) |
31 | 28 |
export(UP) |
32 | 29 |
export(compile_query) |
33 | 30 |
export(compile_query_fromfile) |
31 |
+export(condition_evaluation) |
|
34 | 32 |
export(delete_dataset) |
35 | 33 |
export(download_as_GRangesList) |
36 | 34 |
export(download_dataset) |
... | ... |
@@ -64,8 +64,19 @@ |
64 | 64 |
# meta join condition |
65 | 65 |
.join_condition <- function(cond) |
66 | 66 |
{ |
67 |
- join_condition_matrix <- do.call(rbind, cond) |
|
68 |
- join_condition_matrix |
|
67 |
+ cond_matrix <- NULL |
|
68 |
+ def <- cond$def |
|
69 |
+ if(!is.null(def)) |
|
70 |
+ cond_matrix <- rbind(cond_matrix, def) |
|
71 |
+ |
|
72 |
+ exact <- cond$exact |
|
73 |
+ if(!is.null(exact)) |
|
74 |
+ cond_matrix <- rbind(cond_matrix, exact) |
|
75 |
+ |
|
76 |
+ full <- cond$full |
|
77 |
+ if(!is.null(full)) |
|
78 |
+ cond_matrix <- rbind(cond_matrix, full) |
|
79 |
+ cond_matrix |
|
69 | 80 |
} |
70 | 81 |
|
71 | 82 |
.check_input <- function(value) |
... | ... |
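A quick illustration of what the rewritten helper produces (a sketch, not part of the patch; `.join_condition()` is internal, so it is reached via `:::`, and the condition list comes from the new `condition_evaluation()` introduced further down):

```r
library(RGMQL)

# Build a condition list and collapse it into the 2-column matrix that the
# Java backend expects: one row per attribute, first column the evaluation
# type, second column the attribute name.
cond <- condition_evaluation(default = "cell", exact = "antibody_target")
RGMQL:::.join_condition(cond)
# A character matrix with one row per attribute, e.g. the rows
# ("DEF", "cell") and ("EXACT", "antibody_target").
# When every component of the list is NULL or empty, the helper returns NULL,
# which the callers translate into .jnull("java/lang/String").
```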
@@ -8,86 +8,57 @@ |
8 | 8 |
#' They create a 2-D array made up of two columns: |
9 | 9 |
#' type of condition evaluation and the metadata attribute name |
10 | 10 |
#' |
11 |
-#' \itemize{ |
|
12 |
-#' \item{FN: It defines a FULL (FULLNAME) evaluation of the input values. |
|
11 |
+#' |
|
12 |
+#' @param default series of strings identifying names of metadata attributes |
|
13 |
+#' to be evaluated. |
|
14 |
+#' It defines a DEFAULT evaluation of the input values. |
|
15 |
+#' DEFAULT evaluation: the two attributes match if both end with value. |
|
16 |
+#' |
|
17 |
+#' @param full series of strings identifying names of metadata attributes |
|
18 |
+#' to be evaluated. |
|
19 |
+#' It defines a FULL (FULLNAME) evaluation of the input values. |
|
13 | 20 |
#' FULL evaluation: two attributes match if they both end with value and, |
14 |
-#' if they have further prefixes, the two prefix sequences are identical} |
|
15 |
-#' \item{EX: It defines a EXACT evaluation of the input values. |
|
21 |
+#' if they have further prefixes, the two prefix sequences are identical. |
|
22 |
+#' |
|
23 |
+#' @param exact series of strings identifying names of metadata attributes |
|
24 |
+#' to be evaluated. |
|
25 |
+#' It defines an EXACT evaluation of the input values. |
|
16 | 26 |
#' EXACT evaluation: only attributes exactly as value match; |
17 |
-#' no further prefixes are allowed. } |
|
18 |
-#' \item{DF: It defines a DEFAULT evaluation of the input values. |
|
19 |
-#' DEFAULT evaluation: the two attributes match if both end with value.} |
|
20 |
-#' } |
|
21 |
-#' |
|
22 |
-#' @param ... series of string identifying a name of metadata attribute |
|
23 |
-#' to be evaluated |
|
27 |
+#' no further prefixes are allowed. |
|
24 | 28 |
#' |
25 |
-#' @return 2-D array containing method of evaluation and metadata |
|
29 |
+#' @return list of 2-D arrays containing the evaluation method and the metadata attribute names |
|
26 | 30 |
#' |
27 | 31 |
#' @examples |
28 | 32 |
#' |
29 | 33 |
#' "where is my example?" |
30 | 34 |
#' |
31 | 35 |
#' @name Evaluation-Function |
32 |
-#' @aliases FN |
|
36 |
+#' @aliases condition_evaluation |
|
33 | 37 |
#' @rdname condition_eval_func |
34 | 38 |
#' @export |
35 |
-FN <- function(...) |
|
39 |
+condition_evaluation <- function(default = c(""), full = c(""), exact = c("")) |
|
36 | 40 |
{ |
37 |
- conds <- c(...) |
|
38 |
- conds = conds[!conds %in% ""] |
|
39 |
- conds = conds[!duplicated(conds)] |
|
40 |
- if(length(conds)<=0) |
|
41 |
- join_condition_matrix <- .jnull("java/lang/String") |
|
42 |
- else |
|
43 |
- { |
|
44 |
- join_condition_matrix <- t(sapply(conds, function(x) { |
|
45 |
- new_value = c("FULL",x) |
|
46 |
- matrix <- matrix(new_value) |
|
47 |
- })) |
|
48 |
- } |
|
49 |
- join_condition_matrix |
|
41 |
+ df <- .condition("DEF",default) |
|
42 |
+ fn <- .condition("FULL",full) |
|
43 |
+ ex <- .condition("EXACT",exact) |
|
44 |
+ list("def" = df, "full" = fn, "exact" = ex) |
|
50 | 45 |
} |
51 | 46 |
|
52 |
-#' @name Evaluation-Function |
|
53 |
-#' @aliases EX |
|
54 |
-#' @rdname condition_eval_func |
|
55 |
-#' @export |
|
56 |
-EX <- function(...) |
|
47 |
+.condition <- function(cond, array) |
|
57 | 48 |
{ |
58 |
- conds <- c(...) |
|
59 |
- conds = conds[!conds %in% ""] |
|
60 |
- conds = conds[!duplicated(conds)] |
|
61 |
- if(length(conds)<=0) |
|
62 |
- join_condition_matrix <- .jnull("java/lang/String") |
|
49 |
+ array = array[!array %in% ""] |
|
50 |
+ array = array[!duplicated(array)] |
|
51 |
+ |
|
52 |
+ if(!length(array)) |
|
53 |
+ join_condition_matrix <- NULL |
|
63 | 54 |
else |
64 | 55 |
{ |
65 |
- join_condition_matrix <- t(sapply(conds, function(x) { |
|
66 |
- new_value = c("EXACT",x) |
|
56 |
+ join_condition_matrix <- t(sapply(array, function(x) { |
|
57 |
+ new_value = c(cond, x) |
|
67 | 58 |
matrix <- matrix(new_value) |
68 | 59 |
})) |
69 | 60 |
} |
70 | 61 |
join_condition_matrix |
71 | 62 |
} |
72 | 63 |
|
73 |
-#' @name Evaluation-Function |
|
74 |
-#' @aliases DF |
|
75 |
-#' @rdname condition_eval_func |
|
76 |
-#' @export |
|
77 |
-DF <- function(...) |
|
78 |
-{ |
|
79 |
- conds <- c(...) |
|
80 |
- conds = conds[!conds %in% ""] |
|
81 |
- conds = conds[!duplicated(conds)] |
|
82 |
- if(length(conds)<=0) |
|
83 |
- join_condition_matrix <- .jnull("java/lang/String") |
|
84 |
- else |
|
85 |
- { |
|
86 |
- join_condition_matrix <- t(sapply(conds, function(x) { |
|
87 |
- new_value = c("DEF",x) |
|
88 |
- matrix <- matrix(new_value) |
|
89 |
- })) |
|
90 |
- } |
|
91 |
- join_condition_matrix |
|
92 |
-} |
|
93 | 64 |
|
... | ... |
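The three former helpers are collapsed into one constructor. A minimal usage sketch of the new interface (the attribute names are illustrative):

```r
library(RGMQL)

cond <- condition_evaluation(
    default = c("cell"),            # DEFAULT evaluation ("DEF")
    full    = c("provider"),        # FULL evaluation ("FULL")
    exact   = c("antibody_target")  # EXACT evaluation ("EXACT")
)
str(cond)
# A list with components $def, $full and $exact: each is either NULL (no
# attribute requested for that evaluation) or a character matrix pairing the
# evaluation type with an attribute name; empty strings and duplicates are
# dropped by .condition().
```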
@@ -64,9 +64,11 @@ filter_and_extract <- function(data, metadata = NULL, |
64 | 64 |
suffix = "antibody_target") |
65 | 65 |
{ |
66 | 66 |
if(is(data,"GRangesList")) |
67 |
- .extract_from_GRangesList(data, metadata, metadata_prefix, regions) |
|
67 |
+ .extract_from_GRangesList(data, metadata, metadata_prefix, regions, |
|
68 |
+ suffix) |
|
68 | 69 |
else |
69 |
- .extract_from_dataset(data, metadata, metadata_prefix, regions, suffix) |
|
70 |
+ .extract_from_dataset(data, metadata, metadata_prefix, regions, |
|
71 |
+ suffix) |
|
70 | 72 |
} |
71 | 73 |
|
72 | 74 |
.extract_from_dataset <- function(datasetName, metadata, metadata_prefix, |
... | ... |
@@ -130,7 +132,7 @@ filter_and_extract <- function(data, metadata = NULL, |
130 | 132 |
} |
131 | 133 |
|
132 | 134 |
.extract_from_GRangesList <- function(rangesList, metadata, metadata_prefix, |
133 |
- regions) |
|
135 |
+ regions, suffix) |
|
134 | 136 |
{ |
135 | 137 |
if(!is(rangesList,"GRangesList")) |
136 | 138 |
stop("only GrangesList admitted") |
... | ... |
@@ -139,7 +141,8 @@ filter_and_extract <- function(data, metadata = NULL, |
139 | 141 |
stop("rangesList empty") |
140 | 142 |
|
141 | 143 |
meta_list <- metadata(rangesList) |
142 |
- samples <- .check_metadata_list(metadata, metadata_prefix,meta_list) |
|
144 |
+ samples <- .check_metadata_list(metadata, metadata_prefix, meta_list, |
|
145 |
+ suffix) |
|
143 | 146 |
if(length(unlist(samples))<=0) |
144 | 147 |
samples <- rangesList |
145 | 148 |
else |
... | ... |
@@ -166,7 +169,7 @@ filter_and_extract <- function(data, metadata = NULL, |
166 | 169 |
g1 |
167 | 170 |
} |
168 | 171 |
|
169 |
-.check_metadata_list <- function(metadata,metadata_prefix,meta_list) |
|
172 |
+.check_metadata_list <- function(metadata,metadata_prefix,meta_list,col_name) |
|
170 | 173 |
{ |
171 | 174 |
vec_meta <- paste0(metadata_prefix,metadata) |
172 | 175 |
list <- mapply(function(x,index){ |
... | ... |
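With `suffix` now forwarded on both branches, the same call shape works for a dataset folder and for a GRangesList; a sketch based on the example dataset shipped with the package (the default suffix is "antibody_target"):

```r
library(RGMQL)

test_path <- system.file("example", "DATASET", package = "RGMQL")

# From a GMQL dataset folder on disk
gr1 <- filter_and_extract(test_path, regions = c("pvalue", "peak"),
                          suffix = "antibody_target")

# From a GRangesList previously imported with import_gmql()
grl <- import_gmql(test_path, TRUE)
gr2 <- filter_and_extract(grl, regions = c("pvalue", "peak"),
                          suffix = "antibody_target")
```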
@@ -48,16 +48,9 @@ |
48 | 48 |
#' \item{an expression built using PARAMETER object: (ALL() + N) / K or |
49 | 49 |
#' ALL() / K, with N and K integer values } |
50 | 50 |
#' } |
51 |
-#' @param groupBy list of evalation functions to define evaluation on metadata: |
|
52 |
-#' \itemize{ |
|
53 |
-#' \item{\code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
54 |
-#' if they both end with \emph{value} and, if they have further prefixes, |
|
55 |
-#' the two prefix sequence are identical} |
|
56 |
-#' \item{\code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
57 |
-#' as \emph{value} match; no further prefixes are allowed. } |
|
58 |
-#' \item{\code{\link{DF}}(value): Default evaluation, the two attributes match |
|
59 |
-#' if both end with \emph{value}.} |
|
60 |
-#' } |
|
51 |
+#' @param groupBy \code{\link{condition_evaluation}} function to support |
|
52 |
+#' methods with groupBy or joinBy input parameter |
|
53 |
+#' |
|
61 | 54 |
#' @param ... a series of expressions separated by comma in the form |
62 | 55 |
#' \emph{key} = \emph{aggregate}. The \emph{aggregate} is an object of |
63 | 56 |
#' class AGGREGATES. The aggregate functions available are: \code{\link{SUM}}, |
... | ... |
@@ -122,7 +115,7 @@ |
122 | 115 |
#' ## regions the minimum pvalue of the overlapping regions (min_pvalue) |
123 | 116 |
#' ## and their Jaccard indexes (JaccardIntersect and JaccardResult). |
124 | 117 |
#' |
125 |
-#' res = cover(exp, 2, 3, groupBy = list(DF("cell")), |
|
118 |
+#' res = cover(exp, 2, 3, groupBy = condition_evaluation(c("cell")), |
|
126 | 119 |
#' min_pValue = MIN("pvalue")) |
127 | 120 |
#' |
128 | 121 |
#' @name cover |
... | ... |
@@ -156,12 +149,15 @@ gmql_cover <- function(data, min_acc, max_acc, groupBy, aggregates, flag) |
156 | 149 |
if(!is.null(groupBy)) |
157 | 150 |
{ |
158 | 151 |
cond <- .join_condition(groupBy) |
159 |
- join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
152 |
+ if(is.null(cond)) |
|
153 |
+ join_condition_matrix <- .jnull("java/lang/String") |
|
154 |
+ else |
|
155 |
+ join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
160 | 156 |
} |
161 | 157 |
else |
162 | 158 |
join_condition_matrix <- .jnull("java/lang/String") |
163 | 159 |
|
164 |
- if(!is.null(aggregates) && !length(aggregates) == 0) |
|
160 |
+ if(!is.null(aggregates) && length(aggregates)) |
|
165 | 161 |
{ |
166 | 162 |
aggr <- .aggregates(aggregates,"AGGREGATES") |
167 | 163 |
metadata_matrix <- .jarray(aggr, dispatch = TRUE) |
... | ... |
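The same NULL handling around `.join_condition()` now appears in `gmql_cover()`, `gmql_join()` and `gmql_merge()`. A possible follow-up, shown only as a sketch with an invented helper name, would factor it out once; it relies on the same rJava calls the patch already uses:

```r
# Hypothetical helper, not part of this patch.
.to_join_matrix <- function(by)
{
    if(is.null(by))
        return(.jnull("java/lang/String"))

    cond <- .join_condition(by)
    if(is.null(cond))
        .jnull("java/lang/String")
    else
        .jarray(cond, dispatch = TRUE)
}

# gmql_cover(), gmql_join() and gmql_merge() could then all reduce to:
# join_condition_matrix <- .to_join_matrix(groupBy)
```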
@@ -24,16 +24,8 @@ |
24 | 24 |
#' \code{\link{DLE}}, \code{\link{DGE}}, \code{\link{DL}}, \code{\link{DG}}, |
25 | 25 |
#' \code{\link{MD}}, \code{\link{UP}}, \code{\link{DOWN}} |
26 | 26 |
#' |
27 |
-#' @param joinBy list of evalation functions to define evaluation on metadata: |
|
28 |
-#' \itemize{ |
|
29 |
-#' \item{ \code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
30 |
-#' if they both end with \emph{value} and, if they have further prefixes, |
|
31 |
-#' the two prefix sequence are identical.} |
|
32 |
-#' \item{ \code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
33 |
-#' as \emph{value} match; no further prefixes are allowed.} |
|
34 |
-#' \item{ \code{\link{DF}}(value): Default evaluation, the two attributes match |
|
35 |
-#' if both end with \emph{value}.} |
|
36 |
-#' } |
|
27 |
+#' @param joinBy \code{\link{condition_evaluation}} function to support |
|
28 |
+#' methods with groupBy or joinBy input parameter |
|
37 | 29 |
#' |
38 | 30 |
#' @param region_output single string that declares which region is given in |
39 | 31 |
#' output for each input pair of left dataset and right dataset regions |
... | ... |
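After this change `joinBy` is expected to be the list returned by `condition_evaluation()` rather than a list of `FN()`/`EX()`/`DF()` calls. A hedged usage sketch (the dataset objects and the genometric predicate are illustrative, not taken from the patch):

```r
library(RGMQL)

# ds_left and ds_right are assumed to be GMQLDataset objects, e.g. from
# read_dataset(); MD(1) keeps, for each left region, the closest right region.
res <- merge(ds_left, ds_right,
             genometric_predicate = list(MD(1)),
             joinBy = condition_evaluation(c("cell")),
             region_output = "LEFT")
```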
@@ -93,7 +85,6 @@ setMethod("merge", c("GMQLDataset","GMQLDataset"), |
93 | 85 |
{ |
94 | 86 |
ptr_data_x <- x@value |
95 | 87 |
ptr_data_y <- y@value |
96 |
- joinBy = list(...) |
|
97 | 88 |
gmql_join(ptr_data_x, ptr_data_y, genometric_predicate, |
98 | 89 |
joinBy, region_output) |
99 | 90 |
}) |
... | ... |
@@ -123,17 +114,20 @@ gmql_join <- function(left_data, right_data, genometric_predicate, joinBy, |
123 | 114 |
else |
124 | 115 |
genomatrix <- .jnull("java/lang/String") |
125 | 116 |
|
126 |
- if(!is.null(joinBy) && !length(joinBy) == 0) |
|
127 |
- join_condition_matrix <- .jarray(.join_condition(joinBy), |
|
128 |
- dispatch = TRUE) |
|
117 |
+ if(!is.null(joinBy)) |
|
118 |
+ { |
|
119 |
+ cond <- .join_condition(joinBy) |
|
120 |
+ if(is.null(cond)) |
|
121 |
+ join_condition_matrix <- .jnull("java/lang/String") |
|
122 |
+ else |
|
123 |
+ join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
124 |
+ } |
|
129 | 125 |
else |
130 | 126 |
join_condition_matrix <- .jnull("java/lang/String") |
131 | 127 |
|
132 | 128 |
ouput <- toupper(region_output) |
133 |
- if(!identical(ouput,"CAT") && !identical(ouput,"LEFT") && |
|
134 |
- !identical(ouput,"RIGHT") && !identical(ouput,"INT") && |
|
135 |
- !identical(ouput,"RIGHT_DIST") && !identical(ouput,"BOTH") && |
|
136 |
- !identical(ouput,"LEFT_DIST")) |
|
129 |
+ if(!ouput %in% c("CAT", "LEFT", "RIGHT", "INT", "BOTH", "RIGHT_DIST", |
|
130 |
+ "LEFT_DIST")) |
|
137 | 131 |
stop("region_output must be cat, left, right, right_dist, left_dist |
138 | 132 |
or int (intersection)") |
139 | 133 |
|
... | ... |
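The chain of `identical()` calls is replaced by a single `%in%` test. Purely as a suggestion, base R's `match.arg()` would achieve the same validation while listing the allowed values in its error message automatically (note that the current `stop()` text omits "both", although the check accepts it):

```r
# Alternative sketch, not what the patch does; it mirrors the existing
# `ouput` variable of gmql_join().
ouput <- match.arg(toupper(region_output),
                   c("CAT", "LEFT", "RIGHT", "INT", "BOTH",
                     "RIGHT_DIST", "LEFT_DIST"))
```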
@@ -15,16 +15,8 @@ |
15 | 15 |
#' @importFrom S4Vectors aggregate |
16 | 16 |
#' |
17 | 17 |
#' @param x GMQLDataset class object |
18 |
-#' @param groupBy list of evalation functions to define evaluation on metadata: |
|
19 |
-#' \itemize{ |
|
20 |
-#' \item{\code{\link{FN}}(value): Fullname evaluation, two attributes match |
|
21 |
-#' if they both end with \emph{value} and, if they have further prefixes, |
|
22 |
-#' the two prefix sequences are identical} |
|
23 |
-#' \item{\code{\link{EX}}(value): Exact evaluation, only attributes exactly |
|
24 |
-#' as \emph{value} match; no further prefixes are allowed. } |
|
25 |
-#' \item{\code{\link{DF}}(value): Default evaluation, the two attributes match |
|
26 |
-#' if both end with \emph{value}.} |
|
27 |
-#' } |
|
18 |
+#' @param groupBy \code{\link{condition_evaluation}} function to support |
|
19 |
+#' methods with groupBy or joinBy input parameter |
|
28 | 20 |
#' |
29 | 21 |
#' @return GMQLDataset object. It contains the value to use as input |
30 | 22 |
#' for the subsequent GMQLDataset method |
... | ... |
@@ -48,7 +40,7 @@ |
48 | 40 |
#' ## antibody_target and cell metadata |
49 | 41 |
#' ## attributes. |
50 | 42 |
#' |
51 |
-#' merged = aggregate(exp, list(DF("antibody_target","cell"))) |
|
43 |
+#' merged = aggregate(exp, condition_evaluation(c("antibody_target","cell"))) |
|
52 | 44 |
#' |
53 | 45 |
#' @name aggregate |
54 | 46 |
#' @rdname aggregate |
... | ... |
@@ -65,13 +57,13 @@ setMethod("aggregate", "GMQLDataset", |
65 | 57 |
|
66 | 58 |
gmql_merge <- function(input_data, groupBy) |
67 | 59 |
{ |
68 |
- if(!is.list(groupBy)) |
|
69 |
- stop("groupBy must be list") |
|
70 |
- |
|
71 |
- if(!is.null(groupBy) && !length(groupBy) == 0) |
|
60 |
+ if(!is.null(groupBy)) |
|
72 | 61 |
{ |
73 | 62 |
cond <- .join_condition(groupBy) |
74 |
- join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
63 |
+ if(is.null(cond)) |
|
64 |
+ join_condition_matrix <- .jnull("java/lang/String") |
|
65 |
+ else |
|
66 |
+ join_condition_matrix <- .jarray(cond, dispatch = TRUE) |
|
75 | 67 |
} |
76 | 68 |
else |
77 | 69 |
join_condition_matrix <- .jnull("java/lang/String") |
... | ... |
@@ -319,7 +319,7 @@ run_query <- function(url, queryName, query, output_gtf = TRUE) |
319 | 319 |
|
320 | 320 |
req <- httr::GET(url) |
321 | 321 |
real_URL <- req$url |
322 |
- URL <- paste0(real_URL,"queries/run/",fileName,"/",out) |
|
322 |
+ URL <- paste0(real_URL,"queries/run/",queryName,"/",out) |
|
323 | 323 |
h <- c('Accept' = "Application/json", |
324 | 324 |
'Content-Type' = 'text/plain','X-Auth-Token' = authToken) |
325 | 325 |
|
... | ... |
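The fix builds the endpoint from `queryName`, which is an actual formal of `run_query()` (the old code referenced `fileName`, which is not). With illustrative values the request URL becomes:

```r
# Illustrative values only; `out` is whatever string the surrounding code
# derives from the output_gtf flag.
real_URL  <- "http://genomic.deib.polimi.it/gmql-rest-r/"
queryName <- "query_1"
out       <- "gtf"

paste0(real_URL, "queries/run/", queryName, "/", out)
# "http://genomic.deib.polimi.it/gmql-rest-r/queries/run/query_1/gtf"
```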
@@ -7,6 +7,8 @@ |
7 | 7 |
\alias{EX} |
8 | 8 |
\alias{Evaluation-Function} |
9 | 9 |
\alias{DF} |
10 |
+\alias{Evaluation-Function} |
|
11 |
+\alias{condition_evaluation} |
|
10 | 12 |
\title{Condition evaluation functions} |
11 | 13 |
\usage{ |
12 | 14 |
FN(...) |
... | ... |
@@ -14,6 +16,8 @@ FN(...) |
14 | 16 |
EX(...) |
15 | 17 |
|
16 | 18 |
DF(...) |
19 |
+ |
|
20 |
+condition_evaluation(...) |
|
17 | 21 |
} |
18 | 22 |
\arguments{ |
19 | 23 |
\item{...}{series of string identifying a name of metadata attribute |
... | ... |
@@ -18,9 +18,9 @@ this condition is logically "ANDed" with prefix filtering (see below) |
18 | 18 |
if NULL no filtering action occurs |
19 | 19 |
(i.e every sample is taken for region filtering)} |
20 | 20 |
|
21 |
-\item{metadata_prefix}{vector of strings that will filter metadata |
|
22 |
-containing rispectively every element of this vector. |
|
23 |
-number of element in both vector must match} |
|
21 |
+\item{metadata_prefix}{vector of strings that will support the metadata |
|
22 |
+filtering. If defined, each 'metadata' value is concatenated with the |
|
23 |
+corresponding prefix.} |
|
24 | 24 |
|
25 | 25 |
\item{regions}{vector of strings that extracts only region attribute |
26 | 26 |
specified; if NULL no regions attribute is taken and the output is only |
... | ... |
@@ -33,7 +33,7 @@ metadata() associated. If not present, the column name is the name of |
33 | 33 |
selected regions} |
34 | 34 |
} |
35 | 35 |
\value{ |
36 |
-Granges with selected regions (if any) in elementMetadata |
|
36 |
+GRanges with selected regions |
|
37 | 37 |
} |
38 | 38 |
\description{ |
39 | 39 |
This function lets the user create a new GRangesList with fixed information: |
... | ... |
@@ -44,15 +44,23 @@ with its prefix. The input regions are shown for each sample obtained |
44 | 44 |
from filtering. |
45 | 45 |
} |
46 | 46 |
\details{ |
47 |
-This function works only with datatset or GRangesList that has the same |
|
48 |
-information about regions attribute (but of course different value) |
|
47 |
+This function works only with datasets or GRangesList whose samples or |
|
48 |
+GRanges have the same region coordinates (chr, ranges, strand) |
|
49 |
+ |
|
49 | 50 |
In case of GRangesList data input the function will search for metadata |
50 | 51 |
in the metadata() function associated with the GRangesList. |
51 | 52 |
} |
52 | 53 |
\examples{ |
53 | 54 |
|
55 |
+## This statement defines the path to the folder "DATASET" in the |
|
56 |
+## subdirectory "example" of the package "RGMQL" and filter such folder |
|
57 |
+## including in the output only "pvalue" and "peak" regions |
|
58 |
+ |
|
54 | 59 |
test_path <- system.file("example", "DATASET", package = "RGMQL") |
55 |
-filter_and_extract(test_path,regions = c("pvalue", "peak")) |
|
60 |
+filter_and_extract(test_path, regions = c("pvalue", "peak")) |
|
61 |
+ |
|
62 |
+## This statement imports a GMQL dataset as GRangesList and filters it, |
|
63 |
+## including in the output only "pvalue" and "peak" regions |
|
56 | 64 |
|
57 | 65 |
grl = import_gmql(test_path, TRUE) |
58 | 66 |
filter_and_extract(grl, regions = c("pvalue", "peak")) |
... | ... |
@@ -41,6 +41,7 @@ If error occurs, a specific error is printed |
41 | 41 |
\examples{ |
42 | 42 |
|
43 | 43 |
\dontrun{ |
44 |
+ |
|
44 | 45 |
remote_url = "http://genomic.deib.polimi.it/gmql-rest-r/" |
45 | 46 |
login_gmql(remote_url) |
46 | 47 |
run_query(remote_url, "query_1", "DATASET = SELECT() Example_Dataset1; |
... | ... |
@@ -48,7 +49,7 @@ MATERIALIZE DATASET INTO RESULT_DS;", output_gtf = FALSE) |
48 | 49 |
|
49 | 50 |
test_path <- system.file("example", package = "RGMQL") |
50 | 51 |
test_query <- file.path(test_path, "query1.txt") |
51 |
-run_query_fromfile(remote_url, "query_1", test_query, output_gtf = FALSE) |
|
52 |
+run_query_fromfile(remote_url, test_query, output_gtf = FALSE) |
|
52 | 53 |
} |
53 | 54 |
|
54 | 55 |
} |
... | ... |
@@ -50,7 +50,7 @@ only those samples that have the same value for each attribute |
50 | 50 |
are considered when performing the difference. |
51 | 51 |
} |
52 | 52 |
\examples{ |
53 |
-## Thi statement initializes and runs the GMQL server for local execution |
|
53 |
+## This statement initializes and runs the GMQL server for local execution |
|
54 | 54 |
## and creation of results on disk. Then, with system.file() it defines |
55 | 55 |
## the path to the folders "DATASET" and "DATASET_GDM" in the subdirectory |
56 | 56 |
## "example" of the package "RGMQL" and opens such folder as a GMQL |
... | ... |
@@ -3,7 +3,7 @@ |
3 | 3 |
\name{show_dataset} |
4 | 4 |
\alias{show_dataset} |
5 | 5 |
\alias{show_datasets_list} |
6 |
-\title{Show Datasets} |
|
6 |
+\title{Show datasets} |
|
7 | 7 |
\usage{ |
8 | 8 |
show_datasets_list(url) |
9 | 9 |
} |
... | ... |
@@ -12,20 +12,18 @@ show_datasets_list(url) |
12 | 12 |
and base url; service name is added automatically} |
13 | 13 |
} |
14 | 14 |
\value{ |
15 |
-list of datasets. |
|
16 |
- |
|
17 |
-Every dataset in the list is identified by: |
|
15 |
+List of datasets. Every dataset in the list is identified by: |
|
18 | 16 |
\itemize{ |
19 | 17 |
\item{name: name of dataset} |
20 | 18 |
\item{owner: public or name of the user} |
21 | 19 |
} |
22 | 20 |
} |
23 | 21 |
\description{ |
24 |
-It show all GMQL dataset stored in repository using the proper GMQL |
|
25 |
-web service available on a remote server |
|
22 |
+It shows all GMQL datasets stored in the remote repository using the proper |
|
23 |
+GMQL web service available on a remote server |
|
26 | 24 |
} |
27 | 25 |
\details{ |
28 |
-If error occures a specific error is printed |
|
26 |
+If error occurs, a specific error is printed |
|
29 | 27 |
} |
30 | 28 |
\examples{ |
31 | 29 |
|
... | ... |
@@ -11,18 +11,17 @@ show_jobs_list(url) |
11 | 11 |
and base url; service name is added automatically} |
12 | 12 |
} |
13 | 13 |
\value{ |
14 |
-list of jobs |
|
15 |
-Every job in the list is identified by: |
|
14 |
+List of jobs. Every job in the list is identified by: |
|
16 | 15 |
\itemize{ |
17 | 16 |
\item{id: unique job identifier} |
18 | 17 |
} |
19 | 18 |
} |
20 | 19 |
\description{ |
21 |
-It show all Jobs (run, succeded or failed) invoked by user using the proper |
|
20 |
+It shows all jobs (run, succeeded or failed) invoked by the user using the proper |
|
22 | 21 |
GMQL web service available on a remote server |
23 | 22 |
} |
24 | 23 |
\details{ |
25 |
-If error occurs a specific error is printed |
|
24 |
+If error occurs, a specific error is printed |
|
26 | 25 |
} |
27 | 26 |
\examples{ |
28 | 27 |
remote_url = "http://genomic.deib.polimi.it/gmql-rest-r" |
... | ... |
@@ -11,19 +11,18 @@ show_queries_list(url) |
11 | 11 |
and base url; service name is added automatically} |
12 | 12 |
} |
13 | 13 |
\value{ |
14 |
-list of queries |
|
15 |
-Every query in the list is identified by: |
|
14 |
+List of queries. Every query in the list is identified by: |
|
16 | 15 |
\itemize{ |
17 | 16 |
\item{name: name of query} |
18 | 17 |
\item{text: text of GMQL query} |
19 | 18 |
} |
20 | 19 |
} |
21 | 20 |
\description{ |
22 |
-It shows all the GMQL query saved on repository using the proper GMQL |
|
23 |
-web service available on a remote server |
|
21 |
+It shows all the GMQL queries saved in the remote repository using the proper |
|
22 |
+GMQL web service available on a remote server |
|
24 | 23 |
} |
25 | 24 |
\details{ |
26 |
-if error occures, a specific error is printed |
|
25 |
+If error occurs, a specific error is printed |
|
27 | 26 |
} |
28 | 27 |
\examples{ |
29 | 28 |
|
... | ... |
@@ -12,12 +12,11 @@ and base url; service name is added automatically} |
12 | 12 |
|
13 | 13 |
\item{datasetName}{name of dataset to get |
14 | 14 |
if the dataset is a public dataset, we have to add "public." as prefix, |
15 |
-as shown in the example below otherwise no prefix is needed} |
|
15 |
+as shown in the example below, otherwise no prefix is needed} |
|
16 | 16 |
} |
17 | 17 |
\value{ |
18 |
-list of samples in dataset. |
|
19 |
- |
|
20 |
-Every sample in the list is identified by: |
|
18 |
+List of samples in dataset. Every sample in the list is identified |
|
19 |
+by: |
|
21 | 20 |
\itemize{ |
22 | 21 |
\item{id: id of sample} |
23 | 22 |
\item{name: name of sample} |
... | ... |
@@ -29,12 +28,17 @@ It show all sample from a specific GMQL dataset using the proper |
29 | 28 |
GMQL web service available on a remote server |
30 | 29 |
} |
31 | 30 |
\details{ |
32 |
-If error occures a specific error is printed |
|
31 |
+If error occurs, a specific error is printed |
|
33 | 32 |
} |
34 | 33 |
\examples{ |
35 | 34 |
|
35 |
+## Login to the GMQL REST services suite as guest |
|
36 |
+ |
|
36 | 37 |
remote_url = "http://genomic.deib.polimi.it/gmql-rest-r" |
37 | 38 |
login_gmql(remote_url) |
38 |
-list <- show_samples_list(remote_url, "public.HG19_BED_ANNOTATION") |
|
39 |
+ |
|
40 |
+## It shows all samples present in the public dataset 'Example_Dataset1' |
|
41 |
+ |
|
42 |
+list <- show_samples_list(remote_url, "public.Example_Dataset1") |
|
39 | 43 |
|
40 | 44 |
} |
... | ... |
@@ -290,45 +290,45 @@ with at least one mutation. Return the list of samples ordered by |
290 | 290 |
the number of such exons. |
291 | 291 |
```{r, query, eval = TRUE} |
292 | 292 |
|
293 |
-## These statement defines the path to the folders "EXON" and "MUT" in the |
|
294 |
-## subdirectory "example" of the package "RGMQL" |
|
293 |
+# These statements define the path to the folders "EXON" and "MUT" in the |
|
294 |
+# subdirectory "example" of the package "RGMQL" |
|
295 | 295 |
|
296 | 296 |
exon_path <- system.file("example", "EXON", package = "RGMQL") |
297 | 297 |
mut_path <- system.file("example", "MUT", package = "RGMQL") |
298 | 298 |
|
299 |
-## Read EXON folder as a GMQL dataset named "exon_ds" containing a single |
|
300 |
-## sample with exon regions and MUT folder as a GMQL dataset named "mut_ds" |
|
301 |
-## containing multiple samples with mutation regions |
|
299 |
+# Read EXON folder as a GMQL dataset named "exon_ds" containing a single |
|
300 |
+# sample with exon regions and MUT folder as a GMQL dataset named "mut_ds" |
|
301 |
+# containing multiple samples with mutation regions |
|
302 | 302 |
|
303 | 303 |
exon_ds <- read_dataset(exon_path) |
304 | 304 |
mut_ds <- read_dataset(mut_path) |
305 | 305 |
|
306 |
-## Filter out mut_ds based on predicate |
|
306 |
+# Filter out mut_ds based on predicate |
|
307 | 307 |
|
308 | 308 |
mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & |
309 | 309 |
clinical_patient__tumor_tissue_site == 'breast') |
310 | 310 |
|
311 |
-## Filter out exon_ds based on predicate |
|
311 |
+# Filter out exon_ds based on predicate |
|
312 | 312 |
|
313 | 313 |
exon = filter(exon_ds, annotation_type == 'exons' & |
314 | 314 |
original_provider == 'RefSeq') |
315 | 315 |
|
316 |
-## For each mutation sample, count mutations within each exon while |
|
317 |
-## mapping the mutations to the exon regions using the map() function |
|
316 |
+# For each mutation sample, count mutations within each exon while |
|
317 |
+# mapping the mutations to the exon regions using the map() function |
|
318 | 318 |
|
319 | 319 |
exon1 <- map(exon, mut) |
320 | 320 |
|
321 |
-## Remove exons in each sample that do not contain mutations |
|
321 |
+# Remove exons in each sample that do not contain mutations |
|
322 | 322 |
|
323 | 323 |
exon2 <- filter(exon1, count_Exon_Mut >= 1) |
324 | 324 |
|
325 |
-## Using the extend() function, count how many exons remain in each sample and |
|
326 |
-## store the result in the sample metadata as a new attribute???value pair, |
|
327 |
-## with exon_count as attribute name |
|
325 |
+# Using the extend() function, count how many exons remain in each sample and |
|
326 |
+# store the result in the sample metadata as a new attribute-value pair, |
|
327 |
+# with exon_count as attribute name |
|
328 | 328 |
|
329 | 329 |
exon3 <- extend(exon2, exon_count = COUNT()) |
330 | 330 |
|
331 |
-## Order samples in descending order of the added metadata exon_count |
|
331 |
+# Order samples in descending order of the added metadata exon_count |
|
332 | 332 |
|
333 | 333 |
exon_res = arrange(exon3, list(DESC("exon_count"))) |
334 | 334 |
``` |
... | ... |
@@ -336,7 +336,8 @@ exon_res = arrange(exon3, list(DESC("exon_count"))) |
336 | 336 |
If you want to store the result persistently, you can materialize it into |
337 | 337 |
a specific path defined as input parameter. |
338 | 338 |
```{r, materialize, eval = TRUE} |
339 |
-## Materialize the result dataset on disk |
|
339 |
+# Materialize the result dataset on disk |
|
340 |
+ |
|
340 | 341 |
collect(exon_res) |
341 | 342 |
``` |
342 | 343 |
by default *collect()* uses the R working directory as stored path and *ds1* as name |
... | ... |
@@ -474,38 +475,38 @@ in this way login is automatically performed as specified above |
474 | 475 |
After init, we can start building our queries: |
475 | 476 |
```{r, remote query, eval = TRUE} |
476 | 477 |
|
477 |
-## Read the remote dataset HG19_TCGA_dnaseq |
|
478 |
-## Read the remote dataset HG19_BED_ANNOTATION |
|
478 |
+# Read the remote dataset HG19_TCGA_dnaseq |
|
479 |
+# Read the remote dataset HG19_BED_ANNOTATION |
|
479 | 480 |
|
480 | 481 |
TCGA_dnaseq <- read_dataset("public.HG19_TCGA_dnaseq",is_local = FALSE) |
481 | 482 |
HG19_bed_ann <- read_dataset("public.HG19_BED_ANNOTATION",is_local = FALSE) |
482 | 483 |
|
483 |
-## Filter out HG19_bed_ann based on predicate |
|
484 |
+# Filter out HG19_bed_ann based on predicate |
|
484 | 485 |
|
485 | 486 |
mut = filter(HG19_bed_ann, manually_curated__dataType == 'dnaseq' & |
486 | 487 |
clinical_patient__tumor_tissue_site == 'breast') |
487 | 488 |
|
488 |
-## Filter out TCGA_dnaseq based on predicate |
|
489 |
+# Filter out TCGA_dnaseq based on predicate |
|
489 | 490 |
|
490 | 491 |
exon = filter(TCGA_dnaseq, annotation_type == 'exons' & |
491 | 492 |
original_provider == 'RefSeq') |
492 | 493 |
|
493 |
-## For each mutation sample, count mutations within each exon while |
|
494 |
-## mapping the mutations to the exon regions using the map() function |
|
494 |
+# For each mutation sample, count mutations within each exon while |
|
495 |
+# mapping the mutations to the exon regions using the map() function |
|
495 | 496 |
|
496 | 497 |
exon1 <- map(exon, mut) |
497 | 498 |
|
498 |
-## Remove exons in each sample that do not contain mutations |
|
499 |
+# Remove exons in each sample that do not contain mutations |
|
499 | 500 |
|
500 | 501 |
exon2 <- filter(exon1, count_Exon_Mut >= 1) |
501 | 502 |
|
502 |
-## Using the extend() function, count how many exons remain in each sample and |
|
503 |
-## store the result in the sample metadata as a new attribute???value pair, |
|
504 |
-## with exon_count as attribute name |
|
503 |
+# Using the extend() function, count how many exons remain in each sample and |
|
504 |
+# store the result in the sample metadata as a new attribute-value pair, |
|
505 |
+# with exon_count as attribute name |
|
505 | 506 |
|
506 | 507 |
exon3 <- extend(exon2, exon_count = COUNT()) |
507 | 508 |
|
508 |
-## Order samples in descending order of the added metadata exon_count |
|
509 |
+# Order samples in descending order of the added metadata exon_count |
|
509 | 510 |
|
510 | 511 |
exon_res = arrange(exon3,list(DESC("exon_count"))) |
511 | 512 |
``` |
... | ... |
@@ -534,21 +535,21 @@ even in the same query as in the following example: |
534 | 535 |
```{r, mixed query, eval = TRUE} |
535 | 536 |
|
536 | 537 |
|
537 |
-## This statement defines the path to the folder "EXON" in the subdirectory |
|
538 |
-## "example" of the package "RGMQL" |
|
538 |
+# This statement defines the path to the folder "EXON" in the subdirectory |
|
539 |
+# "example" of the package "RGMQL" |
|
539 | 540 |
|
540 | 541 |
exon_path <- system.file("example", "EXON", package = "RGMQL") |
541 | 542 |
|
542 |
-## Read EXON folder as a GMQL dataset named "exon_ds" containing a single |
|
543 |
-## sample with exon regions |
|
543 |
+# Read EXON folder as a GMQL dataset named "exon_ds" containing a single |
|
544 |
+# sample with exon regions |
|
544 | 545 |
|
545 | 546 |
exon_ds <- read_dataset(exon_path,is_local = TRUE) |
546 | 547 |
|
547 |
-## Read the remote dataset HG19_BED_ANNOTATION |
|
548 |
+# Read the remote dataset HG19_BED_ANNOTATION |
|
548 | 549 |
|
549 | 550 |
HG19_bed_ann <- read_dataset("public.HG19_BED_ANNOTATION",is_local = FALSE) |
550 | 551 |
|
551 |
-## Filter out based on predicate |
|
552 |
+# Filter out based on predicate |
|
552 | 553 |
|
553 | 554 |
mut = filter(HG19_bed_ann, manually_curated__dataType == 'dnaseq' & |
554 | 555 |
clinical_patient__tumor_tissue_site == 'breast') |
... | ... |
@@ -556,22 +557,22 @@ mut = filter(HG19_bed_ann, manually_curated__dataType == 'dnaseq' & |
556 | 557 |
exon = filter(exon_ds, annotation_type == 'exons' & |
557 | 558 |
original_provider == 'RefSeq') |
558 | 559 |
|
559 |
-## For each mutation sample, count mutations within each exon while |
|
560 |
-## mapping the mutations to the exon regions using the map() function |
|
560 |
+# For each mutation sample, count mutations within each exon while |
|
561 |
+# mapping the mutations to the exon regions using the map() function |
|
561 | 562 |
|
562 | 563 |
exon1 <- map(exon, mut) |
563 | 564 |
|
564 |
-## Remove exons in each sample that do not contain mutations |
|
565 |
+# Remove exons in each sample that do not contain mutations |
|
565 | 566 |
|
566 | 567 |
exon2 <- filter(exon1, count_Exon_Mut >= 1) |
567 | 568 |
|
568 |
-## Using the extend() function, count how many exons remain in each sample and |
|
569 |
-## store the result in the sample metadata as a new attribute???value pair, |
|
570 |
-## with exon_count as attribute name |
|
569 |
+# Using the extend() function, count how many exons remain in each sample and |
|
570 |
+# store the result in the sample metadata as a new attribute-value pair, |
|
571 |
+# with exon_count as attribute name |
|
571 | 572 |
|
572 | 573 |
exon3 <- extend(exon2, exon_count = COUNT()) |
573 | 574 |
|
574 |
-## Order samples in descending order of the added metadata exon_count |
|
575 |
+# Order samples in descending order of the added metadata exon_count |
|
575 | 576 |
|
576 | 577 |
exon_res = arrange(exon3,list(DESC("exon_count"))) |
577 | 578 |
|
... | ... |
@@ -607,8 +608,8 @@ if present, as data structure suitable to further processing in other packages. |
607 | 608 |
|
608 | 609 |
We can import a GMQL dataset into R environment as follows: |
609 | 610 |
```{r, import, eval = TRUE} |
610 |
-## This statement defines the path to the folder "EXON" in the subdirectory |
|
611 |
-## "example" of the package "RGMQL" |
|
611 |
+# This statement defines the path to the folder "EXON" in the subdirectory |
|
612 |
+# "example" of the package "RGMQL" |
|
612 | 613 |
|
613 | 614 |
dataset_path <- system.file("example", "EXON", package = "RGMQL") |
614 | 615 |
|
... | ... |
@@ -621,8 +622,8 @@ the second parameter *is_gtf* must specifies the file format: .GTF or .GDM. |
621 | 622 |
|
622 | 623 |
We can export a GRangesList as GMQL dataset as follows: |
623 | 624 |
```{r, export, eval = TRUE} |
624 |
-## This statement defines the path to the subdirectory "example" of the |
|
625 |
-## package "RGMQL" |
|
625 |
+# This statement defines the path to the subdirectory "example" of the |
|
626 |
+# package "RGMQL" |
|
626 | 627 |
|
627 | 628 |
dir_out <- system.file("example", package = "RGMQL") |
628 | 629 |
|
... | ... |
@@ -638,8 +639,8 @@ the second parameter *is_gtf* specifies the file format: .GTF or .GDM. |
638 | 639 |
We can also import only a part of a GMQL dataset into R environment, |
639 | 640 |
by filtering its content as follows: |
640 | 641 |
```{r, filter_extract, eval = TRUE} |
641 |
-## This statement defines the path to the folder "TEAD" in the subdirectory |
|
642 |
-## "example" of the package "RGMQL" |
|
642 |
+# This statement defines the path to the folder "TEAD" in the subdirectory |
|
643 |
+# "example" of the package "RGMQL" |
|
643 | 644 |
|
644 | 645 |
data_in <- system.file("example", "TEAD", package = "RGMQL") |
645 | 646 |
|