... | ... |
@@ -11,7 +11,7 @@ |
11 | 11 |
#' |
12 | 12 |
#' @param datasetName single string GMQL dataset folder path |
13 | 13 |
#' |
14 |
-#' @return GrangesList containing all GMQL sample In dataset |
|
14 |
+#' @return GrangesList containing all GMQL samples in dataset |
|
15 | 15 |
#' |
16 | 16 |
#' @seealso \code{\link{exportGMQL.gdm}} \code{\link{exportGMQL.gtf}} \code{\link{importGMQL.gdm} } |
17 | 17 |
#' |
... | ... |
@@ -72,7 +72,7 @@ importGMQL.gtf <- function(datasetName) |
72 | 72 |
#' |
73 | 73 |
#' @param datasetName single string GMQL dataset folder path |
74 | 74 |
#' |
75 |
-#' @return GrangesList containing all GMQL sample In dataset |
|
75 |
+#' @return GrangesList containing all GMQL samples in dataset |
|
76 | 76 |
#' |
77 | 77 |
#' @seealso \code{\link{exportGMQL.gdm}} \code{\link{exportGMQL.gtf}} \code{\link{importGMQL.gtf} } |
78 | 78 |
#' |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
#' @param samples GrangesList |
15 | 15 |
#' @param dir_out folder path where a new folder is created and all the sample files are written |
16 | 16 |
#' |
17 |
-#' @return no value return |
|
17 |
+#' @return None |
|
18 | 18 |
#' |
19 | 19 |
#' @seealso \code{\link{exportGMQL.gdm}} \code{\link{exportGMQL.gtf}} \code{\link{importGMQL.gtf}} |
20 | 20 |
#' |
... | ... |
@@ -67,7 +67,7 @@ exportGMQL.gdm <- function(samples, dir_out) |
67 | 67 |
#' @param samples GrangesList |
68 | 68 |
#' @param dir_out folder path where a new folder is created and all the sample files are written |
69 | 69 |
#' |
70 |
-#' @return no value return |
|
70 |
+#' @return None |
|
71 | 71 |
#' |
72 | 72 |
#' |
73 | 73 |
#' @seealso \code{\link{exportGMQL.gdm}} \code{\link{exportGMQL.gtf}} \code{\link{importGMQL.gdm} } |
... | ... |
@@ -32,6 +32,34 @@ |
32 | 32 |
#' |
33 | 33 |
#' @examples |
34 | 34 |
#' |
35 |
+#' ## it creates a new dataset called CTCF_NORM_SCORE by preserving all region attributes apart from score, |
|
36 |
+#' ## and creating a new region attribute called new_score by dividing the existing score value |
|
37 |
+#' ## of each region by 1000.0 and incrementing it by 100. |
|
38 |
+#' ## It also generates, for each sample of the new dataset, |
|
39 |
+#' ## a new metadata attribute called normalized with value 1, which can be used in future selections. |
|
40 |
+#' |
|
41 |
+#' initGMQL("gtf") |
|
42 |
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL") |
|
43 |
+#' input = readDataset(test_path) |
|
44 |
+#' CTCF_NORM_SCORE = project(input,metadata_update="normalized AS 1", |
|
45 |
+#' regions_update="new_score AS (score / 1000.0) + 100" , regions=c("score"),all_but_reg=TRUE) |
|
46 |
+#' |
|
47 |
+#' |
|
48 |
+#' \dontrun{ |
|
49 |
+#' ### it produces an output dataset that contains the same samples as the input dataset. |
|
50 |
+#' ### Each output sample only contains, as region attributes, |
|
51 |
+#' ### the four basic coordinates (chr, left, right, strand) and the specified region attributes |
|
52 |
+#' ### 'variant_classification' and 'variant_type', and as metadata attributes only the specified ones, |
|
53 |
+#' ### i.e. manually_curated__tissue_status and manually_curated__tumor_tag. |
|
54 |
+#' |
|
55 |
+#' initGMQL("gtf") |
|
56 |
+#' test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL") |
|
57 |
+#' DS_in = readDataset(test_path) |
|
58 |
+#' DS_out = project(DS_in,regions=c("variant_classification", "variant_type"), |
|
59 |
+#' metadata=c("manually_curated__tissue_status","manually_curated__tumor_tag")) |
|
60 |
+#' |
|
61 |
+#' } |
|
62 |
+#' |
|
35 | 63 |
#' @export |
36 | 64 |
#' |
37 | 65 |
#' |
... | ... |
@@ -12,13 +12,13 @@ |
12 | 12 |
#' @param predicate single string predicate made up by logical operation: AND,OR,NOT on metadata attribute |
13 | 13 |
#' @param region_predicate single string predicate made up by logical operation: AND,OR,NOT on schema region values |
14 | 14 |
#' @param semi_join list of CONDITION objects where every object contains the name of metadata to be used in semijoin, |
15 |
-#' or simple string concatenation of name of metadata (e.g c("cell_type","attribute_tag","size") ) without declaring condition. |
|
15 |
+#' or simple string concatenation of name of metadata, e.g. c("cell_type","attribute_tag","size") without declaring condition. |
|
16 | 16 |
#' In the latter form all metadata are considered having DEF condition |
17 | 17 |
#' The CONDITION's available are: |
18 | 18 |
#' \itemize{ |
19 | 19 |
#' \item{\code{\link{FULL}}: Fullname evaluation, two attributes match if they both end with value and, |
20 | 20 |
#' if they have a further prefixes, the two prefix sequence are identical} |
21 |
-#' \item{\code{\link{DEF}}: Default evaluation, two attributes match if both end with value. } |
|
21 |
+#' \item{\code{\link{DEF}}: Default evaluation, two attributes match if both end with value} |
|
22 | 22 |
#' \item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly as value will match; no further prefixes are allowed. } |
23 | 23 |
#' } |
24 | 24 |
#' Every condition accepts only one string value. (e.g. FULL("cell_type") ) |
... | ... |
@@ -44,7 +44,7 @@ |
44 | 44 |
#' s=select(input,"Patient_age < 70") |
45 | 45 |
#' |
46 | 46 |
#' |
47 |
-#' \dontrun { |
|
47 |
+#' \dontrun{ |
|
48 | 48 |
#' |
49 | 49 |
#' It creates a new dataset called 'jun_tf' by selecting those samples and their |
50 | 50 |
#' regions from the existing 'data' dataset such that: |
... | ... |
@@ -10,7 +10,7 @@ importGMQL.gdm(datasetName) |
10 | 10 |
\item{datasetName}{single string GMQL dataset folder path} |
11 | 11 |
} |
12 | 12 |
\value{ |
13 |
-GrangesList containing all GMQL sample In dataset |
|
13 |
+GrangesList containing all GMQL samples in dataset |
|
14 | 14 |
} |
15 | 15 |
\description{ |
16 | 16 |
It creates a GrangesList from GMQL samples in dataset |
... | ... |
@@ -10,7 +10,7 @@ importGMQL.gtf(datasetName) |
10 | 10 |
\item{datasetName}{single string GMQL dataset folder path} |
11 | 11 |
} |
12 | 12 |
\value{ |
13 |
-GrangesList containing all GMQL sample In dataset |
|
13 |
+GrangesList containing all GMQL samples in dataset |
|
14 | 14 |
} |
15 | 15 |
\description{ |
16 | 16 |
It creates a GrangesList from GMQL samples in dataset |
... | ... |
@@ -5,26 +5,33 @@ |
5 | 5 |
\title{GMQL Operation: PROJECT} |
6 | 6 |
\usage{ |
7 | 7 |
project(input_data, metadata = NULL, metadata_update = NULL, |
8 |
- regions = NULL, regions_update = NULL, all_but = FALSE) |
|
8 |
+ all_but_meta = FALSE, regions = NULL, regions_update = NULL, |
|
9 |
+ all_but_reg = FALSE) |
|
9 | 10 |
} |
10 | 11 |
\arguments{ |
11 | 12 |
\item{input_data}{string pointer taken from GMQL function} |
12 | 13 |
|
13 | 14 |
\item{metadata}{vector of string made up by metadata attribute} |
14 | 15 |
|
15 |
-\item{metadata_update}{single string predicate} |
|
16 |
+\item{metadata_update}{single string predicate made up by operation on metadata attribute} |
|
16 | 17 |
|
17 |
-\item{regions_update}{single string predicate} |
|
18 |
+\item{all_but_meta}{logical value indicating which metadata you want to exclude. |
|
19 |
+If FALSE only the metadata you choose are kept in the output of the project operation, |
|
20 |
+if TRUE all the metadata are kept except the ones included in the metadata parameter. |
|
21 |
+If metadata is not defined \emph{all_but_meta} is not considered.} |
|
18 | 22 |
|
19 |
-\item{all_but}{logical value indicating which schema filed attribute you want to exclude. |
|
23 |
+\item{regions_update}{single string predicate made up by operation on schema field attribute} |
|
24 |
+ |
|
25 |
+\item{all_but_reg}{logical value indicating which schema field attribute you want to exclude. |
|
20 | 26 |
If FALSE only the regions you choose are kept in the output of the project operation, |
21 | 27 |
if TRUE all the schema regions are kept except the ones included in the regions parameter. |
22 |
-if regions is not defined all_but is not considerd.} |
|
28 |
+If regions is not defined \emph{all_but_reg} is not considered.} |
|
23 | 29 |
|
24 | 30 |
\item{regions}{vector of string made up by schema field attribute} |
25 | 31 |
} |
26 | 32 |
\value{ |
27 |
-"url-like" string |
|
33 |
+DAGgraph class object. It contains the value associated to the graph used |
|
34 |
+as input for the subsequent GMQL function |
|
28 | 35 |
} |
29 | 36 |
\description{ |
30 | 37 |
It creates, from an existing dataset, a new dataset with all the samples from input dataset |
... | ... |
@@ -38,25 +45,35 @@ It allows to: |
38 | 45 |
} |
39 | 46 |
} |
40 | 47 |
\examples{ |
41 |
-\dontrun{ |
|
48 |
+ |
|
49 |
+## it creates a new dataset called CTCF_NORM_SCORE by preserving all region attributes apart from score, |
|
50 |
+## and creating a new region attribute called new_score by dividing the existing score value |
|
51 |
+## of each region by 1000.0 and incrementing it by 100. |
|
52 |
+## It also generates, for each sample of the new dataset, |
|
53 |
+## a new metadata attribute called normalized with value 1, which can be used in future selections. |
|
42 | 54 |
|
43 | 55 |
initGMQL("gtf") |
44 | 56 |
test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL") |
45 |
-r = read(test_path) |
|
57 |
+input = readDataset(test_path) |
|
58 |
+CTCF_NORM_SCORE = project(input,metadata_update="normalized AS 1", |
|
59 |
+regions_update="new_score AS (score / 1000.0) + 100" , regions=c("score"),all_but_reg=TRUE) |
|
60 |
+ |
|
46 | 61 |
|
47 |
-### preserving all region attributes and creating a new region attribute called length |
|
48 |
-p = project(input_data = r,regions_update="length AS right - left") |
|
62 |
+\dontrun{ |
|
63 |
+### it produces an output dataset that contains the same samples as the input dataset. |
|
64 |
+### Each output sample only contains, as region attributes, |
|
65 |
+### the four basic coordinates (chr, left, right, strand) and the specified region attributes |
|
66 |
+### 'variant_classification' and 'variant_type', and as metadata attributes only the specified ones, |
|
67 |
+### i.e. manually_curated__tissue_status and manually_curated__tumor_tag. |
|
49 | 68 |
|
50 |
-### preserving all region attributes apart from score, and creating a new region attribute called new_score |
|
51 |
-p = project(input_data = r, regions = "score" regions_update="length AS right - left", all_but=TRUE) |
|
69 |
+initGMQL("gtf") |
|
70 |
+test_path <- system.file("example","DATA_SET_VAR_GTF",package = "GMQL") |
|
71 |
+DS_in = readDataset(test_path) |
|
72 |
+DS_out = project(DS_in,regions=c("variant_classification", "variant_type"), |
|
73 |
+metadata=c("manually_curated__tissue_status","manually_curated__tumor_tag")) |
|
52 | 74 |
|
53 |
-### output dataset that contains the same samples as the input dataset. Each output sample only contains, |
|
54 |
-### as region attributes, the four basic coordinates (chr, left, right, strand) |
|
55 |
-### and the specified region attributes and as metadata attributes only the specified ones |
|
56 |
-p = project(input_data = r, regions = c("variant_classification", "variant_type"), |
|
57 |
-metadata = c("manually_curated","tissue_status", "tumor_ta") ) |
|
58 | 75 |
} |
59 |
-"" |
|
76 |
+ |
|
60 | 77 |
} |
61 | 78 |
\references{ |
62 | 79 |
\url{http://www.bioinformatics.deib.polimi.it/genomic_computing/GMQL/doc/GMQLUserTutorial.pdf} |
... | ... |
@@ -15,13 +15,13 @@ select(input_data, predicate = NULL, region_predicate = NULL, |
15 | 15 |
\item{region_predicate}{single string predicate made up by logical operation: AND,OR,NOT on schema region values} |
16 | 16 |
|
17 | 17 |
\item{semi_join}{list of CONDITION objects where every object contains the name of metadata to be used in semijoin, |
18 |
-or simple string concatenation of name of metadata (e.g c("cell_type","attribute_tag","size") ) without declaring condition. |
|
18 |
+or simple string concatenation of name of metadata, e.g. c("cell_type","attribute_tag","size") without declaring condition. |
|
19 | 19 |
In the latter form all metadata are considered having DEF condition |
20 | 20 |
The CONDITION's available are: |
21 | 21 |
\itemize{ |
22 | 22 |
\item{\code{\link{FULL}}: Fullname evaluation, two attributes match if they both end with value and, |
23 | 23 |
if they have a further prefixes, the two prefix sequence are identical} |
24 |
-\item{\code{\link{DEF}}: Default evaluation, two attributes match if both end with value. } |
|
24 |
+\item{\code{\link{DEF}}: Default evaluation, two attributes match if both end with value} |
|
25 | 25 |
\item{\code{\link{EXACT}}: Exact evaluation, only attributes exactly as value will match; no further prefixes are allowed. } |
26 | 26 |
} |
27 | 27 |
Every condition accepts only one string value. (e.g. FULL("cell_type") )} |
... | ... |
@@ -56,7 +56,7 @@ input = readDataset(test_path) |
56 | 56 |
s=select(input,"Patient_age < 70") |
57 | 57 |
|
58 | 58 |
|
59 |
-\dontrun { |
|
59 |
+\dontrun{ |
|
60 | 60 |
|
61 | 61 |
It creates a new dataset called 'jun_tf' by selecting those samples and their |
62 | 62 |
regions from the existing 'data' dataset such that: |