Browse code

fixed import gtf, granges schema

Simone authored on 08/06/2021 09:04:48
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,331 @@
1
+## ---- include=FALSE-----------------------------------------------------------
2
+options(tinytex.verbose = TRUE)
3
+
4
+
5
+## ---- initialization----------------------------------------------------------
6
+library('RGMQL')
7
+
8
+## ---- initialization_RGMQLlib-------------------------------------------------
9
+library('RGMQLlib')
10
+
11
+## ---- init--------------------------------------------------------------------
12
+init_gmql()
13
+
14
+## ---- read GMQL dataset-------------------------------------------------------
15
+gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL")
16
+data_out = read_gmql(gmql_dataset_path)
17
+
18
+## ---- read GRangesList--------------------------------------------------------
19
+library("GenomicRanges")
20
+
21
+# Granges Object with one region: chr2 and two metadata columns: score = 5 
22
+# and GC  = 0.45
23
+
24
+gr1 <- GRanges(seqnames = "chr2",
25
+    ranges = IRanges(103, 106), strand = "+", score = 5, GC = 0.45)
26
+
27
+# Granges Object with two regions both chr1 and two metadata columns: score = 3
28
+# for the first region and score = 4 for the second one, GC  = 0.3 and 0.5 
29
+# for the first and second region, respectively
30
+
31
+gr2 <- GRanges(seqnames = c("chr1", "chr1"),
32
+    ranges = IRanges(c(107, 113), width = 3), strand = c("+", "-"),
33
+    score = 3:4, GC = c(0.3, 0.5))
34
+
35
+grl <- GRangesList("txA" = gr1, "txB" = gr2)
36
+data_out <- read_GRangesList(grl)
37
+
38
+## ---- query-------------------------------------------------------------------
39
+
40
+# These statements define the paths to the folders "EXON" and "MUT" in the 
41
+# subdirectory "example" of the package "RGMQL"
42
+
43
+exon_path <- system.file("example", "EXON", package = "RGMQL")
44
+mut_path <- system.file("example", "MUT", package = "RGMQL")
45
+
46
+# Read EXON folder as a GMQL dataset named "exon_ds" containing a single 
47
+# sample with exon regions, and MUT folder as a GMQL dataset named "mut_ds" 
48
+
49
+exon_ds <- read_gmql(exon_path)
50
+mut_ds <- read_gmql(mut_path)
51
+
52
+# Filter out mut_ds based on a metadata predicate to keep breast cancer 
53
+# mutations only
54
+
55
+mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
56
+                clinical_patient__tumor_tissue_site == 'breast')
57
+
58
+# Filter out exon_ds based on a metadata predicate to keep Refseq exons only
59
+
60
+exon = filter(exon_ds, annotation_type == 'exons' & 
61
+                    original_provider == 'RefSeq')
62
+
63
+# For each mutation sample, map the mutations to the exon regions using 
64
+# the map() function and count mutations within each exon storing the value
65
+# in the default region attribute 'count_left_right'
66
+
67
+exon1 <- map(exon, mut)
68
+
69
+# Remove exons in each sample that do not contain mutations
70
+
71
+exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
72
+
73
+# Using the extend() function, count how many exons remain in each sample and
74
+# store the result in the sample metadata as a new attribute-value pair, 
75
+# with exon_count as attribute name 
76
+
77
+exon3 <- extend(exon2, exon_count = COUNT())
78
+
79
+# Order samples in descending order of the added metadata exon_count 
80
+
81
+exon_res = arrange(exon3, list(DESC("exon_count")))
82
+
83
+## ---- materialize-------------------------------------------------------------
84
+# Materialize the result dataset on disk
85
+collect(exon_res)
86
+
87
+## ---- materializeElsewhere----------------------------------------------------
88
+# Materialize the result dataset into a specific folder on disk
89
+collect(exon_res, dir_out = "./WD_R", name = "dataset") #, 
90
+
91
+## ---- execute, eval = FALSE---------------------------------------------------
92
+#  execute()
93
+
94
+## ---- take,eval=FALSE---------------------------------------------------------
95
+#  g <- take(exon_res, rows = 45)
96
+
97
+## ---- init with guest login---------------------------------------------------
98
+test_url = "http://www.gmql.eu/gmql-rest"
99
+login_gmql(test_url)
100
+
101
+## ---- init with login---------------------------------------------------------
102
+test_url = "http://www.gmql.eu/gmql-rest"
103
+login_gmql(test_url, username = 'myname', password = 'mypassword')
104
+
105
+## ---- run, eval = FALSE-------------------------------------------------------
106
+#  
107
+#  job <- run_query(test_url, "query_1", "DNA = SELECT() Example_Dataset_1;
108
+#  MATERIALIZE DNA INTO RESULT_DS;", output_gtf = FALSE)
109
+#  
110
+
111
+## ---- run_from_file, eval = FALSE---------------------------------------------
112
+#  query_path <- system.file("example", "query1.txt", package = "RGMQL")
113
+#  job <- run_query_fromfile(test_url, query_path, output_gtf = FALSE)
114
+
115
+## ---- trace, eval = FALSE-----------------------------------------------------
116
+#  job_id <- job$id
117
+#  trace_job(test_url, job_id)
118
+
119
+## ---- download, eval = FALSE--------------------------------------------------
120
+#  name_dataset <- job$datasets[[1]]$name
121
+#  download_dataset(test_url, name_dataset)
122
+#  
123
+
124
+## ---- download_as_GRangesList, eval=FALSE-------------------------------------
125
+#  name_dataset <- job$datasets[[1]]$name
126
+#  grl = download_as_GRangesList(test_url, name_dataset)
127
+
128
+## ---- logout------------------------------------------------------------------
129
+logout_gmql(test_url)
130
+
131
+## ---- login remote, eval = FALSE----------------------------------------------
132
+#  test_url = "http://www.gmql.eu/gmql-rest"
133
+#  login_gmql(test_url)
134
+
135
+## ---- initialize remote-------------------------------------------------------
136
+init_gmql(url = test_url)
137
+
138
+## ---- change processing mode--------------------------------------------------
139
+remote_processing(TRUE)
140
+
141
+## ---- init remote processing--------------------------------------------------
142
+init_gmql(url = test_url, remote_processing = TRUE)
143
+
144
+## ---- remote query------------------------------------------------------------
145
+
146
+## Read the remote dataset HG19_TCGA_dnaseq
147
+## Read the remote dataset HG19_BED_ANNOTATION
148
+
149
+TCGA_dnaseq <- read_gmql("public.HG19_TCGA_dnaseq", is_local = FALSE)
150
+HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
151
+
152
+# Filter TCGA_dnaseq based on a metadata predicate to keep breast cancer 
153
+# mutations only
154
+
155
+mut = filter(TCGA_dnaseq, manually_curated__dataType == 'dnaseq' & 
156
+                clinical_patient__tumor_tissue_site == 'breast')
157
+
158
+# Filter HG19_bed_ann based on a metadata predicate to keep RefSeq exons only 
159
+
160
+exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
161
+                    original_provider == 'RefSeq')
162
+
163
+# For each mutation sample, map the mutations to the exon regions using 
164
+# the map() function and count mutations within each exon storing the value
165
+# in the default region attribute 'count_left_right'
166
+
167
+exon1 <- map(exon, mut)
168
+
169
+# Remove exons in each sample that do not contain mutations
170
+
171
+exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
172
+
173
+# Using the extend() function, count how many exons remain in each sample and
174
+# store the result in the sample metadata as a new attribute-value pair, 
175
+# with exon_count as attribute name 
176
+
177
+exon3 <- extend(exon2, exon_count = COUNT())
178
+
179
+# Order samples in descending order of the added metadata exon_count 
180
+
181
+exon_res = arrange(exon3, list(DESC("exon_count")))
182
+
183
+## ---- remote materialize, eval = FALSE----------------------------------------
184
+#  collect(exon_res, name="exon_res_data")
185
+
186
+## ---- remote execute, eval = FALSE--------------------------------------------
187
+#  job<-execute()
188
+
189
+## ---- download_2, eval = FALSE------------------------------------------------
190
+#  name_dataset <- job$datasets[[1]]$name
191
+#  download_dataset(test_url, name_dataset)
192
+
193
+## ---- download_as_GRangesList_2, eval=FALSE-----------------------------------
194
+#  name_dataset <- job$datasets[[1]]$name
195
+#  grl = download_as_GRangesList(test_url, name_dataset)
196
+
197
+## ---- logout_2, eval=FALSE----------------------------------------------------
198
+#  logout_gmql(test_url)
199
+
200
+## ---- switch mode-------------------------------------------------------------
201
+test_url = "http://www.gmql.eu/gmql-rest"
202
+init_gmql(url = test_url)
203
+remote_processing(TRUE)
204
+
205
+## ---- mixed query-------------------------------------------------------------
206
+
207
+
208
+# This statement defines the path to the folder "MUT" in the subdirectory 
209
+# "example" of the package "RGMQL"
210
+
211
+mut_path <- system.file("example", "MUT", package = "RGMQL")
212
+
213
+# Read MUT folder as a GMQL dataset named "mut_ds" 
214
+
215
+mut_ds <- read_gmql(mut_path, is_local = TRUE)
216
+
217
+# Read the remote dataset HG19_BED_ANNOTATION
218
+
219
+HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
220
+
221
+# Filter out mut_ds based on a metadata predicate to keep breast cancer 
222
+# mutations only
223
+
224
+mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
225
+                clinical_patient__tumor_tissue_site == 'breast')
226
+
227
+# Filter HG19_bed_ann based on a metadata predicate to keep RefSeq exons only 
228
+
229
+exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
230
+                    original_provider == 'RefSeq')
231
+
232
+# For each mutation sample, map the mutations to the exon regions using 
233
+# the map() function and count mutations within each exon storing the value
234
+# in the default region attribute 'count_left_right'
235
+
236
+exon1 <- map(exon, mut)
237
+
238
+# Remove exons in each sample that do not contain mutations
239
+
240
+exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
241
+
242
+# Using the extend() function, count how many exons remain in each sample and
243
+# store the result in the sample metadata as a new attribute-value pair, 
244
+# with exon_count as attribute name 
245
+
246
+exon3 <- extend(exon2, exon_count = COUNT())
247
+
248
+# Order samples in descending order of the added metadata exon_count 
249
+
250
+exon_res = arrange(exon3, list(DESC("exon_count")))
251
+
252
+
253
+## ---- mixed materialize, eval = FALSE-----------------------------------------
254
+#  collect(exon_res,"exon_result_dataset")
255
+
256
+## ---- mixed execute, eval = FALSE---------------------------------------------
257
+#  job<-execute()
258
+
259
+## ---- import------------------------------------------------------------------
260
+# This statement defines the path to the folder "EXON" in the subdirectory 
261
+# "example" of the package "RGMQL"
262
+
263
+dataset_path <- system.file("example", "EXON", package = "RGMQL")
264
+
265
+# Import the GMQL dataset EXON as GRangesList
266
+
267
+imported_data <- import_gmql(dataset_path, is_gtf = FALSE)
268
+imported_data
269
+
270
+# and its metadata
271
+
272
+imported_data@metadata
273
+
274
+
275
+## ---- export------------------------------------------------------------------
276
+# This statement defines the path to the subdirectory "exp" of the 
277
+# package "RGMQL"
278
+
279
+dir_out <- paste(system.file("example", package = "RGMQL"), 'exp', sep='/')
280
+
281
+# Export the GRangesList 'imported_data' as GMQL dataset called 'example' 
282
+# at destination path
283
+
284
+export_gmql(imported_data, dir_out, is_gtf = TRUE)
285
+
286
+## ---- filter_extract----------------------------------------------------------
287
+# This statement defines the path to the folder "TCGA-ACC" in the subdirectory 
288
+# "example" of the package "RGMQL"
289
+
290
+data_in <- system.file("example", "TCGA-ACC", package = "RGMQL")
291
+
292
+matrix <- filter_and_extract(data_in, metadata= NULL,
293
+                             region_attributes = 
294
+                               FULL(except = c('fpkm_uq','fpkm')))
295
+matrix
296
+
297
+
298
+## ---- metadata----------------------------------------------------------------
299
+# This statement defines the path to the folder "DATASET_META" in the 
300
+# subdirectory "example" of the package "RGMQL"
301
+
302
+dataset_path <- system.file("example", "DATASET_META", package = "RGMQL")
303
+
304
+# Import the GMQL dataset DATASET_META as GRangesList
305
+
306
+grl_data <- import_gmql(dataset_path, is_gtf = FALSE)
307
+grl_data
308
+
309
+# and its metadata
310
+
311
+grl_data@metadata
312
+
313
+
314
+## ---- retrieve_value----------------------------------------------------------
315
+
316
+# store metadata on variable a
317
+
318
+a = grl_data@metadata
319
+
320
+# get disease value of sample S_00000
321
+
322
+a$S_00000$disease
323
+
324
+
325
+## ---- retrieve_values---------------------------------------------------------
326
+
327
+# get all disease values of sample S_00000
328
+
329
+a$S_00000[which(names(a$S_00000) %in% "disease")]
330
+
331
+
Simone authored on 25/05/2021 07:06:38
Showing1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,331 +0,0 @@
1
-## ---- include=FALSE-----------------------------------------------------------
2
-options(tinytex.verbose = TRUE)
3
-
4
-
5
-## ---- initialization----------------------------------------------------------
6
-library('RGMQL')
7
-
8
-## ---- initialization_RGMQLlib-------------------------------------------------
9
-library('RGMQLlib')
10
-
11
-## ---- init--------------------------------------------------------------------
12
-init_gmql()
13
-
14
-## ---- read GMQL dataset-------------------------------------------------------
15
-gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL")
16
-data_out = read_gmql(gmql_dataset_path)
17
-
18
-## ---- read GRangesList--------------------------------------------------------
19
-library("GenomicRanges")
20
-
21
-# Granges Object with one region: chr2 and two metadata columns: score = 5 
22
-# and GC  = 0.45
23
-
24
-gr1 <- GRanges(seqnames = "chr2",
25
-    ranges = IRanges(103, 106), strand = "+", score = 5, GC = 0.45)
26
-
27
-# Granges Object with two regions both chr1 and two metadata columns: score = 3
28
-# for the first region and score = 4 for the second one, GC  = 0.3 and 0.5 
29
-# for the first and second region, respectively
30
-
31
-gr2 <- GRanges(seqnames = c("chr1", "chr1"),
32
-    ranges = IRanges(c(107, 113), width = 3), strand = c("+", "-"),
33
-    score = 3:4, GC = c(0.3, 0.5))
34
-
35
-grl <- GRangesList("txA" = gr1, "txB" = gr2)
36
-data_out <- read_GRangesList(grl)
37
-
38
-## ---- query-------------------------------------------------------------------
39
-
40
-# These statements define the paths to the folders "EXON" and "MUT" in the 
41
-# subdirectory "example" of the package "RGMQL"
42
-
43
-exon_path <- system.file("example", "EXON", package = "RGMQL")
44
-mut_path <- system.file("example", "MUT", package = "RGMQL")
45
-
46
-# Read EXON folder as a GMQL dataset named "exon_ds" containing a single 
47
-# sample with exon regions, and MUT folder as a GMQL dataset named "mut_ds" 
48
-
49
-exon_ds <- read_gmql(exon_path)
50
-mut_ds <- read_gmql(mut_path)
51
-
52
-# Filter out mut_ds based on a metadata predicate to keep breast cancer 
53
-# mutations only
54
-
55
-mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
56
-                clinical_patient__tumor_tissue_site == 'breast')
57
-
58
-# Filter out exon_ds based on a metadata predicate to keep Refseq exons only
59
-
60
-exon = filter(exon_ds, annotation_type == 'exons' & 
61
-                    original_provider == 'RefSeq')
62
-
63
-# For each mutation sample, map the mutations to the exon regions using 
64
-# the map() function and count mutations within each exon storing the value
65
-# in the default region attribute 'count_left_right'
66
-
67
-exon1 <- map(exon, mut)
68
-
69
-# Remove exons in each sample that do not contain mutations
70
-
71
-exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
72
-
73
-# Using the extend() function, count how many exons remain in each sample and
74
-# store the result in the sample metadata as a new attribute-value pair, 
75
-# with exon_count as attribute name 
76
-
77
-exon3 <- extend(exon2, exon_count = COUNT())
78
-
79
-# Order samples in descending order of the added metadata exon_count 
80
-
81
-exon_res = arrange(exon3, list(DESC("exon_count")))
82
-
83
-## ---- materialize-------------------------------------------------------------
84
-# Materialize the result dataset on disk
85
-collect(exon_res)
86
-
87
-## ---- materializeElsewhere----------------------------------------------------
88
-# Materialize the result dataset into a specific folder on disk
89
-collect(exon_res, dir_out = "./WD_R", name = "dataset") #, 
90
-
91
-## ---- execute, eval = FALSE---------------------------------------------------
92
-#  execute()
93
-
94
-## ---- take,eval=FALSE---------------------------------------------------------
95
-#  g <- take(exon_res, rows = 45)
96
-
97
-## ---- init with guest login---------------------------------------------------
98
-test_url = "http://www.gmql.eu/gmql-rest"
99
-login_gmql(test_url)
100
-
101
-## ---- init with login---------------------------------------------------------
102
-test_url = "http://www.gmql.eu/gmql-rest"
103
-login_gmql(test_url, username = 'myname', password = 'mypassword')
104
-
105
-## ---- run, eval = FALSE-------------------------------------------------------
106
-#  
107
-#  job <- run_query(test_url, "query_1", "DNA = SELECT() Example_Dataset_1;
108
-#  MATERIALIZE DNA INTO RESULT_DS;", output_gtf = FALSE)
109
-#  
110
-
111
-## ---- run_from_file, eval = FALSE---------------------------------------------
112
-#  query_path <- system.file("example", "query1.txt", package = "RGMQL")
113
-#  job <- run_query_fromfile(test_url, query_path, output_gtf = FALSE)
114
-
115
-## ---- trace, eval = FALSE-----------------------------------------------------
116
-#  job_id <- job$id
117
-#  trace_job(test_url, job_id)
118
-
119
-## ---- download, eval = FALSE--------------------------------------------------
120
-#  name_dataset <- job$datasets[[1]]$name
121
-#  download_dataset(test_url, name_dataset)
122
-#  
123
-
124
-## ---- download_as_GRangesList, eval=FALSE-------------------------------------
125
-#  name_dataset <- job$datasets[[1]]$name
126
-#  grl = download_as_GRangesList(test_url, name_dataset)
127
-
128
-## ---- logout------------------------------------------------------------------
129
-logout_gmql(test_url)
130
-
131
-## ---- login remote, eval = FALSE----------------------------------------------
132
-#  test_url = "http://www.gmql.eu/gmql-rest"
133
-#  login_gmql(test_url)
134
-
135
-## ---- initialize remote-------------------------------------------------------
136
-init_gmql(url = test_url)
137
-
138
-## ---- change processing mode--------------------------------------------------
139
-remote_processing(TRUE)
140
-
141
-## ---- init remote processing--------------------------------------------------
142
-init_gmql(url = test_url, remote_processing = TRUE)
143
-
144
-## ---- remote query------------------------------------------------------------
145
-
146
-## Read the remote dataset HG19_TCGA_dnaseq
147
-## Read the remote dataset HG19_BED_ANNOTATION
148
-
149
-TCGA_dnaseq <- read_gmql("public.HG19_TCGA_dnaseq", is_local = FALSE)
150
-HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
151
-
152
-# Filter out mut_ds based on a metadata predicate to keep breast cancer 
153
-# mutations only
154
-
155
-mut = filter(TCGA_dnaseq, manually_curated__dataType == 'dnaseq' & 
156
-                clinical_patient__tumor_tissue_site == 'breast')
157
-
158
-# Filter out exon_ds based on a metadata predicate to keep Refseq exons only 
159
-
160
-exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
161
-                    original_provider == 'RefSeq')
162
-
163
-# For each mutation sample, map the mutations to the exon regions using 
164
-# the map() function and count mutations within each exon storing the value
165
-# in the default region attribute 'count_left_right'
166
-
167
-exon1 <- map(exon, mut)
168
-
169
-# Remove exons in each sample that do not contain mutations
170
-
171
-exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
172
-
173
-# Using the extend() function, count how many exons remain in each sample and
174
-# store the result in the sample metadata as a new attribute-value pair, 
175
-# with exon_count as attribute name 
176
-
177
-exon3 <- extend(exon2, exon_count = COUNT())
178
-
179
-# Order samples in descending order of the added metadata exon_count 
180
-
181
-exon_res = arrange(exon3, list(DESC("exon_count")))
182
-
183
-## ---- remote materialize, eval = FALSE----------------------------------------
184
-#  collect(exon_res, name="exon_res_data")
185
-
186
-## ---- remote execute, eval = FALSE--------------------------------------------
187
-#  job<-execute()
188
-
189
-## ---- download_2, eval = FALSE------------------------------------------------
190
-#  name_dataset <- job$datasets[[1]]$name
191
-#  download_dataset(test_url, name_dataset)
192
-
193
-## ---- download_as_GRangesList_2, eval=FALSE-----------------------------------
194
-#  name_dataset <- job$datasets[[1]]$name
195
-#  grl = download_as_GRangesList(test_url, name_dataset)
196
-
197
-## ---- logout_2, eval=FALSE----------------------------------------------------
198
-#  logout_gmql(test_url)
199
-
200
-## ---- switch mode-------------------------------------------------------------
201
-test_url = "http://www.gmql.eu/gmql-rest"
202
-init_gmql(url = test_url)
203
-remote_processing(TRUE)
204
-
205
-## ---- mixed query-------------------------------------------------------------
206
-
207
-
208
-# This statement defines the path to the folder "MUT" in the subdirectory 
209
-# "example" of the package "RGMQL"
210
-
211
-mut_path <- system.file("example", "MUT", package = "RGMQL")
212
-
213
-# Read MUT folder as a GMQL dataset named "mut_ds" 
214
-
215
-mut_ds <- read_gmql(mut_path, is_local = TRUE)
216
-
217
-# Read the remote dataset HG19_BED_ANNOTATION
218
-
219
-HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
220
-
221
-# Filter out mut_ds based on a metadata predicate to keep breast cancer 
222
-# mutations only
223
-
224
-mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
225
-                clinical_patient__tumor_tissue_site == 'breast')
226
-
227
-# Filter out exon_ds based on a metadata predicate to keep Refseq exons only 
228
-
229
-exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
230
-                    original_provider == 'RefSeq')
231
-
232
-# For each mutation sample, map the mutations to the exon regions using 
233
-# the map() function and count mutations within each exon storing the value
234
-# in the default region attribute 'count_left_right'
235
-
236
-exon1 <- map(exon, mut)
237
-
238
-# Remove exons in each sample that do not contain mutations
239
-
240
-exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
241
-
242
-# Using the extend() function, count how many exons remain in each sample and
243
-# store the result in the sample metadata as a new attribute-value pair, 
244
-# with exon_count as attribute name 
245
-
246
-exon3 <- extend(exon2, exon_count = COUNT())
247
-
248
-# Order samples in descending order of the added metadata exon_count 
249
-
250
-exon_res = arrange(exon3, list(DESC("exon_count")))
251
-
252
-
253
-## ---- mixed materialize, eval = FALSE-----------------------------------------
254
-#  collect(exon_res,"exon_result_dataset")
255
-
256
-## ---- mixed execute, eval = FALSE---------------------------------------------
257
-#  job<-execute()
258
-
259
-## ---- import------------------------------------------------------------------
260
-# This statement defines the path to the folder "EXON" in the subdirectory 
261
-# "example" of the package "RGMQL"
262
-
263
-dataset_path <- system.file("example", "EXON", package = "RGMQL")
264
-
265
-# Import the GMQL dataset EXON as GRangesList
266
-
267
-imported_data <- import_gmql(dataset_path, is_gtf = FALSE)
268
-imported_data
269
-
270
-# and its metadata
271
-
272
-imported_data@metadata
273
-
274
-
275
-## ---- export------------------------------------------------------------------
276
-# This statement defines the path to the subdirectory "exp" of the 
277
-# package "RGMQL"
278
-
279
-dir_out <- paste(system.file("example", package = "RGMQL"), 'exp', sep='/')
280
-
281
-# Export the GRangesList 'imported_data' as GMQL dataset called 'example' 
282
-# at destination path
283
-
284
-export_gmql(imported_data, dir_out, is_gtf = TRUE)
285
-
286
-## ---- filter_extract----------------------------------------------------------
287
-# This statement defines the path to the folder "TCGA-ACC" in the subdirectory 
288
-# "example" of the package "RGMQL"
289
-
290
-data_in <- system.file("example", "TCGA-ACC", package = "RGMQL")
291
-
292
-matrix <- filter_and_extract(data_in, metadata= NULL,
293
-                             region_attributes = 
294
-                               FULL(except = c('fpkm_uq','fpkm')))
295
-matrix
296
-
297
-
298
-## ---- metadata----------------------------------------------------------------
299
-# This statement defines the path to the folder "DATASET_META" in the 
300
-# subdirectory "example" of the package "RGMQL"
301
-
302
-dataset_path <- system.file("example", "DATASET_META", package = "RGMQL")
303
-
304
-# Import the GMQL dataset DATASET_META as GRangesList
305
-
306
-grl_data <- import_gmql(dataset_path, is_gtf = FALSE)
307
-grl_data
308
-
309
-# and its metadata
310
-
311
-grl_data@metadata
312
-
313
-
314
-## ---- retrieve_value----------------------------------------------------------
315
-
316
-# store metadata on variable a
317
-
318
-a = grl_data@metadata
319
-
320
-# get disease value of sample S_00000
321
-
322
-a$S_00000$disease
323
-
324
-
325
-## ---- retrieve_values---------------------------------------------------------
326
-
327
-# get all disease values of sample S_00000
328
-
329
-a$S_00000[which(names(a$S_00000) %in% "disease")]
330
-
331
-
Browse code

updates RD and vignette

Simone authored on 25/05/2021 07:02:44
Showing1 changed files
... ...
@@ -1,3 +1,7 @@
1
+## ---- include=FALSE-----------------------------------------------------------
2
+options(tinytex.verbose = TRUE)
3
+
4
+
1 5
 ## ---- initialization----------------------------------------------------------
2 6
 library('RGMQL')
3 7
 
... ...
@@ -177,7 +181,7 @@ exon3 <- extend(exon2, exon_count = COUNT())
177 181
 exon_res = arrange(exon3, list(DESC("exon_count")))
178 182
 
179 183
 ## ---- remote materialize, eval = FALSE----------------------------------------
180
-#  collect(exon_res, name="exon_res_folder")
184
+#  collect(exon_res, name="exon_res_data")
181 185
 
182 186
 ## ---- remote execute, eval = FALSE--------------------------------------------
183 187
 #  job<-execute()
... ...
@@ -269,10 +273,10 @@ imported_data@metadata
269 273
 
270 274
 
271 275
 ## ---- export------------------------------------------------------------------
272
-# This statement defines the path to the subdirectory "example" of the 
276
+# This statement defines the path to the subdirectory "exp" of the 
273 277
 # package "RGMQL"
274 278
 
275
-dir_out <- system.file("example", package = "RGMQL")
279
+dir_out <- paste(system.file("example", package = "RGMQL"), 'exp', sep='/')
276 280
 
277 281
 # Export the GRangesList 'imported_data' as GMQL dataset called 'example' 
278 282
 # at destination path
... ...
@@ -280,12 +284,14 @@ dir_out <- system.file("example", package = "RGMQL")
280 284
 export_gmql(imported_data, dir_out, is_gtf = TRUE)
281 285
 
282 286
 ## ---- filter_extract----------------------------------------------------------
283
-# This statement defines the path to the folder "TEAD" in the subdirectory 
287
+# This statement defines the path to the folder "TCGA-ACC" in the subdirectory 
284 288
 # "example" of the package "RGMQL"
285 289
 
286 290
 data_in <- system.file("example", "TCGA-ACC", package = "RGMQL")
287 291
 
288
-matrix <- filter_and_extract(data_in, metadata = NULL, region_attributes = FULL(except = c('fpkm_uq','fpkm')))
292
+matrix <- filter_and_extract(data_in, metadata= NULL,
293
+                             region_attributes = 
294
+                               FULL(except = c('fpkm_uq','fpkm')))
289 295
 matrix
290 296
 
291 297
 
Browse code

update vignette, new datasets

Simone authored on 18/05/2021 20:04:54
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,325 @@
1
+## ---- initialization----------------------------------------------------------
2
+library('RGMQL')
3
+
4
+## ---- initialization_RGMQLlib-------------------------------------------------
5
+library('RGMQLlib')
6
+
7
+## ---- init--------------------------------------------------------------------
8
+init_gmql()
9
+
10
+## ---- read GMQL dataset-------------------------------------------------------
11
+gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL")
12
+data_out = read_gmql(gmql_dataset_path)
13
+
14
+## ---- read GRangesList--------------------------------------------------------
15
+library("GenomicRanges")
16
+
17
+# Granges Object with one region: chr2 and two metadata columns: score = 5 
18
+# and GC  = 0.45
19
+
20
+gr1 <- GRanges(seqnames = "chr2",
21
+    ranges = IRanges(103, 106), strand = "+", score = 5, GC = 0.45)
22
+
23
+# Granges Object with two regions both chr1 and two metadata columns: score = 3
24
+# for the first region and score = 4 for the second one, GC  = 0.3 and 0.5 
25
+# for the first and second region, respectively
26
+
27
+gr2 <- GRanges(seqnames = c("chr1", "chr1"),
28
+    ranges = IRanges(c(107, 113), width = 3), strand = c("+", "-"),
29
+    score = 3:4, GC = c(0.3, 0.5))
30
+
31
+grl <- GRangesList("txA" = gr1, "txB" = gr2)
32
+data_out <- read_GRangesList(grl)
33
+
34
+## ---- query-------------------------------------------------------------------
35
+
36
+# These statements define the paths to the folders "EXON" and "MUT" in the 
37
+# subdirectory "example" of the package "RGMQL"
38
+
39
+exon_path <- system.file("example", "EXON", package = "RGMQL")
40
+mut_path <- system.file("example", "MUT", package = "RGMQL")
41
+
42
+# Read EXON folder as a GMQL dataset named "exon_ds" containing a single 
43
+# sample with exon regions, and MUT folder as a GMQL dataset named "mut_ds" 
44
+
45
+exon_ds <- read_gmql(exon_path)
46
+mut_ds <- read_gmql(mut_path)
47
+
48
+# Filter out mut_ds based on a metadata predicate to keep breast cancer 
49
+# mutations only
50
+
51
+mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
52
+                clinical_patient__tumor_tissue_site == 'breast')
53
+
54
+# Filter out exon_ds based on a metadata predicate to keep Refseq exons only
55
+
56
+exon = filter(exon_ds, annotation_type == 'exons' & 
57
+                    original_provider == 'RefSeq')
58
+
59
+# For each mutation sample, map the mutations to the exon regions using 
60
+# the map() function and count mutations within each exon storing the value
61
+# in the default region attribute 'count_left_right'
62
+
63
+exon1 <- map(exon, mut)
64
+
65
+# Remove exons in each sample that do not contain mutations
66
+
67
+exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
68
+
69
+# Using the extend() function, count how many exons remain in each sample and
70
+# store the result in the sample metadata as a new attribute-value pair, 
71
+# with exon_count as attribute name 
72
+
73
+exon3 <- extend(exon2, exon_count = COUNT())
74
+
75
+# Order samples in descending order of the added metadata exon_count 
76
+
77
+exon_res = arrange(exon3, list(DESC("exon_count")))
78
+
79
+## ---- materialize-------------------------------------------------------------
80
+# Materialize the result dataset on disk
81
+collect(exon_res)
82
+
83
+## ---- materializeElsewhere----------------------------------------------------
84
+# Materialize the result dataset into a specific folder on disk
85
+collect(exon_res, dir_out = "./WD_R", name = "dataset") #, 
86
+
87
+## ---- execute, eval = FALSE---------------------------------------------------
88
+#  execute()
89
+
90
+## ---- take,eval=FALSE---------------------------------------------------------
91
+#  g <- take(exon_res, rows = 45)
92
+
93
+## ---- init with guest login---------------------------------------------------
94
+test_url = "http://www.gmql.eu/gmql-rest"
95
+login_gmql(test_url)
96
+
97
+## ---- init with login---------------------------------------------------------
98
+test_url = "http://www.gmql.eu/gmql-rest"
99
+login_gmql(test_url, username = 'myname', password = 'mypassword')
100
+
101
+## ---- run, eval = FALSE-------------------------------------------------------
102
+#  
103
+#  job <- run_query(test_url, "query_1", "DNA = SELECT() Example_Dataset_1;
104
+#  MATERIALIZE DNA INTO RESULT_DS;", output_gtf = FALSE)
105
+#  
106
+
107
+## ---- run_from_file, eval = FALSE---------------------------------------------
108
+#  query_path <- system.file("example", "query1.txt", package = "RGMQL")
109
+#  job <- run_query_fromfile(test_url, query_path, output_gtf = FALSE)
110
+
111
+## ---- trace, eval = FALSE-----------------------------------------------------
112
+#  job_id <- job$id
113
+#  trace_job(test_url, job_id)
114
+
115
+## ---- download, eval = FALSE--------------------------------------------------
116
+#  name_dataset <- job$datasets[[1]]$name
117
+#  download_dataset(test_url, name_dataset)
118
+#  
119
+
120
+## ---- download_as_GRangesList, eval=FALSE-------------------------------------
121
+#  name_dataset <- job$datasets[[1]]$name
122
+#  grl = download_as_GRangesList(test_url, name_dataset)
123
+
124
+## ---- logout------------------------------------------------------------------
125
+logout_gmql(test_url)
126
+
127
+## ---- login remote, eval = FALSE----------------------------------------------
128
+#  test_url = "http://www.gmql.eu/gmql-rest"
129
+#  login_gmql(test_url)
130
+
131
+## ---- initialize remote-------------------------------------------------------
132
+init_gmql(url = test_url)
133
+
134
+## ---- change processing mode--------------------------------------------------
135
+remote_processing(TRUE)
136
+
137
+## ---- init remote processing--------------------------------------------------
138
+init_gmql(url = test_url, remote_processing = TRUE)
139
+
140
+## ---- remote query------------------------------------------------------------
141
+
142
+## Read the remote dataset HG19_TCGA_dnaseq
143
+## Read the remote dataset HG19_BED_ANNOTATION
144
+
145
+TCGA_dnaseq <- read_gmql("public.HG19_TCGA_dnaseq", is_local = FALSE)
146
+HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
147
+
148
+# Filter TCGA_dnaseq based on a metadata predicate to keep breast cancer 
149
+# mutations only
150
+
151
+mut = filter(TCGA_dnaseq, manually_curated__dataType == 'dnaseq' & 
152
+                clinical_patient__tumor_tissue_site == 'breast')
153
+
154
+# Filter HG19_bed_ann based on a metadata predicate to keep RefSeq exons only 
155
+
156
+exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
157
+                    original_provider == 'RefSeq')
158
+
159
+# For each mutation sample, map the mutations to the exon regions using 
160
+# the map() function and count mutations within each exon storing the value
161
+# in the default region attribute 'count_left_right'
162
+
163
+exon1 <- map(exon, mut)
164
+
165
+# Remove exons in each sample that do not contain mutations
166
+
167
+exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
168
+
169
+# Using the extend() function, count how many exons remain in each sample and
170
+# store the result in the sample metadata as a new attribute-value pair, 
171
+# with exon_count as attribute name 
172
+
173
+exon3 <- extend(exon2, exon_count = COUNT())
174
+
175
+# Order samples in descending order of the added metadata exon_count 
176
+
177
+exon_res = arrange(exon3, list(DESC("exon_count")))
178
+
179
+## ---- remote materialize, eval = FALSE----------------------------------------
180
+#  collect(exon_res, name="exon_res_folder")
181
+
182
+## ---- remote execute, eval = FALSE--------------------------------------------
183
+#  job<-execute()
184
+
185
+## ---- download_2, eval = FALSE------------------------------------------------
186
+#  name_dataset <- job$datasets[[1]]$name
187
+#  download_dataset(test_url, name_dataset)
188
+
189
+## ---- download_as_GRangesList_2, eval=FALSE-----------------------------------
190
+#  name_dataset <- job$datasets[[1]]$name
191
+#  grl = download_as_GRangesList(test_url, name_dataset)
192
+
193
+## ---- logout_2, eval=FALSE----------------------------------------------------
194
+#  logout_gmql(test_url)
195
+
196
+## ---- switch mode-------------------------------------------------------------
197
+test_url = "http://www.gmql.eu/gmql-rest"
198
+init_gmql(url = test_url)
199
+remote_processing(TRUE)
200
+
201
+## ---- mixed query-------------------------------------------------------------
202
+
203
+
204
+# This statement defines the path to the folder "MUT" in the subdirectory 
205
+# "example" of the package "RGMQL"
206
+
207
+mut_path <- system.file("example", "MUT", package = "RGMQL")
208
+
209
+# Read MUT folder as a GMQL dataset named "mut_ds" 
210
+
211
+mut_ds <- read_gmql(mut_path, is_local = TRUE)
212
+
213
+# Read the remote dataset HG19_BED_ANNOTATION
214
+
215
+HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
216
+
217
+# Filter out mut_ds based on a metadata predicate to keep breast cancer 
218
+# mutations only
219
+
220
+mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
221
+                clinical_patient__tumor_tissue_site == 'breast')
222
+
223
+# Filter HG19_bed_ann based on a metadata predicate to keep RefSeq exons only 
224
+
225
+exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
226
+                    original_provider == 'RefSeq')
227
+
228
+# For each mutation sample, map the mutations to the exon regions using 
229
+# the map() function and count mutations within each exon storing the value
230
+# in the default region attribute 'count_left_right'
231
+
232
+exon1 <- map(exon, mut)
233
+
234
+# Remove exons in each sample that do not contain mutations
235
+
236
+exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
237
+
238
+# Using the extend() function, count how many exons remain in each sample and
239
+# store the result in the sample metadata as a new attribute-value pair, 
240
+# with exon_count as attribute name 
241
+
242
+exon3 <- extend(exon2, exon_count = COUNT())
243
+
244
+# Order samples in descending order of the added metadata exon_count 
245
+
246
+exon_res = arrange(exon3, list(DESC("exon_count")))
247
+
248
+
249
+## ---- mixed materialize, eval = FALSE-----------------------------------------
250
+#  collect(exon_res,"exon_result_dataset")
251
+
252
+## ---- mixed execute, eval = FALSE---------------------------------------------
253
+#  job<-execute()
254
+
255
+## ---- import------------------------------------------------------------------
256
+# This statement defines the path to the folder "EXON" in the subdirectory 
257
+# "example" of the package "RGMQL"
258
+
259
+dataset_path <- system.file("example", "EXON", package = "RGMQL")
260
+
261
+# Import the GMQL dataset EXON as GRangesList
262
+
263
+imported_data <- import_gmql(dataset_path, is_gtf = FALSE)
264
+imported_data
265
+
266
+# and its metadata
267
+
268
+imported_data@metadata
269
+
270
+
271
+## ---- export------------------------------------------------------------------
272
+# This statement defines the path to the subdirectory "example" of the 
273
+# package "RGMQL"
274
+
275
+dir_out <- system.file("example", package = "RGMQL")
276
+
277
+# Export the GRangesList 'imported_data' as GMQL dataset called 'example' 
278
+# at destination path
279
+
280
+export_gmql(imported_data, dir_out, is_gtf = TRUE)
281
+
282
+## ---- filter_extract----------------------------------------------------------
283
+# This statement defines the path to the folder "TEAD" in the subdirectory 
284
+# "example" of the package "RGMQL"
285
+
286
+data_in <- system.file("example", "TCGA-ACC", package = "RGMQL")
287
+
288
+matrix <- filter_and_extract(data_in, metadata = NULL, region_attributes = FULL(except = c('fpkm_uq','fpkm')))
289
+matrix
290
+
291
+
292
+## ---- metadata----------------------------------------------------------------
293
+# This statement defines the path to the folder "DATASET_META" in the 
294
+# subdirectory "example" of the package "RGMQL"
295
+
296
+dataset_path <- system.file("example", "DATASET_META", package = "RGMQL")
297
+
298
+# Import the GMQL dataset DATASET_META as GRangesList
299
+
300
+grl_data <- import_gmql(dataset_path, is_gtf = FALSE)
301
+grl_data
302
+
303
+# and its metadata
304
+
305
+grl_data@metadata
306
+
307
+
308
+## ---- retrieve_value----------------------------------------------------------
309
+
310
+# store metadata on variable a
311
+
312
+a = grl_data@metadata
313
+
314
+# get disease value of sample S_00000
315
+
316
+a$S_00000$disease
317
+
318
+
319
+## ---- retrieve_values---------------------------------------------------------
320
+
321
+# get all disease values of sample S_00000
322
+
323
+a$S_00000[which(names(a$S_00000) %in% "disease")]
324
+
325
+
Browse code

minor fix

Simone authored on 18/04/2018 07:11:16
Showing1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,9 +0,0 @@
1
-## ---- initialization-----------------------------------------------------
2
-library('RGMQL')
3
-
4
-## ---- initialization_RGMQLlib--------------------------------------------
5
-library('RGMQLlib')
6
-
7
-## ---- init---------------------------------------------------------------
8
-init_gmql()
9
-
Browse code

updated read_gmql

Simone authored on 15/04/2018 11:23:41
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,9 @@
1
+## ---- initialization-----------------------------------------------------
2
+library('RGMQL')
3
+
4
+## ---- initialization_RGMQLlib--------------------------------------------
5
+library('RGMQLlib')
6
+
7
+## ---- init---------------------------------------------------------------
8
+init_gmql()
9
+