Simone authored on 25/05/2021 07:06:38
Showing 1 changed file

1 1
deleted file mode 100644
... ...
@@ -1,331 +0,0 @@
1
-## ---- include=FALSE-----------------------------------------------------------
2
-options(tinytex.verbose = TRUE)
3
-
4
-
5
-## ---- initialization----------------------------------------------------------
6
-library('RGMQL')
7
-
8
-## ---- initialization_RGMQLlib-------------------------------------------------
9
-library('RGMQLlib')
10
-
11
-## ---- init--------------------------------------------------------------------
12
-init_gmql()
13
-
14
-## ---- read GMQL dataset-------------------------------------------------------
15
-gmql_dataset_path <- system.file("example", "EXON", package = "RGMQL")
16
-data_out = read_gmql(gmql_dataset_path)
17
-
18
-## ---- read GRangesList--------------------------------------------------------
19
-library("GenomicRanges")
20
-
21
-# GRanges object with one region on chr2 and two metadata columns: score = 5
22
-# and GC = 0.45
23
-
24
-gr1 <- GRanges(seqnames = "chr2",
25
-    ranges = IRanges(103, 106), strand = "+", score = 5, GC = 0.45)
26
-
27
-# GRanges object with two regions, both on chr1, and two metadata columns:
28
-# score = 3 for the first region and 4 for the second; GC = 0.3 and 0.5
29
-# for the first and second region, respectively
30
-
31
-gr2 <- GRanges(seqnames = c("chr1", "chr1"),
32
-    ranges = IRanges(c(107, 113), width = 3), strand = c("+", "-"),
33
-    score = 3:4, GC = c(0.3, 0.5))
34
-
35
-grl <- GRangesList("txA" = gr1, "txB" = gr2)
36
-data_out <- read_GRangesList(grl)
37
-
38
-## ---- query-------------------------------------------------------------------
39
-
40
-# These statements define the paths to the folders "EXON" and "MUT" in the 
41
-# subdirectory "example" of the package "RGMQL"
42
-
43
-exon_path <- system.file("example", "EXON", package = "RGMQL")
44
-mut_path <- system.file("example", "MUT", package = "RGMQL")
45
-
46
-# Read EXON folder as a GMQL dataset named "exon_ds" containing a single 
47
-# sample with exon regions, and MUT folder as a GMQL dataset named "mut_ds" 
48
-
49
-exon_ds <- read_gmql(exon_path)
50
-mut_ds <- read_gmql(mut_path)
51
-
52
-# Filter mut_ds based on a metadata predicate to keep breast cancer 
53
-# mutations only
54
-
55
-mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
56
-                clinical_patient__tumor_tissue_site == 'breast')
57
-
58
-# Filter exon_ds based on a metadata predicate to keep RefSeq exons only
59
-
60
-exon = filter(exon_ds, annotation_type == 'exons' & 
61
-                    original_provider == 'RefSeq')
62
-
63
-# For each mutation sample, map the mutations to the exon regions using 
64
-# the map() function and count mutations within each exon storing the value
65
-# in the default region attribute 'count_left_right'
66
-
67
-exon1 <- map(exon, mut)
68
-
69
-# Remove exons in each sample that do not contain mutations
70
-
71
-exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
72
-
73
-# Using the extend() function, count how many exons remain in each sample and
74
-# store the result in the sample metadata as a new attribute-value pair, 
75
-# with exon_count as attribute name 
76
-
77
-exon3 <- extend(exon2, exon_count = COUNT())
78
-
79
-# Order samples in descending order of the added metadata exon_count 
80
-
81
-exon_res = arrange(exon3, list(DESC("exon_count")))
82
-
83
-## ---- materialize-------------------------------------------------------------
84
-# Materialize the result dataset on disk
85
-collect(exon_res)
86
-
87
-## ---- materializeElsewhere----------------------------------------------------
88
-# Materialize the result dataset into a specific folder on disk
89
-collect(exon_res, dir_out = "./WD_R", name = "dataset") #, 
90
-
91
-## ---- execute, eval = FALSE---------------------------------------------------
92
-#  execute()
93
-
94
-## ---- take,eval=FALSE---------------------------------------------------------
95
-#  g <- take(exon_res, rows = 45)
96
-
97
-## ---- init with guest login---------------------------------------------------
98
-test_url = "http://www.gmql.eu/gmql-rest"
99
-login_gmql(test_url)
100
-
101
-## ---- init with login---------------------------------------------------------
102
-test_url = "http://www.gmql.eu/gmql-rest"
103
-login_gmql(test_url, username = 'myname', password = 'mypassword')
104
-
105
-## ---- run, eval = FALSE-------------------------------------------------------
106
-#  
107
-#  job <- run_query(test_url, "query_1", "DNA = SELECT() Example_Dataset_1;
108
-#  MATERIALIZE DNA INTO RESULT_DS;", output_gtf = FALSE)
109
-#  
110
-
111
-## ---- run_from_file, eval = FALSE---------------------------------------------
112
-#  query_path <- system.file("example", "query1.txt", package = "RGMQL")
113
-#  job <- run_query_fromfile(test_url, query_path, output_gtf = FALSE)
114
-
115
-## ---- trace, eval = FALSE-----------------------------------------------------
116
-#  job_id <- job$id
117
-#  trace_job(test_url, job_id)
118
-
119
-## ---- download, eval = FALSE--------------------------------------------------
120
-#  name_dataset <- job$datasets[[1]]$name
121
-#  download_dataset(test_url, name_dataset)
122
-#  
123
-
124
-## ---- download_as_GRangesList, eval=FALSE-------------------------------------
125
-#  name_dataset <- job$datasets[[1]]$name
126
-#  grl = download_as_GRangesList(test_url, name_dataset)
127
-
128
-## ---- logout------------------------------------------------------------------
129
-logout_gmql(test_url)
130
-
131
-## ---- login remote, eval = FALSE----------------------------------------------
132
-#  test_url = "http://www.gmql.eu/gmql-rest"
133
-#  login_gmql(test_url)
134
-
135
-## ---- initialize remote-------------------------------------------------------
136
-init_gmql(url = test_url)
137
-
138
-## ---- change processing mode--------------------------------------------------
139
-remote_processing(TRUE)
140
-
141
-## ---- init remote processing--------------------------------------------------
142
-init_gmql(url = test_url, remote_processing = TRUE)
143
-
144
-## ---- remote query------------------------------------------------------------
145
-
146
-## Read the remote dataset HG19_TCGA_dnaseq
147
-## Read the remote dataset HG19_BED_ANNOTATION
148
-
149
-TCGA_dnaseq <- read_gmql("public.HG19_TCGA_dnaseq", is_local = FALSE)
150
-HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
151
-
152
-# Filter TCGA_dnaseq based on a metadata predicate to keep breast cancer 
153
-# mutations only
154
-
155
-mut = filter(TCGA_dnaseq, manually_curated__dataType == 'dnaseq' & 
156
-                clinical_patient__tumor_tissue_site == 'breast')
157
-
158
-# Filter HG19_bed_ann based on a metadata predicate to keep RefSeq exons only
159
-
160
-exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
161
-                    original_provider == 'RefSeq')
162
-
163
-# For each mutation sample, map the mutations to the exon regions using 
164
-# the map() function and count mutations within each exon storing the value
165
-# in the default region attribute 'count_left_right'
166
-
167
-exon1 <- map(exon, mut)
168
-
169
-# Remove exons in each sample that do not contain mutations
170
-
171
-exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
172
-
173
-# Using the extend() function, count how many exons remain in each sample and
174
-# store the result in the sample metadata as a new attribute-value pair, 
175
-# with exon_count as attribute name 
176
-
177
-exon3 <- extend(exon2, exon_count = COUNT())
178
-
179
-# Order samples in descending order of the added metadata exon_count 
180
-
181
-exon_res = arrange(exon3, list(DESC("exon_count")))
182
-
183
-## ---- remote materialize, eval = FALSE----------------------------------------
184
-#  collect(exon_res, name="exon_res_data")
185
-
186
-## ---- remote execute, eval = FALSE--------------------------------------------
187
-#  job<-execute()
188
-
189
-## ---- download_2, eval = FALSE------------------------------------------------
190
-#  name_dataset <- job$datasets[[1]]$name
191
-#  download_dataset(test_url, name_dataset)
192
-
193
-## ---- download_as_GRangesList_2, eval=FALSE-----------------------------------
194
-#  name_dataset <- job$datasets[[1]]$name
195
-#  grl = download_as_GRangesList(test_url, name_dataset)
196
-
197
-## ---- logout_2, eval=FALSE----------------------------------------------------
198
-#  logout_gmql(test_url)
199
-
200
-## ---- switch mode-------------------------------------------------------------
201
-test_url = "http://www.gmql.eu/gmql-rest"
202
-init_gmql(url = test_url)
203
-remote_processing(TRUE)
204
-
205
-## ---- mixed query-------------------------------------------------------------
206
-
207
-
208
-# This statement defines the path to the folder "MUT" in the subdirectory 
209
-# "example" of the package "RGMQL"
210
-
211
-mut_path <- system.file("example", "MUT", package = "RGMQL")
212
-
213
-# Read MUT folder as a GMQL dataset named "mut_ds" 
214
-
215
-mut_ds <- read_gmql(mut_path, is_local = TRUE)
216
-
217
-# Read the remote dataset HG19_BED_ANNOTATION
218
-
219
-HG19_bed_ann <- read_gmql("public.HG19_BED_ANNOTATION", is_local = FALSE)
220
-
221
-# Filter mut_ds based on a metadata predicate to keep breast cancer 
222
-# mutations only
223
-
224
-mut = filter(mut_ds, manually_curated__dataType == 'dnaseq' & 
225
-                clinical_patient__tumor_tissue_site == 'breast')
226
-
227
-# Filter HG19_bed_ann based on a metadata predicate to keep RefSeq exons only
228
-
229
-exon = filter(HG19_bed_ann, annotation_type == 'exons' & 
230
-                    original_provider == 'RefSeq')
231
-
232
-# For each mutation sample, map the mutations to the exon regions using 
233
-# the map() function and count mutations within each exon storing the value
234
-# in the default region attribute 'count_left_right'
235
-
236
-exon1 <- map(exon, mut)
237
-
238
-# Remove exons in each sample that do not contain mutations
239
-
240
-exon2 <- filter(exon1, r_predicate = count_left_right >= 1)
241
-
242
-# Using the extend() function, count how many exons remain in each sample and
243
-# store the result in the sample metadata as a new attribute-value pair, 
244
-# with exon_count as attribute name 
245
-
246
-exon3 <- extend(exon2, exon_count = COUNT())
247
-
248
-# Order samples in descending order of the added metadata exon_count 
249
-
250
-exon_res = arrange(exon3, list(DESC("exon_count")))
251
-
252
-
253
-## ---- mixed materialize, eval = FALSE-----------------------------------------
254
-#  collect(exon_res,"exon_result_dataset")
255
-
256
-## ---- mixed execute, eval = FALSE---------------------------------------------
257
-#  job<-execute()
258
-
259
-## ---- import------------------------------------------------------------------
260
-# This statement defines the path to the folder "EXON" in the subdirectory 
261
-# "example" of the package "RGMQL"
262
-
263
-dataset_path <- system.file("example", "EXON", package = "RGMQL")
264
-
265
-# Import the GMQL dataset EXON as GRangesList
266
-
267
-imported_data <- import_gmql(dataset_path, is_gtf = FALSE)
268
-imported_data
269
-
270
-# and its metadata
271
-
272
-imported_data@metadata
273
-
274
-
275
-## ---- export------------------------------------------------------------------
276
-# This statement defines the path to the subdirectory "exp" of the 
277
-# package "RGMQL"
278
-
279
-dir_out <- paste(system.file("example", package = "RGMQL"), 'exp', sep='/')
280
-
281
-# Export the GRangesList 'imported_data' as GMQL dataset called 'example' 
282
-# at destination path
283
-
284
-export_gmql(imported_data, dir_out, is_gtf = TRUE)
285
-
286
-## ---- filter_extract----------------------------------------------------------
287
-# This statement defines the path to the folder "TCGA-ACC" in the subdirectory 
288
-# "example" of the package "RGMQL"
289
-
290
-data_in <- system.file("example", "TCGA-ACC", package = "RGMQL")
291
-
292
-matrix <- filter_and_extract(data_in, metadata= NULL,
293
-                             region_attributes = 
294
-                               FULL(except = c('fpkm_uq','fpkm')))
295
-matrix
296
-
297
-
298
-## ---- metadata----------------------------------------------------------------
299
-# This statement defines the path to the folder "DATASET_META" in the 
300
-# subdirectory "example" of the package "RGMQL"
301
-
302
-dataset_path <- system.file("example", "DATASET_META", package = "RGMQL")
303
-
304
-# Import the GMQL dataset DATASET_META as GRangesList
305
-
306
-grl_data <- import_gmql(dataset_path, is_gtf = FALSE)
307
-grl_data
308
-
309
-# and its metadata
310
-
311
-grl_data@metadata
312
-
313
-
314
-## ---- retrieve_value----------------------------------------------------------
315
-
316
-# store metadata in variable a
317
-
318
-a = grl_data@metadata
319
-
320
-# get disease value of sample S_00000
321
-
322
-a$S_00000$disease
323
-
324
-
325
-## ---- retrieve_values---------------------------------------------------------
326
-
327
-# get all disease values of sample S_00000
328
-
329
-a$S_00000[which(names(a$S_00000) %in% "disease")]
330
-
331
-