Browse code

added logic to handle remote dataset, change name

Simone authored on 13/05/2021 14:35:27
Showing 4 changed files

... ...
@@ -82,4 +82,5 @@ Collate:
82 82
     'gmql_union.R'
83 83
     'onLoad.R'
84 84
     'ordering-functions.R'
85
+    'show_all_metadata.R'
85 86
     'web-services.R'
... ...
@@ -52,6 +52,7 @@ export(sample_region)
52 52
 export(save_query)
53 53
 export(save_query_fromfile)
54 54
 export(semijoin)
55
+export(show_all_metadata)
55 56
 export(show_datasets_list)
56 57
 export(show_job_log)
57 58
 export(show_jobs_list)
58 59
new file mode 100644
... ...
@@ -0,0 +1,136 @@
1
#' Show all dataset metadata
#'
#' It creates a data frame from a remote or local GMQL dataset containing
#' the metadata of all its samples: one row for each metadata attribute and
#' one column for each sample.
#'
#' @param dataset string with GMQL dataset folder path or remote dataset.
#' In case of remote dataset to distinguish among private or public repository
#' each name must be prefixed with "private." or "public." respectively.
#' @param show_value whether or not to show the value associated to each
#' metadata attribute, otherwise only logical values (TRUE or FALSE),
#' representing the presence of the attribute in each sample, are shown.
#'
#' @return A data frame with metadata attributes as rows and samples as
#' columns
#'
#' @examples
#'
#' ## This statement defines the path to the sub-directory "example" of the
#' ## package "RGMQL" and shows all the metadata inside the GMQL dataset among
#' ## all the meta files and returns a data frame, viewing as logical value
#' ## representing its presence or not for each file.
#'
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' show_all_metadata(test_path)
#'
#' ## This statement defines the path to the sub-directory "example" of the
#' ## package "RGMQL" and shows all the metadata inside the GMQL dataset among
#' ## all the meta files and returns a data frame, viewing also its value.
#'
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' show_all_metadata(test_path, show_value = TRUE)
#'
#' ## This statement reads the remote dataset called Example_Dataset_1 on the
#' ## public repository and shows all the metadata inside the GMQL dataset
#' ## among all the meta files and returns a data frame, viewing also its
#' ## value. The remote URL is read from the stored GMQL credentials, so this
#' ## presumably needs an active login to a remote GMQL service.
#'
#' \dontrun{
#' show_all_metadata("public.Example_Dataset_1", show_value = TRUE)
#' }
#'
#' @export
#'
show_all_metadata <- function(dataset, show_value = FALSE) {
  if (!is.character(dataset) || length(dataset) != 1L || is.na(dataset)) {
    stop("dataset must be a single string")
  }

  is_public <- startsWith(dataset, "public.")
  is_private <- startsWith(dataset, "private.")

  if (is_public || is_private) {
    # Only the "private." prefix is removed before querying the remote
    # service; the "public." prefix is forwarded unchanged. The prefix is
    # stripped by position so that the rest of the name is never altered.
    if (is_private) {
      dataset <- substring(dataset, nchar("private.") + 1L)
    }
    .show_all_metadata_remote_dataset(dataset, show_value)
  } else {
    # Anything without a repository prefix is treated as a local folder path.
    .show_all_metadata_downloaded_dataset(dataset, show_value)
  }
}
55
+
56
# Build the metadata data frame for a dataset stored on the remote service.
#
# dataset:    name of the remote dataset, as passed by show_all_metadata().
# show_value: forwarded to .create_dataFrame(); TRUE to keep the metadata
#             values, FALSE to get a logical presence map.
#
# The remote URL is read from GMQL_credentials$remote_url.
.show_all_metadata_remote_dataset <- function(dataset, show_value) {
  url <- GMQL_credentials$remote_url

  # First retrieve the list of samples of the dataset.
  samples <- show_samples_list(url, dataset)$samples
  if (length(samples) == 0L) {
    stop("no samples present in this dataset")
  }

  # vapply() guarantees a character vector whatever the number of samples.
  name_samples <- vapply(
    samples,
    function(x) x$name,
    character(1),
    USE.NAMES = FALSE
  )

  # sample_metadata() is called once for every sample name.
  metadata_list <- lapply(name_samples, function(sample_name) {
    sample_metadata(url, dataset, sample_name)
  })

  .create_dataFrame(metadata_list, name_samples, show_value)
}
72
+
73
# Build the metadata data frame for a dataset stored in a local folder.
#
# dataset:    path of the dataset folder; the "files" sub-directory is
#             appended when the path does not already end with it.
# show_value: forwarded to .create_dataFrame(); TRUE to keep the metadata
#             values, FALSE to get a logical presence map.
#
# Stops when the folder does not exist, is empty, or contains no region
# (.gtf / .gdm) or no meta (.gtf.meta / .gdm.meta) files.
.show_all_metadata_downloaded_dataset <- function(dataset, show_value = FALSE) {
  # Drop trailing slashes so that basename() sees the last path component.
  dataset_dir <- sub("/+$", "", dataset)
  if (basename(dataset_dir) != "files") {
    dataset_dir <- file.path(dataset_dir, "files")
  }

  if (!dir.exists(dataset_dir)) {
    stop("Directory does not exists")
  }

  if (length(list.files(dataset_dir)) == 0L) {
    stop("no samples present in this dataset")
  }

  regions <- list.files(dataset_dir, pattern = "\\.(gtf|gdm)$")
  if (length(regions) == 0L) {
    stop("No regions files present")
  }

  meta_pattern <- "\\.(gtf|gdm)\\.meta$"
  meta <- list.files(dataset_dir, pattern = meta_pattern, full.names = TRUE)
  if (length(meta) == 0L) {
    stop("No meta files present")
  }

  # Sample names are derived from the meta files themselves, so every name
  # is paired with the metadata read from that same file.
  name_samples <- sub(meta_pattern, "", basename(meta))
  meta_list <- lapply(meta, .add_metadata)

  .create_dataFrame(meta_list, name_samples, show_value)
}
109
+
110
# Combine the metadata of all samples into a single data frame.
#
# meta_list:    list with one element per sample; each element is a named
#               collection of metadata (names are the metadata attributes).
# name_samples: sample names, in the same order as meta_list.
# show_value:   TRUE to return the metadata values, FALSE to return only
#               logical values telling whether the attribute is present.
#
# Returns a data frame with one row per metadata attribute and one column
# per sample; an empty data frame when no attribute is found at all.
.create_dataFrame <- function(meta_list, name_samples, show_value) {
  names(meta_list) <- name_samples

  # Union of all the metadata attributes found across the samples.
  set_meta <- unique(unlist(lapply(meta_list, names), use.names = FALSE))
  if (length(set_meta) == 0L) {
    return(data.frame())
  }

  # Give every sample the full set of attributes, in the same order,
  # filling the attributes it does not have with NA.
  filled <- lapply(meta_list, function(sample_meta) {
    sample_meta[setdiff(set_meta, names(sample_meta))] <- NA
    sample_meta[set_meta]
  })

  # The matrix is built explicitly so that its shape does not depend on the
  # number of attributes (a single attribute still yields a one-row matrix).
  complete <- matrix(
    unlist(filled, recursive = FALSE, use.names = FALSE),
    nrow = length(set_meta),
    ncol = length(filled),
    dimnames = list(set_meta, names(filled))
  )
  data_frame <- data.frame(complete)

  # Show a logical data frame: TRUE where the sample has the attribute.
  if (!show_value) {
    data_frame <- as.data.frame(!is.na(data_frame))
  }

  data_frame
}
0 137
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/show_all_metadata.R
3
+\name{show_all_metadata}
4
+\alias{show_all_metadata}
5
+\title{Create a metadata logical map}
6
+\usage{
7
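+show_all_metadata(dataset, show_value = FALSE)
7
+}
8
+\arguments{
9
+\item{dataset}{string with GMQL dataset folder path or remote dataset.
+In case of remote dataset to distinguish among private or public repository
+each name must be prefixed with "private." or "public." respectively.}
+
+\item{show_value}{whether or not to show the value associated to each
+metadata attribute, otherwise only logical values (TRUE or FALSE) are shown.}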
11
+}
12
+\value{
13
+Dataframe
14
+}
15
+\description{
16
+It creates a data frame from a remote or local GMQL dataset containing
17
+the metadata of all its samples.
18
+}
19
+\examples{
20
+
21
+## This statement defines the path to the subdirectory "example" of the 
22
+## package "RGMQL" and shows all the metadata of the contained GMQL dataset
23
+
24
+test_path <- system.file("example", "DATASET", package = "RGMQL")
25
+show_all_metadata(test_path)
26
+
27
+
28
+}
29
+\seealso{
30
+\code{\link{show_all_metadata}}
31
+}