58 | 59 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,136 @@ |
1 |
#' Show all dataset metadata
#'
#' It creates a data frame from a remote or local GMQL dataset containing the
#' metadata of its samples: one row per metadata attribute and one column per
#' sample.
#'
#' @param dataset string with GMQL dataset folder path or remote dataset.
#' In case of remote dataset to distinguish among private or public repository
#' each name must be prefixed with "private." or "public." respectively.
#' @param show_value whether or not show the value associated to metadata,
#' otherwise only logical value (TRUE or FALSE) are shown.
#'
#' @return A data frame. Cells are logical (metadata present or not) unless
#' \code{show_value} is TRUE, in which case they hold the metadata values
#' (NA where a sample lacks the attribute).
#'
#' @examples
#'
#' ## This statement defines the path to the sub-directory "example" of the
#' ## package "RGMQL" and show all the metadata inside the GMQL dataset among
#' ## all the meta files and return a data-frame, viewing as logical value
#' ## representing its presence or not for each file.
#'
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' show_all_metadata(test_path)
#'
#' ## This statement defines the path to the sub-directory "example" of the
#' ## package "RGMQL" and show all the metadata inside the GMQL dataset among
#' ## all the meta files and return a data-frame, viewing also its value.
#'
#' test_path <- system.file("example", "DATASET", package = "RGMQL")
#' show_all_metadata(test_path, show_value = TRUE)
#'
#' ## This statement reads the remote dataset called Example_Dataset_1 on
#' ## public repository and show all the metadata inside the GMQL dataset
#' ## among all the meta files and return a data-frame, viewing also its value.
#'
#' show_all_metadata("public.Example_Dataset_1", show_value = TRUE)
#'
#' @export
#'
show_all_metadata <- function(dataset, show_value = FALSE) {
  if (!is.character(dataset) || length(dataset) != 1L || is.na(dataset)) {
    stop("dataset must be a single string")
  }

  is_public <- startsWith(dataset, "public.")
  is_private <- startsWith(dataset, "private.")

  if (is_private) {
    # Strip only the leading literal "private." prefix. An unanchored
    # "private." regex would also delete later matches such as "private_"
    # inside the dataset name itself.
    dataset <- sub("^private\\.", "", dataset)
  }

  # Either prefix marks a remote dataset; anything else is a local folder.
  if (is_public || is_private) {
    .show_all_metadata_remote_dataset(dataset, show_value)
  } else {
    .show_all_metadata_downloaded_dataset(dataset, show_value)
  }
}
|
55 |
+ |
|
56 |
# Build the metadata data frame for a dataset stored on the remote
# repository.
#
# dataset:    remote dataset name, passed unchanged to the remote helpers.
# show_value: forwarded to .create_dataFrame().
#
# Reads the service URL from GMQL_credentials$remote_url, lists the samples
# with show_samples_list() and fetches each sample's metadata with
# sample_metadata(). Stops if the dataset has no samples.
.show_all_metadata_remote_dataset <- function(dataset, show_value) {
  url <- GMQL_credentials$remote_url

  # first we download the list of samples (region files) of the dataset
  samples <- show_samples_list(url, dataset)$samples

  if (!length(samples)) {
    stop("no samples present in this dataset")
  }

  # vapply keeps this a character vector whatever the number of samples
  # (sapply would return a list for an empty or ragged input).
  name_samples <- vapply(
    samples,
    function(x) x$name,
    character(1),
    USE.NAMES = FALSE
  )

  metadata_list <- lapply(name_samples, function(sample_name) {
    sample_metadata(url, dataset, sample_name)
  })

  .create_dataFrame(metadata_list, name_samples, show_value)
}
|
72 |
+ |
|
73 |
# Build the metadata data frame for a dataset stored in a local folder.
#
# dataset:    path to the dataset folder, or directly to its "files"
#             sub-folder; trailing slashes are ignored.
# show_value: forwarded to .create_dataFrame(). It was missing from the
#             signature although the body uses it and show_all_metadata()
#             passes it, so every local call failed with "unused argument".
#
# Stops if the folder does not exist, is empty, or holds no region files
# (.gtf / .gdm) or no metadata files (.gtf.meta / .gdm.meta).
.show_all_metadata_downloaded_dataset <- function(dataset,
                                                  show_value = FALSE) {
  datasetName <- sub("/+$", "", dataset)
  if (basename(datasetName) != "files") {
    datasetName <- file.path(datasetName, "files")
  }

  if (!dir.exists(datasetName)) {
    stop("Directory does not exists")
  }

  if (!length(list.files(datasetName))) {
    stop("no samples present in this dataset")
  }

  # Real regular expressions (escaped dot, anchored extension) instead of
  # glob-like "*.gtf$" patterns.
  regions <- list.files(
    datasetName,
    pattern = "\\.(gtf|gdm)$",
    full.names = TRUE
  )
  if (!length(regions)) {
    stop("No regions files present")
  }

  meta_suffix <- "\\.(gtf|gdm)\\.meta$"
  meta <- list.files(datasetName, pattern = meta_suffix, full.names = TRUE)
  if (!length(meta)) {
    stop("No meta files present")
  }

  # Derive each sample name from the very file its metadata is read from, so
  # names and metadata stay aligned even when the region and meta file sets
  # differ.
  name_samples <- sub(meta_suffix, "", basename(meta))
  meta_list <- lapply(meta, .add_metadata)

  .create_dataFrame(meta_list, name_samples, show_value)
}
|
109 |
+ |
|
110 |
# Assemble the metadata table: one row per metadata key, one column per
# sample.
#
# meta_list:    list with one named collection of metadata per sample.
# name_samples: sample names, in the same order as meta_list.
# show_value:   if TRUE the cells hold the metadata values (NA where the
#               sample lacks the key); otherwise the cells are logical and
#               tell whether the sample has the key.
#
# Returns a data frame.
.create_dataFrame <- function(meta_list, name_samples, show_value) {
  names(meta_list) <- name_samples

  # Collect the distinct keys through lapply(): sapply() returns a matrix
  # when every sample has the same number of keys, and unique() on a matrix
  # removes duplicated rows rather than duplicated values, so keys would be
  # repeated in the result.
  set_meta <- unique(unlist(lapply(meta_list, names), use.names = FALSE))

  columns <- lapply(meta_list, function(sample_meta) {
    # fill the keys this sample lacks with NA, then put keys in table order
    missing_keys <- set_meta[!(set_meta %in% names(sample_meta))]
    sample_meta[missing_keys] <- NA
    sample_meta[set_meta]
  })

  # Build the key-by-sample matrix explicitly. Relying on mapply()
  # simplification loses the matrix shape when there is a single key, which
  # made the subsequent row-name assignment fail.
  cells <- unlist(columns, recursive = FALSE, use.names = FALSE)
  if (is.null(cells)) {
    cells <- list()
  }
  complete_matrix <- matrix(
    cells,
    nrow = length(set_meta),
    ncol = length(columns),
    dimnames = list(set_meta, names(columns))
  )
  data_frame <- data.frame(complete_matrix)

  # show logical data frame
  if (!show_value) {
    data_frame <- as.data.frame(!is.na(data_frame))
  }

  data_frame
}
0 | 137 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/show_all_metadata.R |
|
3 |
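\name{show_all_metadata}
\alias{show_all_metadata}
\title{Show all dataset metadata}
\usage{
show_all_metadata(dataset, show_value = FALSE)
}
\arguments{
\item{dataset}{string with GMQL dataset folder path or remote dataset.
In case of remote dataset to distinguish among private or public repository
each name must be prefixed with "private." or "public." respectively.}

\item{show_value}{whether or not show the value associated to metadata,
otherwise only logical value (TRUE or FALSE) are shown.}
}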
|
12 |
+\value{ |
|
13 |
+Dataframe |
|
14 |
+} |
|
15 |
\description{
It creates a data frame from a remote or local GMQL dataset containing the
metadata of its samples.
}
|
19 |
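\examples{

## This statement defines the path to the sub-directory "example" of the
## package "RGMQL" and show all the metadata inside the GMQL dataset among
## all the meta files and return a data-frame, viewing as logical value
## representing its presence or not for each file.

test_path <- system.file("example", "DATASET", package = "RGMQL")
show_all_metadata(test_path)

## This statement defines the path to the sub-directory "example" of the
## package "RGMQL" and show all the metadata inside the GMQL dataset among
## all the meta files and return a data-frame, viewing also its value.

test_path <- system.file("example", "DATASET", package = "RGMQL")
show_all_metadata(test_path, show_value = TRUE)

}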
|
29 |
+\seealso{ |
|
30 |
+\code{\link{show_all_metadata}} |
|
31 |
+} |