Browse code

[FIX] Fixed code for tidyselect warnings, minor adjustments

Giulia Pais authored on 30/11/2022 16:11:17
Showing 1 changed file
... ...
@@ -172,8 +172,10 @@
172 172
         dplyr::left_join(.types_mapping(), by = "types")
173 173
     if (mode == "fread") {
174 174
         specs_mappings <- specs_mappings %>%
175
-            dplyr::select(.data$names, .data$fread) %>%
176
-            dplyr::group_by(.data$fread)
175
+            dplyr::select(
176
+                dplyr::all_of(c("names", "fread"))
177
+            ) %>%
178
+            dplyr::group_by(dplyr::across(dplyr::all_of("fread")))
177 179
         types <- specs_mappings %>%
178 180
             dplyr::group_keys() %>%
179 181
             dplyr::pull(.data$fread)
... ...
@@ -195,8 +197,8 @@
195 197
         dplyr::left_join(.types_mapping(), by = "types")
196 198
     if (mode == "fread") {
197 199
         specs_mappings <- specs_mappings %>%
198
-            dplyr::select(.data$names, .data$fread) %>%
199
-            dplyr::group_by(.data$fread)
200
+            dplyr::select(dplyr::all_of(c("names", "fread"))) %>%
201
+            dplyr::group_by(dplyr::across(dplyr::all_of("fread")))
200 202
         types <- specs_mappings %>%
201 203
             dplyr::group_keys() %>%
202 204
             dplyr::pull(.data$fread)
... ...
@@ -218,8 +220,8 @@
218 220
         dplyr::left_join(.types_mapping(), by = "types")
219 221
     if (mode == "fread") {
220 222
         specs_mappings <- specs_mappings %>%
221
-            dplyr::select(.data$names, .data$fread) %>%
222
-            dplyr::group_by(.data$fread)
223
+            dplyr::select(dplyr::all_of(c("names", "fread"))) %>%
224
+            dplyr::group_by(dplyr::across(dplyr::all_of("fread")))
223 225
         types <- specs_mappings %>%
224 226
             dplyr::group_keys() %>%
225 227
             dplyr::pull(.data$fread)
... ...
@@ -299,7 +301,7 @@ reduced_AF_columns <- function() {
299 301
         vars_df = association_file_columns(TRUE),
300 302
         duplicate_politic = politics
301 303
     ) %>%
302
-        dplyr::select(.data$names, .data$tag)
304
+        dplyr::select(dplyr::all_of(c("names", "tag")))
303 305
     data.table::setDT(tag_cols)
304 306
     return(tag_cols)
305 307
 }
Browse code

[HOTFIX] Condensed vignettes due to Bioconductor package size issues

Giulia Pais authored on 21/04/2022 09:37:58
Showing 1 changed file
... ...
@@ -370,7 +370,7 @@ refGene_table_cols <- function() {
370 370
 #' @description
371 371
 #' Contains all information associated with critical tags used in the dynamic
372 372
 #' vars system. To learn more, see
373
-#' `vignette("setup_workflow", package="ISAnalytics")`.
373
+#' `vignette("workflow_start", package="ISAnalytics")`.
374 374
 #'
375 375
 #' @return A data frame
376 376
 #' @export
Browse code

[UPDATE] Final partial update for 1.5.4 - finished documentation, ready to merge

Giulia Pais authored on 20/04/2022 15:58:57
Showing 1 changed file
... ...
@@ -5,227 +5,231 @@
5 5
 # Internal: default mandatory IS vars and associated column types.
6 6
 # The combination of these fields defines a unique integration site.
7 7
 .default_mandatory_IS_vars <- function() {
8
-  tibble::tribble(
9
-    ~ names, ~ types, ~ transform, ~ flag, ~ tag,
10
-    "chr", "char", NULL, "required", "chromosome",
11
-    "integration_locus", "int", NULL, "required", "locus",
12
-    "strand", "char", NULL, "required", "is_strand"
13
-  )
8
+    tibble::tribble(
9
+        ~names, ~types, ~transform, ~flag, ~tag,
10
+        "chr", "char", NULL, "required", "chromosome",
11
+        "integration_locus", "int", NULL, "required", "locus",
12
+        "strand", "char", NULL, "required", "is_strand"
13
+    )
14 14
 }
15 15
 
16 16
 # Internal: default genomic annotation IS vars and associated column types.
17 17
 .default_annotation_IS_vars <- function() {
18
-  tibble::tribble(
19
-    ~ names, ~ types, ~ transform, ~ flag, ~ tag,
20
-    "GeneName", "char", NULL, "required", "gene_symbol",
21
-    "GeneStrand", "char", NULL, "required", "gene_strand"
22
-  )
18
+    tibble::tribble(
19
+        ~names, ~types, ~transform, ~flag, ~tag,
20
+        "GeneName", "char", NULL, "required", "gene_symbol",
21
+        "GeneStrand", "char", NULL, "required", "gene_strand"
22
+    )
23 23
 }
24 24
 
25 25
 # Internal: default association file columns and types
26 26
 .default_af_cols <- function() {
27
-  tibble::tribble(
28
-    ~ names, ~ types, ~ transform, ~ flag, ~ tag,
29
-    "ProjectID", "char", NULL, "required", "project_id",
30
-    "FUSIONID", "char", NULL, "optional", "fusion_id",
31
-    "PoolID", "char", NULL, "required", "pool_id",
32
-    "TagSequence", "char", NULL, "required", "tag_seq",
33
-    "SubjectID", "char", NULL, "required", "subject",
34
-    "VectorType", "char", NULL, "optional", NA_character_,
35
-    "VectorID", "char", NULL, "required", "vector_id",
36
-    "ExperimentID", "char", NULL, "optional", NA_character_,
37
-    "Tissue", "char", NULL, "required", "tissue",
38
-    "TimePoint", "char", ~ stringr::str_pad(.x, 4, side = "left", pad = "0"),
39
-    "required", "tp_days",
40
-    "DNAFragmentation", "char", NULL, "optional", NA_character_,
41
-    "PCRMethod", "char", NULL, "required", "pcr_method",
42
-    "TagIDextended", "char", NULL, "optional", NA_character_,
43
-    "Keywords","char", NULL, "optional", NA_character_,
44
-    "CellMarker", "char", NULL, "required", "cell_marker",
45
-    "TagID", "char", NULL, "required", "tag_id",
46
-    "NGSProvider", "char", NULL, "optional", NA_character_,
47
-    "NGSTechnology", "char", NULL, "required", "ngs_tech",
48
-    "ConverrtedFilesDir", "char", NULL, "optional", NA_character_,
49
-    "ConverrtedFilesName", "char", NULL, "optional", NA_character_,
50
-    "SourceFileFolder", "char", NULL, "optional", NA_character_,
51
-    "SourceFileNameR1", "char", NULL, "optional", NA_character_,
52
-    "SourceFileNameR2", "char", NULL, "optional", NA_character_,
53
-    "DNAnumber", "char", NULL, "required", "dna_num",
54
-    "ReplicateNumber", "int", NULL, "required", "pcr_replicate",
55
-    "DNAextractionDate", "date", NULL, "optional", NA_character_,
56
-    "DNAngUsed", "numeric", NULL, "required", NA_character_,
57
-    "LinearPCRID", "char", NULL, "optional", NA_character_,
58
-    "LinearPCRDate", "date", NULL, "optional", NA_character_,
59
-    "SonicationDate", "date", NULL, "optional", NA_character_,
60
-    "LigationDate", "date", NULL, "optional", NA_character_,
61
-    "1stExpoPCRID", "char", NULL, "optional", NA_character_,
62
-    "1stExpoPCRDate", "date", NULL, "optional", NA_character_,
63
-    "2ndExpoID", "char", NULL, "optional", NA_character_,
64
-    "2ndExpoDate", "date", NULL, "optional", NA_character_,
65
-    "FusionPrimerPCRID", "char", NULL, "optional", NA_character_,
66
-    "FusionPrimerPCRDate", "date", NULL, "optional", NA_character_,
67
-    "PoolDate", "date", NULL, "optional", NA_character_,
68
-    "SequencingDate", "date", NULL, "required", NA_character_,
69
-    "VCN", "numeric", NULL, "required", "vcn",
70
-    "Genome", "char", NULL, "required", "genome",
71
-    "SequencingRound", "int", NULL, "optional", NA_character_,
72
-    "Genotype", "char", NULL, "optional", NA_character_,
73
-    "TestGroup", "char", NULL, "optional", NA_character_,
74
-    "MOI", "char", NULL, "optional", NA_character_,
75
-    "Engraftment", "numeric", NULL, "optional", NA_character_,
76
-    "Transduction", "numeric", NULL, "optional", NA_character_,
77
-    "Notes", "char", NULL, "optional", NA_character_,
78
-    "AddedField1", "char", NULL, "optional", NA_character_,
79
-    "AddedField2", "char", NULL, "optional", NA_character_,
80
-    "AddedField3", "char", NULL, "optional", NA_character_,
81
-    "AddedField4", "char", NULL, "optional", NA_character_,
82
-    "concatenatePoolIDSeqRun", "char", NULL,"required", "vispa_concatenate",
83
-    "AddedField6_RelativeBloodPercentage", "char", NULL, "optional",
84
-    NA_character_,
85
-    "AddedField7_PurityTestFeasibility", "char", NULL, "optional",
86
-    NA_character_,
87
-    "AddedField8_FacsSeparationPurity", "char", NULL, "optional", NA_character_,
88
-    "Kapa", "numeric", NULL, "required", NA_character_,
89
-    "ulForPool", "numeric", NULL, "required", NA_character_,
90
-    "CompleteAmplificationID", "char", NULL, "required", "pcr_repl_id",
91
-    "UniqueID", "char", NULL, "required", NA_character_,
92
-    "StudyTestID", "char", NULL, "optional", NA_character_,
93
-    "StudyTestGroup", "char", NULL, "optional", NA_character_,
94
-    "MouseID", "char", NULL, "optional", NA_character_,
95
-    "Tigroup", "char", NULL, "optional", NA_character_,
96
-    "Tisource", "char", NULL, "optional", NA_character_,
97
-    "PathToFolderProjectID", "char", NULL, "required", "proj_folder",
98
-    "SamplesNameCheck", "char", NULL, "optional", NA_character_,
99
-    "TimepointDays", "char", NULL, "optional", NA_character_,
100
-    "TimepointMonths", "char", NULL, "optional", NA_character_,
101
-    "TimepointYears", "char", NULL, "optional", NA_character_,
102
-    "ng DNA corrected", "numeric", NULL, "optional", NA_character_
103
-  )
27
+    tibble::tribble(
28
+        ~names, ~types, ~transform, ~flag, ~tag,
29
+        "ProjectID", "char", NULL, "required", "project_id",
30
+        "FUSIONID", "char", NULL, "optional", "fusion_id",
31
+        "PoolID", "char", NULL, "required", "pool_id",
32
+        "TagSequence", "char", NULL, "required", "tag_seq",
33
+        "SubjectID", "char", NULL, "required", "subject",
34
+        "VectorType", "char", NULL, "optional", NA_character_,
35
+        "VectorID", "char", NULL, "required", "vector_id",
36
+        "ExperimentID", "char", NULL, "optional", NA_character_,
37
+        "Tissue", "char", NULL, "required", "tissue",
38
+        "TimePoint", "char", ~ stringr::str_pad(.x, 4, side = "left", pad = "0"),
39
+        "required", "tp_days",
40
+        "DNAFragmentation", "char", NULL, "optional", NA_character_,
41
+        "PCRMethod", "char", NULL, "required", "pcr_method",
42
+        "TagIDextended", "char", NULL, "optional", NA_character_,
43
+        "Keywords", "char", NULL, "optional", NA_character_,
44
+        "CellMarker", "char", NULL, "required", "cell_marker",
45
+        "TagID", "char", NULL, "required", "tag_id",
46
+        "NGSProvider", "char", NULL, "optional", NA_character_,
47
+        "NGSTechnology", "char", NULL, "required", "ngs_tech",
48
+        "ConverrtedFilesDir", "char", NULL, "optional", NA_character_,
49
+        "ConverrtedFilesName", "char", NULL, "optional", NA_character_,
50
+        "SourceFileFolder", "char", NULL, "optional", NA_character_,
51
+        "SourceFileNameR1", "char", NULL, "optional", NA_character_,
52
+        "SourceFileNameR2", "char", NULL, "optional", NA_character_,
53
+        "DNAnumber", "char", NULL, "required", "dna_num",
54
+        "ReplicateNumber", "int", NULL, "required", "pcr_replicate",
55
+        "DNAextractionDate", "date", NULL, "optional", NA_character_,
56
+        "DNAngUsed", "numeric", NULL, "required", NA_character_,
57
+        "LinearPCRID", "char", NULL, "optional", NA_character_,
58
+        "LinearPCRDate", "date", NULL, "optional", NA_character_,
59
+        "SonicationDate", "date", NULL, "optional", NA_character_,
60
+        "LigationDate", "date", NULL, "optional", NA_character_,
61
+        "1stExpoPCRID", "char", NULL, "optional", NA_character_,
62
+        "1stExpoPCRDate", "date", NULL, "optional", NA_character_,
63
+        "2ndExpoID", "char", NULL, "optional", NA_character_,
64
+        "2ndExpoDate", "date", NULL, "optional", NA_character_,
65
+        "FusionPrimerPCRID", "char", NULL, "optional", NA_character_,
66
+        "FusionPrimerPCRDate", "date", NULL, "optional", NA_character_,
67
+        "PoolDate", "date", NULL, "optional", NA_character_,
68
+        "SequencingDate", "date", NULL, "required", NA_character_,
69
+        "VCN", "numeric", NULL, "required", "vcn",
70
+        "Genome", "char", NULL, "required", "genome",
71
+        "SequencingRound", "int", NULL, "optional", NA_character_,
72
+        "Genotype", "char", NULL, "optional", NA_character_,
73
+        "TestGroup", "char", NULL, "optional", NA_character_,
74
+        "MOI", "char", NULL, "optional", NA_character_,
75
+        "Engraftment", "numeric", NULL, "optional", NA_character_,
76
+        "Transduction", "numeric", NULL, "optional", NA_character_,
77
+        "Notes", "char", NULL, "optional", NA_character_,
78
+        "AddedField1", "char", NULL, "optional", NA_character_,
79
+        "AddedField2", "char", NULL, "optional", NA_character_,
80
+        "AddedField3", "char", NULL, "optional", NA_character_,
81
+        "AddedField4", "char", NULL, "optional", NA_character_,
82
+        "concatenatePoolIDSeqRun", "char", NULL, "required",
83
+        "vispa_concatenate",
84
+        "AddedField6_RelativeBloodPercentage", "char", NULL, "optional",
85
+        NA_character_,
86
+        "AddedField7_PurityTestFeasibility", "char", NULL, "optional",
87
+        NA_character_,
88
+        "AddedField8_FacsSeparationPurity", "char", NULL, "optional",
89
+        NA_character_,
90
+        "Kapa", "numeric", NULL, "required", NA_character_,
91
+        "ulForPool", "numeric", NULL, "required", NA_character_,
92
+        "CompleteAmplificationID", "char", NULL, "required", "pcr_repl_id",
93
+        "UniqueID", "char", NULL, "required", NA_character_,
94
+        "StudyTestID", "char", NULL, "optional", NA_character_,
95
+        "StudyTestGroup", "char", NULL, "optional", NA_character_,
96
+        "MouseID", "char", NULL, "optional", NA_character_,
97
+        "Tigroup", "char", NULL, "optional", NA_character_,
98
+        "Tisource", "char", NULL, "optional", NA_character_,
99
+        "PathToFolderProjectID", "char", NULL, "required", "proj_folder",
100
+        "SamplesNameCheck", "char", NULL, "optional", NA_character_,
101
+        "TimepointDays", "char", NULL, "optional", NA_character_,
102
+        "TimepointMonths", "char", NULL, "optional", NA_character_,
103
+        "TimepointYears", "char", NULL, "optional", NA_character_,
104
+        "ng DNA corrected", "numeric", NULL, "optional", NA_character_
105
+    )
104 106
 }
105 107
 
106 108
 # Internal: default columns and types of vispa2 stats cols
107 109
 .default_iss_stats_specs <- function() {
108
-  tibble::tribble(
109
-    ~ names, ~ types, ~ transform, ~ flag, ~ tag,
110
-    "RUN_NAME", "char", NULL, "required", NA_character_,
111
-    "POOL", "char", NULL, "required", "vispa_concatenate",
112
-    "TAG", "char", ~ stringr::str_replace_all(.x, pattern = "\\.",
113
-                                              replacement = ""), "required",
114
-    "tag_seq",
115
-    "RAW_READS", "int", NULL, "optional", NA_character_,
116
-    "QUALITY_PASSED", "int", NULL, "optional", NA_character_,
117
-    "PHIX_MAPPING", "int", NULL, "optional", NA_character_,
118
-    "PLASMID_MAPPED_BYPOOL", "int", NULL, "optional", NA_character_,
119
-    "BARCODE_MUX", "int", NULL, "required", NA_character_,
120
-    "LTR_IDENTIFIED", "int", NULL, "optional", NA_character_,
121
-    "TRIMMING_FINAL_LTRLC", "int", NULL, "optional", NA_character_,
122
-    "LV_MAPPED", "int", NULL, "optional", NA_character_,
123
-    "BWA_MAPPED_OVERALL", "int", NULL, "optional", NA_character_,
124
-    "ISS_MAPPED_OVERALL", "int", NULL, "optional", NA_character_,
125
-    "ISS_MAPPED_PP", "int", NULL, "optional", NA_character_
126
-  )
110
+    tibble::tribble(
111
+        ~names, ~types, ~transform, ~flag, ~tag,
112
+        "RUN_NAME", "char", NULL, "required", NA_character_,
113
+        "POOL", "char", NULL, "required", "vispa_concatenate",
114
+        "TAG", "char", ~ stringr::str_replace_all(.x,
115
+            pattern = "\\.",
116
+            replacement = ""
117
+        ), "required",
118
+        "tag_seq",
119
+        "RAW_READS", "int", NULL, "optional", NA_character_,
120
+        "QUALITY_PASSED", "int", NULL, "optional", NA_character_,
121
+        "PHIX_MAPPING", "int", NULL, "optional", NA_character_,
122
+        "PLASMID_MAPPED_BYPOOL", "int", NULL, "optional", NA_character_,
123
+        "BARCODE_MUX", "int", NULL, "required", NA_character_,
124
+        "LTR_IDENTIFIED", "int", NULL, "optional", NA_character_,
125
+        "TRIMMING_FINAL_LTRLC", "int", NULL, "optional", NA_character_,
126
+        "LV_MAPPED", "int", NULL, "optional", NA_character_,
127
+        "BWA_MAPPED_OVERALL", "int", NULL, "optional", NA_character_,
128
+        "ISS_MAPPED_OVERALL", "int", NULL, "optional", NA_character_,
129
+        "ISS_MAPPED_PP", "int", NULL, "optional", NA_character_
130
+    )
127 131
 }
128 132
 
129 133
 # Mappings between input format and formats requested by external parsing
130 134
 # functions
131 135
 .types_mapping <- function() {
132
-  tibble::tribble(
133
-    ~ types, ~ mapping, ~ fread,
134
-    "char", "c", "character",
135
-    "int", "i", "integer",
136
-    "logi", "l", "logical",
137
-    "numeric", "d", "numeric",
138
-    "factor", "f", "factor",
139
-    "date", "c", "charcter",
140
-    "ymd", "c", "character",
141
-    "ydm", "c", "character",
142
-    "mdy", "c", "character",
143
-    "myd", "c", "character",
144
-    "dmy", "c", "character",
145
-    "yq", "c", "character",
146
-    "ym", "c", "character",
147
-    "my", "c", "character",
148
-    "ymd_hms", "c", "character",
149
-    "ymd_hm", "c", "character",
150
-    "ymd_h", "c", "character",
151
-    "dmy_hms", "c", "character",
152
-    "dmy_hm", "c", "character",
153
-    "dmy_h", "c", "character",
154
-    "mdy_hms", "c", "character",
155
-    "mdy_hm", "c", "character",
156
-    "mdy_h", "c", "character",
157
-    "ydm_hms", "c", "character",
158
-    "ydm_hm", "c", "character",
159
-    "ydm_h", "c", "character"
160
-  )
136
+    tibble::tribble(
137
+        ~types, ~mapping, ~fread,
138
+        "char", "c", "character",
139
+        "int", "i", "integer",
140
+        "logi", "l", "logical",
141
+        "numeric", "d", "numeric",
142
+        "factor", "f", "factor",
143
+        "date", "c", "character",
144
+        "ymd", "c", "character",
145
+        "ydm", "c", "character",
146
+        "mdy", "c", "character",
147
+        "myd", "c", "character",
148
+        "dmy", "c", "character",
149
+        "yq", "c", "character",
150
+        "ym", "c", "character",
151
+        "my", "c", "character",
152
+        "ymd_hms", "c", "character",
153
+        "ymd_hm", "c", "character",
154
+        "ymd_h", "c", "character",
155
+        "dmy_hms", "c", "character",
156
+        "dmy_hm", "c", "character",
157
+        "dmy_h", "c", "character",
158
+        "mdy_hms", "c", "character",
159
+        "mdy_hm", "c", "character",
160
+        "mdy_h", "c", "character",
161
+        "ydm_hms", "c", "character",
162
+        "ydm_hm", "c", "character",
163
+        "ydm_h", "c", "character"
164
+    )
161 165
 }
162 166
 
163 167
 # Internal: associates column types with column names for a more precise
164 168
 # import
165 169
 .mandatory_IS_types <- function(mode) {
166
-  specs <- mandatory_IS_vars(include_types = TRUE)
167
-  specs_mappings <- specs %>%
168
-    dplyr::left_join(.types_mapping(), by = "types")
169
-  if (mode == "fread") {
170
-    specs_mappings <- specs_mappings %>%
171
-      dplyr::select(.data$names, .data$fread) %>%
172
-      dplyr::group_by(.data$fread)
173
-    types <- specs_mappings %>%
174
-      dplyr::group_keys() %>%
175
-      dplyr::pull(.data$fread)
176
-    specs_mappings <- specs_mappings %>%
177
-      dplyr::group_split(.keep = FALSE)
178
-    names(specs_mappings) <- types
179
-    types <- purrr::map(specs_mappings, ~ .x$names)
170
+    specs <- mandatory_IS_vars(include_types = TRUE)
171
+    specs_mappings <- specs %>%
172
+        dplyr::left_join(.types_mapping(), by = "types")
173
+    if (mode == "fread") {
174
+        specs_mappings <- specs_mappings %>%
175
+            dplyr::select(.data$names, .data$fread) %>%
176
+            dplyr::group_by(.data$fread)
177
+        types <- specs_mappings %>%
178
+            dplyr::group_keys() %>%
179
+            dplyr::pull(.data$fread)
180
+        specs_mappings <- specs_mappings %>%
181
+            dplyr::group_split(.keep = FALSE)
182
+        names(specs_mappings) <- types
183
+        types <- purrr::map(specs_mappings, ~ .x$names)
184
+        return(types)
185
+    }
186
+    types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names))
180 187
     return(types)
181
-  }
182
-  types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names))
183
-  return(types)
184 188
 }
185 189
 
186 190
 # Internal: associates column types with column names for a more precise
187 191
 # import
188 192
 .annotation_IS_types <- function(mode) {
189
-  specs <- annotation_IS_vars(include_types = TRUE)
190
-  specs_mappings <- specs %>%
191
-    dplyr::left_join(.types_mapping(), by = "types")
192
-  if (mode == "fread") {
193
-    specs_mappings <- specs_mappings %>%
194
-      dplyr::select(.data$names, .data$fread) %>%
195
-      dplyr::group_by(.data$fread)
196
-    types <- specs_mappings %>%
197
-      dplyr::group_keys() %>%
198
-      dplyr::pull(.data$fread)
199
-    specs_mappings <- specs_mappings %>%
200
-      dplyr::group_split(.keep = FALSE)
201
-    names(specs_mappings) <- types
202
-    types <- purrr::map(specs_mappings, ~ .x$names)
193
+    specs <- annotation_IS_vars(include_types = TRUE)
194
+    specs_mappings <- specs %>%
195
+        dplyr::left_join(.types_mapping(), by = "types")
196
+    if (mode == "fread") {
197
+        specs_mappings <- specs_mappings %>%
198
+            dplyr::select(.data$names, .data$fread) %>%
199
+            dplyr::group_by(.data$fread)
200
+        types <- specs_mappings %>%
201
+            dplyr::group_keys() %>%
202
+            dplyr::pull(.data$fread)
203
+        specs_mappings <- specs_mappings %>%
204
+            dplyr::group_split(.keep = FALSE)
205
+        names(specs_mappings) <- types
206
+        types <- purrr::map(specs_mappings, ~ .x$names)
207
+        return(types)
208
+    }
209
+    types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names))
203 210
     return(types)
204
-  }
205
-  types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names))
206
-  return(types)
207 211
 }
208 212
 
209 213
 # Internal: associates column types with column names for a more precise
210 214
 # import
211 215
 .af_col_types <- function(mode) {
212
-  specs <- association_file_columns(include_types = TRUE)
213
-  specs_mappings <- specs %>%
214
-    dplyr::left_join(.types_mapping(), by = "types")
215
-  if (mode == "fread") {
216
-    specs_mappings <- specs_mappings %>%
217
-      dplyr::select(.data$names, .data$fread) %>%
218
-      dplyr::group_by(.data$fread)
219
-    types <- specs_mappings %>%
220
-      dplyr::group_keys() %>%
221
-      dplyr::pull(.data$fread)
222
-    specs_mappings <- specs_mappings %>%
223
-      dplyr::group_split(.keep = FALSE)
224
-    names(specs_mappings) <- types
225
-    types <- purrr::map(specs_mappings, ~ .x$names)
226
-    return(types)
227
-  }
228
-  types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names))
216
+    specs <- association_file_columns(include_types = TRUE)
217
+    specs_mappings <- specs %>%
218
+        dplyr::left_join(.types_mapping(), by = "types")
219
+    if (mode == "fread") {
220
+        specs_mappings <- specs_mappings %>%
221
+            dplyr::select(.data$names, .data$fread) %>%
222
+            dplyr::group_by(.data$fread)
223
+        types <- specs_mappings %>%
224
+            dplyr::group_keys() %>%
225
+            dplyr::pull(.data$fread)
226
+        specs_mappings <- specs_mappings %>%
227
+            dplyr::group_split(.keep = FALSE)
228
+        names(specs_mappings) <- types
229
+        types <- purrr::map(specs_mappings, ~ .x$names)
230
+        return(types)
231
+    }
232
+    types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names))
229 233
 }
230 234
 
231 235
 
... ...
@@ -266,36 +270,38 @@
266 270
 #' @examples
267 271
 #' reduced_AF_columns()
268 272
 reduced_AF_columns <- function() {
269
-  required <- list(
270
-    tag_id = "char",
271
-    tissue = "char",
272
-    subject = "char",
273
-    tp_days = c("char", "numeric", "integer"),
274
-    fusion_id = "char",
275
-    pcr_repl_id = "char",
276
-    cell_marker = "char",
277
-    project_id = "char",
278
-    vector_id = "char",
279
-    pool_id = "char"
280
-  )
281
-  politics <- list(
282
-    tag_id = "error",
283
-    tissue = "error",
284
-    subject = "error",
285
-    tp_days = "first",
286
-    fusion_id = "error",
287
-    pcr_repl_id = "error",
288
-    cell_marker = "error",
289
-    project_id = "error",
290
-    vector_id = "error",
291
-    pool_id = "error"
292
-  )
293
-  tag_cols <- .check_required_cols(required_tags = required,
294
-                                   vars_df = association_file_columns(TRUE),
295
-                                   duplicate_politic = politics) %>%
296
-    dplyr::select(.data$names, .data$tag)
297
-  data.table::setDT(tag_cols)
298
-  return(tag_cols)
273
+    required <- list(
274
+        tag_id = "char",
275
+        tissue = "char",
276
+        subject = "char",
277
+        tp_days = c("char", "numeric", "integer"),
278
+        fusion_id = "char",
279
+        pcr_repl_id = "char",
280
+        cell_marker = "char",
281
+        project_id = "char",
282
+        vector_id = "char",
283
+        pool_id = "char"
284
+    )
285
+    politics <- list(
286
+        tag_id = "error",
287
+        tissue = "error",
288
+        subject = "error",
289
+        tp_days = "first",
290
+        fusion_id = "error",
291
+        pcr_repl_id = "error",
292
+        cell_marker = "error",
293
+        project_id = "error",
294
+        vector_id = "error",
295
+        pool_id = "error"
296
+    )
297
+    tag_cols <- .check_required_cols(
298
+        required_tags = required,
299
+        vars_df = association_file_columns(TRUE),
300
+        duplicate_politic = politics
301
+    ) %>%
302
+        dplyr::select(.data$names, .data$tag)
303
+    data.table::setDT(tag_cols)
304
+    return(tag_cols)
299 305
 }
300 306
 
301 307
 # Names of the columns of iss stats considered for aggregation
... ...
@@ -359,107 +365,168 @@ refGene_table_cols <- function() {
359 365
 }
360 366
 
361 367
 
362
-#' Title
368
+#' All available tags for dynamic vars look-up tables.
363 369
 #'
364
-#' @return
370
+#' @description
371
+#' Contains all information associated with critical tags used in the dynamic
372
+#' vars system. To know more see
373
+#' `vignette("setup_workflow", package="ISAnalytics")`.
374
+#'
375
+#' @return A data frame
365 376
 #' @export
366 377
 #'
367 378
 #' @examples
379
+#' available_tags()
368 380
 available_tags <- function() {
369
-  data.table::data.table(
370
-    tag = c("chromosome", "locus", "is_strand", "gene_symbol", "gene_strand",
371
-            "project_id", "fusion_id", "tag_seq", "subject", "vector_id",
381
+    data.table::data.table(
382
+        tag = c(
383
+            "chromosome", "locus", "is_strand", "gene_symbol", "gene_strand",
384
+            "project_id", "pool_id", "fusion_id", "tag_seq", "subject",
385
+            "vector_id",
372 386
             "tissue", "tp_days", "pcr_method", "cell_marker", "tag_id",
373 387
             "ngs_tech", "dna_num", "pcr_replicate", "vcn", "vispa_concatenate",
374 388
             "pcr_repl_id", "proj_folder", "genome",
375
-            "vispa_concatenate", "tag_seq"),
376
-    needed_in = list(c("top_targeted_genes",
377
-                       "CIS_grubbs",
378
-                       "compute_near_integrations"),
379
-                     c("top_targeted_genes",
380
-                       "CIS_grubbs",
381
-                       "compute_near_integrations"),
382
-                     c("CIS_grubbs",
383
-                       "compute_near_integrations"),
384
-                     c("top_targeted_genes",
385
-                       "CIS_grubbs",
386
-                       "compute_near_integrations",
387
-                       "CIS_volcano_plot"),
388
-                     c("top_targeted_genes",
389
-                       "CIS_grubbs"),
390
-                     c("generate_default_folder_structure",
391
-                       "import_Vispa2_stats", "remove_collisions",
392
-                       "generate_Vispa2_launch_AF", "import_association_file",
393
-                       "import_parallel_Vispa2Matrices"),
394
-                     c("generate_Vispa2_launch_AF"),
395
-                     c("generate_default_folder_structure",
396
-                       "import_association_file", "import_Vispa2_stats"),
397
-                     c("import_association_file",
398
-                       "HSC_population_size_estimate"),
399
-                     c("generate_Vispa2_launch_AF"),
400
-                     c("generate_Vispa2_launch_AF", "import_association_file",
401
-                       "HSC_population_size_estimate"),
402
-                     c("generate_Vispa2_launch_AF", "import_association_file"),
403
-                     c(),
404
-                     c("generate_Vispa2_launch_AF", "import_association_file",
405
-                       "HSC_population_size_estimate"),
406
-                     c("generate_Vispa2_launch_AF"),
407
-                     c(),
408
-                     c(),
409
-                     c("import_association_file", "remove_collisions"),
410
-                     c(),
411
-                     c("import_association_file", "generate_Vispa2_launch_AF",
412
-                       "generate_default_folder_structure",
413
-                       "import_Vispa2_stats", "import_parallel_Vispa2Matrices"),
414
-                     c("pcr_id_column", "generate_Vispa2_launch_AF",
415
-                       "import_association_file", "import_Vispa2_stats"),
416
-                     c("import_association_file"),
417
-                     c(),
418
-                     c("import_association_file", "generate_Vispa2_launch_AF",
419
-                       "generate_default_folder_structure",
420
-                       "import_Vispa2_stats", "import_parallel_Vispa2Matrices"),
421
-                     c("generate_default_folder_structure",
422
-                       "import_association_file", "import_Vispa2_stats")
423
-                     ),
424
-    description = c(paste("Number of the chromosome"),
425
-                    paste("The locus at which the integration occurs"),
426
-                    paste("The DNA strand in which the integration occurs"),
427
-                    paste("The symbol of the gene"),
428
-                    paste("The strand of the gene"),
429
-                    paste("Unique identifier of a project"),
430
-                    paste("Identification code/number of the",
431
-                          "barcoded (SLiM-)PCR product included in the",
432
-                          "sequencing library"),
433
-                    paste("The barcode tag sequence"),
434
-                    paste("Unique identifier of a study subject",
435
-                          "(usually a patient)"),
436
-                    paste("Unique identifier of the vector used"),
437
-                    paste("The biological tissue the sample belongs to"),
438
-                    paste("The time point expressed in days"),
439
-                    paste("The PCR method used"),
440
-                    paste("Cell marker associated with isolated",
441
-                          "cells carrying the IS"),
442
-                    paste("Unique identifier of the barcode tag, as specified",
443
-                          "in VISPA2 requirements"),
444
-                    paste("Technology used for next generation sequencing"),
445
-                    paste("Identification code/number of the DNA extraction",
446
-                          "from a specific biological sample"),
447
-                    paste("Number of the PCR replicate"),
448
-                    paste("Vector copy number"),
449
-                    paste("Unique identifier of a pool as specified in VISPA2"),
450
-                    paste("Unique identifier of the pcr replicate, used as",
451
-                          "key to join data and metadata"),
452
-                    paste("Path on disk containing the standard VISPA2 folder",
453
-                          "structure of the project"),
454
-                    paste("The reference genome (e.g. “hg19”)"),
455
-                    paste("Unique identifier of a pool as specified in VISPA2"),
456
-                    paste("The barcode tag sequence")
457
-                    ),
458
-    dyn_vars_tbl = c("mand_vars", "mand_vars", "mand_vars",
459
-                     "annot_vars", "annot_vars",
460
-                     "af_vars", "af_vars", "af_vars", "af_vars", "af_vars",
461
-                     "af_vars", "af_vars", "af_vars", "af_vars", "af_vars",
462
-                     "af_vars", "af_vars", "af_vars", "af_vars", "af_vars",
463
-                     "af_vars", "af_vars", "af_vars", "iss_vars", "iss_vars")
464
-  )
389
+            "vispa_concatenate", "tag_seq"
390
+        ),
391
+        needed_in = list(
392
+            c(
393
+                "top_targeted_genes",
394
+                "CIS_grubbs",
395
+                "compute_near_integrations"
396
+            ),
397
+            c(
398
+                "top_targeted_genes",
399
+                "CIS_grubbs",
400
+                "compute_near_integrations"
401
+            ),
402
+            c(
403
+                "CIS_grubbs",
404
+                "compute_near_integrations"
405
+            ),
406
+            c(
407
+                "top_targeted_genes",
408
+                "CIS_grubbs",
409
+                "compute_near_integrations",
410
+                "CIS_volcano_plot"
411
+            ),
412
+            c(
413
+                "top_targeted_genes",
414
+                "CIS_grubbs"
415
+            ),
416
+            c(
417
+                "generate_default_folder_structure",
418
+                "import_Vispa2_stats", "remove_collisions",
419
+                "generate_Vispa2_launch_AF", "import_association_file",
420
+                "import_parallel_Vispa2Matrices"
421
+            ),
422
+            c(
423
+                "generate_Vispa2_launch_AF", "remove_collisions",
424
+                "import_association_file"
425
+            ),
426
+            c("generate_Vispa2_launch_AF"),
427
+            c(
428
+                "generate_default_folder_structure",
429
+                "import_association_file", "import_Vispa2_stats"
430
+            ),
431
+            c(
432
+                "import_association_file",
433
+                "HSC_population_size_estimate"
434
+            ),
435
+            c("generate_Vispa2_launch_AF"),
436
+            c(
437
+                "generate_Vispa2_launch_AF", "import_association_file",
438
+                "HSC_population_size_estimate"
439
+            ),
440
+            c("generate_Vispa2_launch_AF", "import_association_file"),
441
+            c(),
442
+            c(
443
+                "generate_Vispa2_launch_AF", "import_association_file",
444
+                "HSC_population_size_estimate"
445
+            ),
446
+            c("generate_Vispa2_launch_AF"),
447
+            c(),
448
+            c(),
449
+            c("import_association_file", "remove_collisions"),
450
+            c(),
451
+            c(
452
+                "import_association_file", "generate_Vispa2_launch_AF",
453
+                "generate_default_folder_structure",
454
+                "import_Vispa2_stats", "import_parallel_Vispa2Matrices"
455
+            ),
456
+            c(
457
+                "pcr_id_column", "generate_Vispa2_launch_AF",
458
+                "import_association_file", "import_Vispa2_stats"
459
+            ),
460
+            c("import_association_file"),
461
+            c(),
462
+            c(
463
+                "import_association_file", "generate_Vispa2_launch_AF",
464
+                "generate_default_folder_structure",
465
+                "import_Vispa2_stats", "import_parallel_Vispa2Matrices"
466
+            ),
467
+            c(
468
+                "generate_default_folder_structure",
469
+                "import_association_file", "import_Vispa2_stats"
470
+            )
471
+        ),
472
+        description = c(
473
+            paste("Number of the chromosome"),
474
+            paste("The locus at which the integration occurs"),
475
+            paste("The DNA strand in which the integration occurs"),
476
+            paste("The symbol of the gene"),
477
+            paste("The strand of the gene"),
478
+            paste("Unique identifier of a project"),
479
+            paste("Unique identifier of a sequencing pool"),
480
+            paste(
481
+                "Identification code/number of the",
482
+                "barcoded (SLiM-)PCR product included in the",
483
+                "sequencing library"
484
+            ),
485
+            paste("The barcode tag sequence"),
486
+            paste(
487
+                "Unique identifier of a study subject",
488
+                "(usually a patient)"
489
+            ),
490
+            paste("Unique identifier of the vector used"),
491
+            paste("The biological tissue the sample belongs to"),
492
+            paste("The time point expressed in days"),
493
+            paste("The PCR method used"),
494
+            paste(
495
+                "Cell marker associated with isolated",
496
+                "cells carrying the IS"
497
+            ),
498
+            paste(
499
+                "Unique identifier of the barcode tag, as specified",
500
+                "in VISPA2 requirements"
501
+            ),
502
+            paste("Technology used for next generation sequencing"),
503
+            paste(
504
+                "Identification code/number of the DNA extraction",
505
+                "from a specific biological sample"
506
+            ),
507
+            paste("Number of the PCR replicate"),
508
+            paste("Vector copy number"),
509
+            paste("Unique identifier of a pool as specified in VISPA2"),
510
+            paste(
511
+                "Unique identifier of the pcr replicate, used as",
512
+                "key to join data and metadata"
513
+            ),
514
+            paste(
515
+                "Path on disk containing the standard VISPA2 folder",
516
+                "structure of the project"
517
+            ),
518
+            paste("The reference genome (e.g. 'hg19')"),
519
+            paste("Unique identifier of a pool as specified in VISPA2"),
520
+            paste("The barcode tag sequence")
521
+        ),
522
+        dyn_vars_tbl = c(
523
+            "mand_vars", "mand_vars", "mand_vars",
524
+            "annot_vars", "annot_vars",
525
+            "af_vars", "af_vars", "af_vars", "af_vars", "af_vars",
526
+            "af_vars", "af_vars", "af_vars", "af_vars", "af_vars",
527
+            "af_vars", "af_vars", "af_vars", "af_vars", "af_vars",
528
+            "af_vars", "af_vars", "af_vars", "af_vars",
529
+            "iss_vars", "iss_vars"
530
+        )
531
+    )
465 532
 }
Browse code

[UPDATE] Partial update 1.5.4 - fixed all functions, increased package coverage, documentation still to fix

Giulia Pais authored on 15/04/2022 16:47:59
Showing 1 changed file
... ...
@@ -27,12 +27,12 @@
27 27
   tibble::tribble(
28 28
     ~ names, ~ types, ~ transform, ~ flag, ~ tag,
29 29
     "ProjectID", "char", NULL, "required", "project_id",
30
-    "FUSIONID", "char", NULL, "optional", NA_character_,
30
+    "FUSIONID", "char", NULL, "optional", "fusion_id",
31 31
     "PoolID", "char", NULL, "required", "pool_id",
32 32
     "TagSequence", "char", NULL, "required", "tag_seq",
33 33
     "SubjectID", "char", NULL, "required", "subject",
34 34
     "VectorType", "char", NULL, "optional", NA_character_,
35
-    "VectorID", "char", NULL, "required", NA_character_,
35
+    "VectorID", "char", NULL, "required", "vector_id",
36 36
     "ExperimentID", "char", NULL, "optional", NA_character_,
37 37
     "Tissue", "char", NULL, "required", "tissue",
38 38
     "TimePoint", "char", ~ stringr::str_pad(.x, 4, side = "left", pad = "0"),
... ...
@@ -42,7 +42,7 @@
42 42
     "TagIDextended", "char", NULL, "optional", NA_character_,
43 43
     "Keywords","char", NULL, "optional", NA_character_,
44 44
     "CellMarker", "char", NULL, "required", "cell_marker",
45
-    "TagID", "char", NULL, "required", NA_character_,
45
+    "TagID", "char", NULL, "required", "tag_id",
46 46
     "NGSProvider", "char", NULL, "optional", NA_character_,
47 47
     "NGSTechnology", "char", NULL, "required", "ngs_tech",
48 48
     "ConverrtedFilesDir", "char", NULL, "optional", NA_character_,
... ...
@@ -266,11 +266,36 @@
266 266
 #' @examples
267 267
 #' reduced_AF_columns()
268 268
 reduced_AF_columns <- function() {
269
-    c(
270
-        "TagID", "Tissue", "SubjectID", "TimePoint", "FUSIONID",
271
-        "CompleteAmplificationID", "CellMarker", "ProjectID", "VectorID",
272
-        "PoolID"
273
-    )
269
+  required <- list(
270
+    tag_id = "char",
271
+    tissue = "char",
272
+    subject = "char",
273
+    tp_days = c("char", "numeric", "integer"),
274
+    fusion_id = "char",
275
+    pcr_repl_id = "char",
276
+    cell_marker = "char",
277
+    project_id = "char",
278
+    vector_id = "char",
279
+    pool_id = "char"
280
+  )
281
+  politics <- list(
282
+    tag_id = "error",
283
+    tissue = "error",
284
+    subject = "error",
285
+    tp_days = "first",
286
+    fusion_id = "error",
287
+    pcr_repl_id = "error",
288
+    cell_marker = "error",
289
+    project_id = "error",
290
+    vector_id = "error",
291
+    pool_id = "error"
292
+  )
293
+  tag_cols <- .check_required_cols(required_tags = required,
294
+                                   vars_df = association_file_columns(TRUE),
295
+                                   duplicate_politic = politics) %>%
296
+    dplyr::select(.data$names, .data$tag)
297
+  data.table::setDT(tag_cols)
298
+  return(tag_cols)
274 299
 }
275 300
 
276 301
 # Names of the columns of iss stats considered for aggregation
... ...
@@ -334,20 +359,107 @@ refGene_table_cols <- function() {
334 359
 }
335 360
 
336 361
 
337
-available_column_tags <- function() {
338
-    list(
339
-        critical = list(af = c(
340
-            "project_id", "pool_id", "tag_seq", "subject", "tissue",
341
-            "cell_marker", "pcr_replicate", "vispa_concatenate",
342
-            "pcr_repl_id", "proj_folder"
343
-        ),
344