... | ... |
@@ -172,8 +172,10 @@ |
172 | 172 |
dplyr::left_join(.types_mapping(), by = "types") |
173 | 173 |
if (mode == "fread") { |
174 | 174 |
specs_mappings <- specs_mappings %>% |
175 |
- dplyr::select(.data$names, .data$fread) %>% |
|
176 |
- dplyr::group_by(.data$fread) |
|
175 |
+ dplyr::select( |
|
176 |
+ dplyr::all_of(c("names", "fread")) |
|
177 |
+ ) %>% |
|
178 |
+ dplyr::group_by(dplyr::across(dplyr::all_of("fread"))) |
|
177 | 179 |
types <- specs_mappings %>% |
178 | 180 |
dplyr::group_keys() %>% |
179 | 181 |
dplyr::pull(.data$fread) |
... | ... |
@@ -195,8 +197,8 @@ |
195 | 197 |
dplyr::left_join(.types_mapping(), by = "types") |
196 | 198 |
if (mode == "fread") { |
197 | 199 |
specs_mappings <- specs_mappings %>% |
198 |
- dplyr::select(.data$names, .data$fread) %>% |
|
199 |
- dplyr::group_by(.data$fread) |
|
200 |
+ dplyr::select(dplyr::all_of(c("names", "fread"))) %>% |
|
201 |
+ dplyr::group_by(dplyr::across(dplyr::all_of("fread"))) |
|
200 | 202 |
types <- specs_mappings %>% |
201 | 203 |
dplyr::group_keys() %>% |
202 | 204 |
dplyr::pull(.data$fread) |
... | ... |
@@ -218,8 +220,8 @@ |
218 | 220 |
dplyr::left_join(.types_mapping(), by = "types") |
219 | 221 |
if (mode == "fread") { |
220 | 222 |
specs_mappings <- specs_mappings %>% |
221 |
- dplyr::select(.data$names, .data$fread) %>% |
|
222 |
- dplyr::group_by(.data$fread) |
|
223 |
+ dplyr::select(dplyr::all_of(c("names", "fread"))) %>% |
|
224 |
+ dplyr::group_by(dplyr::across(dplyr::all_of("fread"))) |
|
223 | 225 |
types <- specs_mappings %>% |
224 | 226 |
dplyr::group_keys() %>% |
225 | 227 |
dplyr::pull(.data$fread) |
... | ... |
@@ -299,7 +301,7 @@ reduced_AF_columns <- function() { |
299 | 301 |
vars_df = association_file_columns(TRUE), |
300 | 302 |
duplicate_politic = politics |
301 | 303 |
) %>% |
302 |
- dplyr::select(.data$names, .data$tag) |
|
304 |
+ dplyr::select(dplyr::all_of(c("names", "tag"))) |
|
303 | 305 |
data.table::setDT(tag_cols) |
304 | 306 |
return(tag_cols) |
305 | 307 |
} |
... | ... |
@@ -370,7 +370,7 @@ refGene_table_cols <- function() { |
370 | 370 |
#' @description |
371 | 371 |
#' Contains all information associated with critical tags used in the dynamic |
372 | 372 |
#' vars system. To know more see |
373 |
-#' `vignette("setup_workflow", package="ISAnalytics")`. |
|
373 |
+#' `vignette("workflow_start", package="ISAnalytics")`. |
|
374 | 374 |
#' |
375 | 375 |
#' @return A data frame |
376 | 376 |
#' @export |
... | ... |
@@ -5,227 +5,231 @@ |
5 | 5 |
# Internal: default mandatory IS vars and associated column types. |
6 | 6 |
# The combination of these fields defines a unique integration site. |
7 | 7 |
.default_mandatory_IS_vars <- function() { |
8 |
- tibble::tribble( |
|
9 |
- ~ names, ~ types, ~ transform, ~ flag, ~ tag, |
|
10 |
- "chr", "char", NULL, "required", "chromosome", |
|
11 |
- "integration_locus", "int", NULL, "required", "locus", |
|
12 |
- "strand", "char", NULL, "required", "is_strand" |
|
13 |
- ) |
|
8 |
+ tibble::tribble( |
|
9 |
+ ~names, ~types, ~transform, ~flag, ~tag, |
|
10 |
+ "chr", "char", NULL, "required", "chromosome", |
|
11 |
+ "integration_locus", "int", NULL, "required", "locus", |
|
12 |
+ "strand", "char", NULL, "required", "is_strand" |
|
13 |
+ ) |
|
14 | 14 |
} |
15 | 15 |
|
16 | 16 |
# Internal: default genomic annotation IS vars and associated column types. |
17 | 17 |
.default_annotation_IS_vars <- function() { |
18 |
- tibble::tribble( |
|
19 |
- ~ names, ~ types, ~ transform, ~ flag, ~ tag, |
|
20 |
- "GeneName", "char", NULL, "required", "gene_symbol", |
|
21 |
- "GeneStrand", "char", NULL, "required", "gene_strand" |
|
22 |
- ) |
|
18 |
+ tibble::tribble( |
|
19 |
+ ~names, ~types, ~transform, ~flag, ~tag, |
|
20 |
+ "GeneName", "char", NULL, "required", "gene_symbol", |
|
21 |
+ "GeneStrand", "char", NULL, "required", "gene_strand" |
|
22 |
+ ) |
|
23 | 23 |
} |
24 | 24 |
|
25 | 25 |
# Internal: default association file columns and types |
26 | 26 |
.default_af_cols <- function() { |
27 |
- tibble::tribble( |
|
28 |
- ~ names, ~ types, ~ transform, ~ flag, ~ tag, |
|
29 |
- "ProjectID", "char", NULL, "required", "project_id", |
|
30 |
- "FUSIONID", "char", NULL, "optional", "fusion_id", |
|
31 |
- "PoolID", "char", NULL, "required", "pool_id", |
|
32 |
- "TagSequence", "char", NULL, "required", "tag_seq", |
|
33 |
- "SubjectID", "char", NULL, "required", "subject", |
|
34 |
- "VectorType", "char", NULL, "optional", NA_character_, |
|
35 |
- "VectorID", "char", NULL, "required", "vector_id", |
|
36 |
- "ExperimentID", "char", NULL, "optional", NA_character_, |
|
37 |
- "Tissue", "char", NULL, "required", "tissue", |
|
38 |
- "TimePoint", "char", ~ stringr::str_pad(.x, 4, side = "left", pad = "0"), |
|
39 |
- "required", "tp_days", |
|
40 |
- "DNAFragmentation", "char", NULL, "optional", NA_character_, |
|
41 |
- "PCRMethod", "char", NULL, "required", "pcr_method", |
|
42 |
- "TagIDextended", "char", NULL, "optional", NA_character_, |
|
43 |
- "Keywords","char", NULL, "optional", NA_character_, |
|
44 |
- "CellMarker", "char", NULL, "required", "cell_marker", |
|
45 |
- "TagID", "char", NULL, "required", "tag_id", |
|
46 |
- "NGSProvider", "char", NULL, "optional", NA_character_, |
|
47 |
- "NGSTechnology", "char", NULL, "required", "ngs_tech", |
|
48 |
- "ConverrtedFilesDir", "char", NULL, "optional", NA_character_, |
|
49 |
- "ConverrtedFilesName", "char", NULL, "optional", NA_character_, |
|
50 |
- "SourceFileFolder", "char", NULL, "optional", NA_character_, |
|
51 |
- "SourceFileNameR1", "char", NULL, "optional", NA_character_, |
|
52 |
- "SourceFileNameR2", "char", NULL, "optional", NA_character_, |
|
53 |
- "DNAnumber", "char", NULL, "required", "dna_num", |
|
54 |
- "ReplicateNumber", "int", NULL, "required", "pcr_replicate", |
|
55 |
- "DNAextractionDate", "date", NULL, "optional", NA_character_, |
|
56 |
- "DNAngUsed", "numeric", NULL, "required", NA_character_, |
|
57 |
- "LinearPCRID", "char", NULL, "optional", NA_character_, |
|
58 |
- "LinearPCRDate", "date", NULL, "optional", NA_character_, |
|
59 |
- "SonicationDate", "date", NULL, "optional", NA_character_, |
|
60 |
- "LigationDate", "date", NULL, "optional", NA_character_, |
|
61 |
- "1stExpoPCRID", "char", NULL, "optional", NA_character_, |
|
62 |
- "1stExpoPCRDate", "date", NULL, "optional", NA_character_, |
|
63 |
- "2ndExpoID", "char", NULL, "optional", NA_character_, |
|
64 |
- "2ndExpoDate", "date", NULL, "optional", NA_character_, |
|
65 |
- "FusionPrimerPCRID", "char", NULL, "optional", NA_character_, |
|
66 |
- "FusionPrimerPCRDate", "date", NULL, "optional", NA_character_, |
|
67 |
- "PoolDate", "date", NULL, "optional", NA_character_, |
|
68 |
- "SequencingDate", "date", NULL, "required", NA_character_, |
|
69 |
- "VCN", "numeric", NULL, "required", "vcn", |
|
70 |
- "Genome", "char", NULL, "required", "genome", |
|
71 |
- "SequencingRound", "int", NULL, "optional", NA_character_, |
|
72 |
- "Genotype", "char", NULL, "optional", NA_character_, |
|
73 |
- "TestGroup", "char", NULL, "optional", NA_character_, |
|
74 |
- "MOI", "char", NULL, "optional", NA_character_, |
|
75 |
- "Engraftment", "numeric", NULL, "optional", NA_character_, |
|
76 |
- "Transduction", "numeric", NULL, "optional", NA_character_, |
|
77 |
- "Notes", "char", NULL, "optional", NA_character_, |
|
78 |
- "AddedField1", "char", NULL, "optional", NA_character_, |
|
79 |
- "AddedField2", "char", NULL, "optional", NA_character_, |
|
80 |
- "AddedField3", "char", NULL, "optional", NA_character_, |
|
81 |
- "AddedField4", "char", NULL, "optional", NA_character_, |
|
82 |
- "concatenatePoolIDSeqRun", "char", NULL,"required", "vispa_concatenate", |
|
83 |
- "AddedField6_RelativeBloodPercentage", "char", NULL, "optional", |
|
84 |
- NA_character_, |
|
85 |
- "AddedField7_PurityTestFeasibility", "char", NULL, "optional", |
|
86 |
- NA_character_, |
|
87 |
- "AddedField8_FacsSeparationPurity", "char", NULL, "optional", NA_character_, |
|
88 |
- "Kapa", "numeric", NULL, "required", NA_character_, |
|
89 |
- "ulForPool", "numeric", NULL, "required", NA_character_, |
|
90 |
- "CompleteAmplificationID", "char", NULL, "required", "pcr_repl_id", |
|
91 |
- "UniqueID", "char", NULL, "required", NA_character_, |
|
92 |
- "StudyTestID", "char", NULL, "optional", NA_character_, |
|
93 |
- "StudyTestGroup", "char", NULL, "optional", NA_character_, |
|
94 |
- "MouseID", "char", NULL, "optional", NA_character_, |
|
95 |
- "Tigroup", "char", NULL, "optional", NA_character_, |
|
96 |
- "Tisource", "char", NULL, "optional", NA_character_, |
|
97 |
- "PathToFolderProjectID", "char", NULL, "required", "proj_folder", |
|
98 |
- "SamplesNameCheck", "char", NULL, "optional", NA_character_, |
|
99 |
- "TimepointDays", "char", NULL, "optional", NA_character_, |
|
100 |
- "TimepointMonths", "char", NULL, "optional", NA_character_, |
|
101 |
- "TimepointYears", "char", NULL, "optional", NA_character_, |
|
102 |
- "ng DNA corrected", "numeric", NULL, "optional", NA_character_ |
|
103 |
- ) |
|
27 |
+ tibble::tribble( |
|
28 |
+ ~names, ~types, ~transform, ~flag, ~tag, |
|
29 |
+ "ProjectID", "char", NULL, "required", "project_id", |
|
30 |
+ "FUSIONID", "char", NULL, "optional", "fusion_id", |
|
31 |
+ "PoolID", "char", NULL, "required", "pool_id", |
|
32 |
+ "TagSequence", "char", NULL, "required", "tag_seq", |
|
33 |
+ "SubjectID", "char", NULL, "required", "subject", |
|
34 |
+ "VectorType", "char", NULL, "optional", NA_character_, |
|
35 |
+ "VectorID", "char", NULL, "required", "vector_id", |
|
36 |
+ "ExperimentID", "char", NULL, "optional", NA_character_, |
|
37 |
+ "Tissue", "char", NULL, "required", "tissue", |
|
38 |
+ "TimePoint", "char", ~ stringr::str_pad(.x, 4, side = "left", pad = "0"), |
|
39 |
+ "required", "tp_days", |
|
40 |
+ "DNAFragmentation", "char", NULL, "optional", NA_character_, |
|
41 |
+ "PCRMethod", "char", NULL, "required", "pcr_method", |
|
42 |
+ "TagIDextended", "char", NULL, "optional", NA_character_, |
|
43 |
+ "Keywords", "char", NULL, "optional", NA_character_, |
|
44 |
+ "CellMarker", "char", NULL, "required", "cell_marker", |
|
45 |
+ "TagID", "char", NULL, "required", "tag_id", |
|
46 |
+ "NGSProvider", "char", NULL, "optional", NA_character_, |
|
47 |
+ "NGSTechnology", "char", NULL, "required", "ngs_tech", |
|
48 |
+ "ConverrtedFilesDir", "char", NULL, "optional", NA_character_, |
|
49 |
+ "ConverrtedFilesName", "char", NULL, "optional", NA_character_, |
|
50 |
+ "SourceFileFolder", "char", NULL, "optional", NA_character_, |
|
51 |
+ "SourceFileNameR1", "char", NULL, "optional", NA_character_, |
|
52 |
+ "SourceFileNameR2", "char", NULL, "optional", NA_character_, |
|
53 |
+ "DNAnumber", "char", NULL, "required", "dna_num", |
|
54 |
+ "ReplicateNumber", "int", NULL, "required", "pcr_replicate", |
|
55 |
+ "DNAextractionDate", "date", NULL, "optional", NA_character_, |
|
56 |
+ "DNAngUsed", "numeric", NULL, "required", NA_character_, |
|
57 |
+ "LinearPCRID", "char", NULL, "optional", NA_character_, |
|
58 |
+ "LinearPCRDate", "date", NULL, "optional", NA_character_, |
|
59 |
+ "SonicationDate", "date", NULL, "optional", NA_character_, |
|
60 |
+ "LigationDate", "date", NULL, "optional", NA_character_, |
|
61 |
+ "1stExpoPCRID", "char", NULL, "optional", NA_character_, |
|
62 |
+ "1stExpoPCRDate", "date", NULL, "optional", NA_character_, |
|
63 |
+ "2ndExpoID", "char", NULL, "optional", NA_character_, |
|
64 |
+ "2ndExpoDate", "date", NULL, "optional", NA_character_, |
|
65 |
+ "FusionPrimerPCRID", "char", NULL, "optional", NA_character_, |
|
66 |
+ "FusionPrimerPCRDate", "date", NULL, "optional", NA_character_, |
|
67 |
+ "PoolDate", "date", NULL, "optional", NA_character_, |
|
68 |
+ "SequencingDate", "date", NULL, "required", NA_character_, |
|
69 |
+ "VCN", "numeric", NULL, "required", "vcn", |
|
70 |
+ "Genome", "char", NULL, "required", "genome", |
|
71 |
+ "SequencingRound", "int", NULL, "optional", NA_character_, |
|
72 |
+ "Genotype", "char", NULL, "optional", NA_character_, |
|
73 |
+ "TestGroup", "char", NULL, "optional", NA_character_, |
|
74 |
+ "MOI", "char", NULL, "optional", NA_character_, |
|
75 |
+ "Engraftment", "numeric", NULL, "optional", NA_character_, |
|
76 |
+ "Transduction", "numeric", NULL, "optional", NA_character_, |
|
77 |
+ "Notes", "char", NULL, "optional", NA_character_, |
|
78 |
+ "AddedField1", "char", NULL, "optional", NA_character_, |
|
79 |
+ "AddedField2", "char", NULL, "optional", NA_character_, |
|
80 |
+ "AddedField3", "char", NULL, "optional", NA_character_, |
|
81 |
+ "AddedField4", "char", NULL, "optional", NA_character_, |
|
82 |
+ "concatenatePoolIDSeqRun", "char", NULL, "required", |
|
83 |
+ "vispa_concatenate", |
|
84 |
+ "AddedField6_RelativeBloodPercentage", "char", NULL, "optional", |
|
85 |
+ NA_character_, |
|
86 |
+ "AddedField7_PurityTestFeasibility", "char", NULL, "optional", |
|
87 |
+ NA_character_, |
|
88 |
+ "AddedField8_FacsSeparationPurity", "char", NULL, "optional", |
|
89 |
+ NA_character_, |
|
90 |
+ "Kapa", "numeric", NULL, "required", NA_character_, |
|
91 |
+ "ulForPool", "numeric", NULL, "required", NA_character_, |
|
92 |
+ "CompleteAmplificationID", "char", NULL, "required", "pcr_repl_id", |
|
93 |
+ "UniqueID", "char", NULL, "required", NA_character_, |
|
94 |
+ "StudyTestID", "char", NULL, "optional", NA_character_, |
|
95 |
+ "StudyTestGroup", "char", NULL, "optional", NA_character_, |
|
96 |
+ "MouseID", "char", NULL, "optional", NA_character_, |
|
97 |
+ "Tigroup", "char", NULL, "optional", NA_character_, |
|
98 |
+ "Tisource", "char", NULL, "optional", NA_character_, |
|
99 |
+ "PathToFolderProjectID", "char", NULL, "required", "proj_folder", |
|
100 |
+ "SamplesNameCheck", "char", NULL, "optional", NA_character_, |
|
101 |
+ "TimepointDays", "char", NULL, "optional", NA_character_, |
|
102 |
+ "TimepointMonths", "char", NULL, "optional", NA_character_, |
|
103 |
+ "TimepointYears", "char", NULL, "optional", NA_character_, |
|
104 |
+ "ng DNA corrected", "numeric", NULL, "optional", NA_character_ |
|
105 |
+ ) |
|
104 | 106 |
} |
105 | 107 |
|
106 | 108 |
# Internal: default columns and types of vispa2 stats cols |
107 | 109 |
.default_iss_stats_specs <- function() { |
108 |
- tibble::tribble( |
|
109 |
- ~ names, ~ types, ~ transform, ~ flag, ~ tag, |
|
110 |
- "RUN_NAME", "char", NULL, "required", NA_character_, |
|
111 |
- "POOL", "char", NULL, "required", "vispa_concatenate", |
|
112 |
- "TAG", "char", ~ stringr::str_replace_all(.x, pattern = "\\.", |
|
113 |
- replacement = ""), "required", |
|
114 |
- "tag_seq", |
|
115 |
- "RAW_READS", "int", NULL, "optional", NA_character_, |
|
116 |
- "QUALITY_PASSED", "int", NULL, "optional", NA_character_, |
|
117 |
- "PHIX_MAPPING", "int", NULL, "optional", NA_character_, |
|
118 |
- "PLASMID_MAPPED_BYPOOL", "int", NULL, "optional", NA_character_, |
|
119 |
- "BARCODE_MUX", "int", NULL, "required", NA_character_, |
|
120 |
- "LTR_IDENTIFIED", "int", NULL, "optional", NA_character_, |
|
121 |
- "TRIMMING_FINAL_LTRLC", "int", NULL, "optional", NA_character_, |
|
122 |
- "LV_MAPPED", "int", NULL, "optional", NA_character_, |
|
123 |
- "BWA_MAPPED_OVERALL", "int", NULL, "optional", NA_character_, |
|
124 |
- "ISS_MAPPED_OVERALL", "int", NULL, "optional", NA_character_, |
|
125 |
- "ISS_MAPPED_PP", "int", NULL, "optional", NA_character_ |
|
126 |
- ) |
|
110 |
+ tibble::tribble( |
|
111 |
+ ~names, ~types, ~transform, ~flag, ~tag, |
|
112 |
+ "RUN_NAME", "char", NULL, "required", NA_character_, |
|
113 |
+ "POOL", "char", NULL, "required", "vispa_concatenate", |
|
114 |
+ "TAG", "char", ~ stringr::str_replace_all(.x, |
|
115 |
+ pattern = "\\.", |
|
116 |
+ replacement = "" |
|
117 |
+ ), "required", |
|
118 |
+ "tag_seq", |
|
119 |
+ "RAW_READS", "int", NULL, "optional", NA_character_, |
|
120 |
+ "QUALITY_PASSED", "int", NULL, "optional", NA_character_, |
|
121 |
+ "PHIX_MAPPING", "int", NULL, "optional", NA_character_, |
|
122 |
+ "PLASMID_MAPPED_BYPOOL", "int", NULL, "optional", NA_character_, |
|
123 |
+ "BARCODE_MUX", "int", NULL, "required", NA_character_, |
|
124 |
+ "LTR_IDENTIFIED", "int", NULL, "optional", NA_character_, |
|
125 |
+ "TRIMMING_FINAL_LTRLC", "int", NULL, "optional", NA_character_, |
|
126 |
+ "LV_MAPPED", "int", NULL, "optional", NA_character_, |
|
127 |
+ "BWA_MAPPED_OVERALL", "int", NULL, "optional", NA_character_, |
|
128 |
+ "ISS_MAPPED_OVERALL", "int", NULL, "optional", NA_character_, |
|
129 |
+ "ISS_MAPPED_PP", "int", NULL, "optional", NA_character_ |
|
130 |
+ ) |
|
127 | 131 |
} |
128 | 132 |
|
129 | 133 |
# Mappings between input format and formats requested by external parsing |
130 | 134 |
# functions |
131 | 135 |
.types_mapping <- function() { |
132 |
- tibble::tribble( |
|
133 |
- ~ types, ~ mapping, ~ fread, |
|
134 |
- "char", "c", "character", |
|
135 |
- "int", "i", "integer", |
|
136 |
- "logi", "l", "logical", |
|
137 |
- "numeric", "d", "numeric", |
|
138 |
- "factor", "f", "factor", |
|
139 |
- "date", "c", "charcter", |
|
140 |
- "ymd", "c", "character", |
|
141 |
- "ydm", "c", "character", |
|
142 |
- "mdy", "c", "character", |
|
143 |
- "myd", "c", "character", |
|
144 |
- "dmy", "c", "character", |
|
145 |
- "yq", "c", "character", |
|
146 |
- "ym", "c", "character", |
|
147 |
- "my", "c", "character", |
|
148 |
- "ymd_hms", "c", "character", |
|
149 |
- "ymd_hm", "c", "character", |
|
150 |
- "ymd_h", "c", "character", |
|
151 |
- "dmy_hms", "c", "character", |
|
152 |
- "dmy_hm", "c", "character", |
|
153 |
- "dmy_h", "c", "character", |
|
154 |
- "mdy_hms", "c", "character", |
|
155 |
- "mdy_hm", "c", "character", |
|
156 |
- "mdy_h", "c", "character", |
|
157 |
- "ydm_hms", "c", "character", |
|
158 |
- "ydm_hm", "c", "character", |
|
159 |
- "ydm_h", "c", "character" |
|
160 |
- ) |
|
136 |
+ tibble::tribble( |
|
137 |
+ ~types, ~mapping, ~fread, |
|
138 |
+ "char", "c", "character", |
|
139 |
+ "int", "i", "integer", |
|
140 |
+ "logi", "l", "logical", |
|
141 |
+ "numeric", "d", "numeric", |
|
142 |
+ "factor", "f", "factor", |
|
143 |
+ "date", "c", "character", |
|
144 |
+ "ymd", "c", "character", |
|
145 |
+ "ydm", "c", "character", |
|
146 |
+ "mdy", "c", "character", |
|
147 |
+ "myd", "c", "character", |
|
148 |
+ "dmy", "c", "character", |
|
149 |
+ "yq", "c", "character", |
|
150 |
+ "ym", "c", "character", |
|
151 |
+ "my", "c", "character", |
|
152 |
+ "ymd_hms", "c", "character", |
|
153 |
+ "ymd_hm", "c", "character", |
|
154 |
+ "ymd_h", "c", "character", |
|
155 |
+ "dmy_hms", "c", "character", |
|
156 |
+ "dmy_hm", "c", "character", |
|
157 |
+ "dmy_h", "c", "character", |
|
158 |
+ "mdy_hms", "c", "character", |
|
159 |
+ "mdy_hm", "c", "character", |
|
160 |
+ "mdy_h", "c", "character", |
|
161 |
+ "ydm_hms", "c", "character", |
|
162 |
+ "ydm_hm", "c", "character", |
|
163 |
+ "ydm_h", "c", "character" |
|
164 |
+ ) |
|
161 | 165 |
} |
162 | 166 |
|
163 | 167 |
# Internal: associates column types with column names for a more precise |
164 | 168 |
# import |
165 | 169 |
.mandatory_IS_types <- function(mode) { |
166 |
- specs <- mandatory_IS_vars(include_types = TRUE) |
|
167 |
- specs_mappings <- specs %>% |
|
168 |
- dplyr::left_join(.types_mapping(), by = "types") |
|
169 |
- if (mode == "fread") { |
|
170 |
- specs_mappings <- specs_mappings %>% |
|
171 |
- dplyr::select(.data$names, .data$fread) %>% |
|
172 |
- dplyr::group_by(.data$fread) |
|
173 |
- types <- specs_mappings %>% |
|
174 |
- dplyr::group_keys() %>% |
|
175 |
- dplyr::pull(.data$fread) |
|
176 |
- specs_mappings <- specs_mappings %>% |
|
177 |
- dplyr::group_split(.keep = FALSE) |
|
178 |
- names(specs_mappings) <- types |
|
179 |
- types <- purrr::map(specs_mappings, ~ .x$names) |
|
170 |
+ specs <- mandatory_IS_vars(include_types = TRUE) |
|
171 |
+ specs_mappings <- specs %>% |
|
172 |
+ dplyr::left_join(.types_mapping(), by = "types") |
|
173 |
+ if (mode == "fread") { |
|
174 |
+ specs_mappings <- specs_mappings %>% |
|
175 |
+ dplyr::select(.data$names, .data$fread) %>% |
|
176 |
+ dplyr::group_by(.data$fread) |
|
177 |
+ types <- specs_mappings %>% |
|
178 |
+ dplyr::group_keys() %>% |
|
179 |
+ dplyr::pull(.data$fread) |
|
180 |
+ specs_mappings <- specs_mappings %>% |
|
181 |
+ dplyr::group_split(.keep = FALSE) |
|
182 |
+ names(specs_mappings) <- types |
|
183 |
+ types <- purrr::map(specs_mappings, ~ .x$names) |
|
184 |
+ return(types) |
|
185 |
+ } |
|
186 |
+ types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names)) |
|
180 | 187 |
return(types) |
181 |
- } |
|
182 |
- types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names)) |
|
183 |
- return(types) |
|
184 | 188 |
} |
185 | 189 |
|
186 | 190 |
# Internal: associates column types with column names for a more precise |
187 | 191 |
# import |
188 | 192 |
.annotation_IS_types <- function(mode) { |
189 |
- specs <- annotation_IS_vars(include_types = TRUE) |
|
190 |
- specs_mappings <- specs %>% |
|
191 |
- dplyr::left_join(.types_mapping(), by = "types") |
|
192 |
- if (mode == "fread") { |
|
193 |
- specs_mappings <- specs_mappings %>% |
|
194 |
- dplyr::select(.data$names, .data$fread) %>% |
|
195 |
- dplyr::group_by(.data$fread) |
|
196 |
- types <- specs_mappings %>% |
|
197 |
- dplyr::group_keys() %>% |
|
198 |
- dplyr::pull(.data$fread) |
|
199 |
- specs_mappings <- specs_mappings %>% |
|
200 |
- dplyr::group_split(.keep = FALSE) |
|
201 |
- names(specs_mappings) <- types |
|
202 |
- types <- purrr::map(specs_mappings, ~ .x$names) |
|
193 |
+ specs <- annotation_IS_vars(include_types = TRUE) |
|
194 |
+ specs_mappings <- specs %>% |
|
195 |
+ dplyr::left_join(.types_mapping(), by = "types") |
|
196 |
+ if (mode == "fread") { |
|
197 |
+ specs_mappings <- specs_mappings %>% |
|
198 |
+ dplyr::select(.data$names, .data$fread) %>% |
|
199 |
+ dplyr::group_by(.data$fread) |
|
200 |
+ types <- specs_mappings %>% |
|
201 |
+ dplyr::group_keys() %>% |
|
202 |
+ dplyr::pull(.data$fread) |
|
203 |
+ specs_mappings <- specs_mappings %>% |
|
204 |
+ dplyr::group_split(.keep = FALSE) |
|
205 |
+ names(specs_mappings) <- types |
|
206 |
+ types <- purrr::map(specs_mappings, ~ .x$names) |
|
207 |
+ return(types) |
|
208 |
+ } |
|
209 |
+ types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names)) |
|
203 | 210 |
return(types) |
204 |
- } |
|
205 |
- types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names)) |
|
206 |
- return(types) |
|
207 | 211 |
} |
208 | 212 |
|
209 | 213 |
# Internal: associates column types with column names for a more precise |
210 | 214 |
# import |
211 | 215 |
.af_col_types <- function(mode) { |
212 |
- specs <- association_file_columns(include_types = TRUE) |
|
213 |
- specs_mappings <- specs %>% |
|
214 |
- dplyr::left_join(.types_mapping(), by = "types") |
|
215 |
- if (mode == "fread") { |
|
216 |
- specs_mappings <- specs_mappings %>% |
|
217 |
- dplyr::select(.data$names, .data$fread) %>% |
|
218 |
- dplyr::group_by(.data$fread) |
|
219 |
- types <- specs_mappings %>% |
|
220 |
- dplyr::group_keys() %>% |
|
221 |
- dplyr::pull(.data$fread) |
|
222 |
- specs_mappings <- specs_mappings %>% |
|
223 |
- dplyr::group_split(.keep = FALSE) |
|
224 |
- names(specs_mappings) <- types |
|
225 |
- types <- purrr::map(specs_mappings, ~ .x$names) |
|
226 |
- return(types) |
|
227 |
- } |
|
228 |
- types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names)) |
|
216 |
+ specs <- association_file_columns(include_types = TRUE) |
|
217 |
+ specs_mappings <- specs %>% |
|
218 |
+ dplyr::left_join(.types_mapping(), by = "types") |
|
219 |
+ if (mode == "fread") { |
|
220 |
+ specs_mappings <- specs_mappings %>% |
|
221 |
+ dplyr::select(.data$names, .data$fread) %>% |
|
222 |
+ dplyr::group_by(.data$fread) |
|
223 |
+ types <- specs_mappings %>% |
|
224 |
+ dplyr::group_keys() %>% |
|
225 |
+ dplyr::pull(.data$fread) |
|
226 |
+ specs_mappings <- specs_mappings %>% |
|
227 |
+ dplyr::group_split(.keep = FALSE) |
|
228 |
+ names(specs_mappings) <- types |
|
229 |
+ types <- purrr::map(specs_mappings, ~ .x$names) |
|
230 |
+ return(types) |
|
231 |
+ } |
|
232 |
+ types <- as.list(setNames(specs_mappings$mapping, specs_mappings$names)) |
|
229 | 233 |
} |
230 | 234 |
|
231 | 235 |
|
... | ... |
@@ -266,36 +270,38 @@ |
266 | 270 |
#' @examples |
267 | 271 |
#' reduced_AF_columns() |
268 | 272 |
reduced_AF_columns <- function() { |
269 |
- required <- list( |
|
270 |
- tag_id = "char", |
|
271 |
- tissue = "char", |
|
272 |
- subject = "char", |
|
273 |
- tp_days = c("char", "numeric", "integer"), |
|
274 |
- fusion_id = "char", |
|
275 |
- pcr_repl_id = "char", |
|
276 |
- cell_marker = "char", |
|
277 |
- project_id = "char", |
|
278 |
- vector_id = "char", |
|
279 |
- pool_id = "char" |
|
280 |
- ) |
|
281 |
- politics <- list( |
|
282 |
- tag_id = "error", |
|
283 |
- tissue = "error", |
|
284 |
- subject = "error", |
|
285 |
- tp_days = "first", |
|
286 |
- fusion_id = "error", |
|
287 |
- pcr_repl_id = "error", |
|
288 |
- cell_marker = "error", |
|
289 |
- project_id = "error", |
|
290 |
- vector_id = "error", |
|
291 |
- pool_id = "error" |
|
292 |
- ) |
|
293 |
- tag_cols <- .check_required_cols(required_tags = required, |
|
294 |
- vars_df = association_file_columns(TRUE), |
|
295 |
- duplicate_politic = politics) %>% |
|
296 |
- dplyr::select(.data$names, .data$tag) |
|
297 |
- data.table::setDT(tag_cols) |
|
298 |
- return(tag_cols) |
|
273 |
+ required <- list( |
|
274 |
+ tag_id = "char", |
|
275 |
+ tissue = "char", |
|
276 |
+ subject = "char", |
|
277 |
+ tp_days = c("char", "numeric", "integer"), |
|
278 |
+ fusion_id = "char", |
|
279 |
+ pcr_repl_id = "char", |
|
280 |
+ cell_marker = "char", |
|
281 |
+ project_id = "char", |
|
282 |
+ vector_id = "char", |
|
283 |
+ pool_id = "char" |
|
284 |
+ ) |
|
285 |
+ politics <- list( |
|
286 |
+ tag_id = "error", |
|
287 |
+ tissue = "error", |
|
288 |
+ subject = "error", |
|
289 |
+ tp_days = "first", |
|
290 |
+ fusion_id = "error", |
|
291 |
+ pcr_repl_id = "error", |
|
292 |
+ cell_marker = "error", |
|
293 |
+ project_id = "error", |
|
294 |
+ vector_id = "error", |
|
295 |
+ pool_id = "error" |
|
296 |
+ ) |
|
297 |
+ tag_cols <- .check_required_cols( |
|
298 |
+ required_tags = required, |
|
299 |
+ vars_df = association_file_columns(TRUE), |
|
300 |
+ duplicate_politic = politics |
|
301 |
+ ) %>% |
|
302 |
+ dplyr::select(.data$names, .data$tag) |
|
303 |
+ data.table::setDT(tag_cols) |
|
304 |
+ return(tag_cols) |
|
299 | 305 |
} |
300 | 306 |
|
301 | 307 |
# Names of the columns of iss stats considered for aggregation |
... | ... |
@@ -359,107 +365,168 @@ refGene_table_cols <- function() { |
359 | 365 |
} |
360 | 366 |
|
361 | 367 |
|
362 |
-#' Title |
|
368 |
+#' All available tags for dynamic vars look-up tables. |
|
363 | 369 |
#' |
364 |
-#' @return |
|
370 |
+#' @description |
|
371 |
+#' Contains all information associated with critical tags used in the dynamic |
|
372 |
+#' vars system. To know more see |
|
373 |
+#' `vignette("setup_workflow", package="ISAnalytics")`. |
|
374 |
+#' |
|
375 |
+#' @return A data frame |
|
365 | 376 |
#' @export |
366 | 377 |
#' |
367 | 378 |
#' @examples |
379 |
+#' available_tags() |
|
368 | 380 |
available_tags <- function() { |
369 |
- data.table::data.table( |
|
370 |
- tag = c("chromosome", "locus", "is_strand", "gene_symbol", "gene_strand", |
|
371 |
- "project_id", "fusion_id", "tag_seq", "subject", "vector_id", |
|
381 |
+ data.table::data.table( |
|
382 |
+ tag = c( |
|
383 |
+ "chromosome", "locus", "is_strand", "gene_symbol", "gene_strand", |
|
384 |
+ "project_id", "pool_id", "fusion_id", "tag_seq", "subject", |
|
385 |
+ "vector_id", |
|
372 | 386 |
"tissue", "tp_days", "pcr_method", "cell_marker", "tag_id", |
373 | 387 |
"ngs_tech", "dna_num", "pcr_replicate", "vcn", "vispa_concatenate", |
374 | 388 |
"pcr_repl_id", "proj_folder", "genome", |
375 |
- "vispa_concatenate", "tag_seq"), |
|
376 |
- needed_in = list(c("top_targeted_genes", |
|
377 |
- "CIS_grubbs", |
|
378 |
- "compute_near_integrations"), |
|
379 |
- c("top_targeted_genes", |
|
380 |
- "CIS_grubbs", |
|
381 |
- "compute_near_integrations"), |
|
382 |
- c("CIS_grubbs", |
|
383 |
- "compute_near_integrations"), |
|
384 |
- c("top_targeted_genes", |
|
385 |
- "CIS_grubbs", |
|
386 |
- "compute_near_integrations", |
|
387 |
- "CIS_volcano_plot"), |
|
388 |
- c("top_targeted_genes", |
|
389 |
- "CIS_grubbs"), |
|
390 |
- c("generate_default_folder_structure", |
|
391 |
- "import_Vispa2_stats", "remove_collisions", |
|
392 |
- "generate_Vispa2_launch_AF", "import_association_file", |
|
393 |
- "import_parallel_Vispa2Matrices"), |
|
394 |
- c("generate_Vispa2_launch_AF"), |
|
395 |
- c("generate_default_folder_structure", |
|
396 |
- "import_association_file", "import_Vispa2_stats"), |
|
397 |
- c("import_association_file", |
|
398 |
- "HSC_population_size_estimate"), |
|
399 |
- c("generate_Vispa2_launch_AF"), |
|
400 |
- c("generate_Vispa2_launch_AF", "import_association_file", |
|
401 |
- "HSC_population_size_estimate"), |
|
402 |
- c("generate_Vispa2_launch_AF", "import_association_file"), |
|
403 |
- c(), |
|
404 |
- c("generate_Vispa2_launch_AF", "import_association_file", |
|
405 |
- "HSC_population_size_estimate"), |
|
406 |
- c("generate_Vispa2_launch_AF"), |
|
407 |
- c(), |
|
408 |
- c(), |
|
409 |
- c("import_association_file", "remove_collisions"), |
|
410 |
- c(), |
|
411 |
- c("import_association_file", "generate_Vispa2_launch_AF", |
|
412 |
- "generate_default_folder_structure", |
|
413 |
- "import_Vispa2_stats", "import_parallel_Vispa2Matrices"), |
|
414 |
- c("pcr_id_column", "generate_Vispa2_launch_AF", |
|
415 |
- "import_association_file", "import_Vispa2_stats"), |
|
416 |
- c("import_association_file"), |
|
417 |
- c(), |
|
418 |
- c("import_association_file", "generate_Vispa2_launch_AF", |
|
419 |
- "generate_default_folder_structure", |
|
420 |
- "import_Vispa2_stats", "import_parallel_Vispa2Matrices"), |
|
421 |
- c("generate_default_folder_structure", |
|
422 |
- "import_association_file", "import_Vispa2_stats") |
|
423 |
- ), |
|
424 |
- description = c(paste("Number of the chromosome"), |
|
425 |
- paste("The locus at which the integration occurs"), |
|
426 |
- paste("The DNA strand in which the integration occurs"), |
|
427 |
- paste("The symbol of the gene"), |
|
428 |
- paste("The strand of the gene"), |
|
429 |
- paste("Unique identifier of a project"), |
|
430 |
- paste("Identification code/number of the", |
|
431 |
- "barcoded (SLiM-)PCR product included in the", |
|
432 |
- "sequencing library"), |
|
433 |
- paste("The barcode tag sequence"), |
|
434 |
- paste("Unique identifier of a study subject", |
|
435 |
- "(usually a patient)"), |
|
436 |
- paste("Unique identifier of the vector used"), |
|
437 |
- paste("The biological tissue the sample belongs to"), |
|
438 |
- paste("The time point expressed in days"), |
|
439 |
- paste("The PCR method used"), |
|
440 |
- paste("Cell marker associated with isolated", |
|
441 |
- "cells carrying the IS"), |
|
442 |
- paste("Unique identifier of the barcode tag, as specified", |
|
443 |
- "in VISPA2 requirements"), |
|
444 |
- paste("Technology used for next generation sequencing"), |
|
445 |
- paste("Identification code/number of the DNA extraction", |
|
446 |
- "from a specific biological sample"), |
|
447 |
- paste("Number of the PCR replicate"), |
|
448 |
- paste("Vector copy number"), |
|
449 |
- paste("Unique identifier of a pool as specified in VISPA2"), |
|
450 |
- paste("Unique identifier of the pcr replicate, used as", |
|
451 |
- "key to join data and metadata"), |
|
452 |
- paste("Path on disk containing the standard VISPA2 folder", |
|
453 |
- "structure of the project"), |
|
454 |
- paste("The reference genome (e.g. “hg19”)"), |
|
455 |
- paste("Unique identifier of a pool as specified in VISPA2"), |
|
456 |
- paste("The barcode tag sequence") |
|
457 |
- ), |
|
458 |
- dyn_vars_tbl = c("mand_vars", "mand_vars", "mand_vars", |
|
459 |
- "annot_vars", "annot_vars", |
|
460 |
- "af_vars", "af_vars", "af_vars", "af_vars", "af_vars", |
|
461 |
- "af_vars", "af_vars", "af_vars", "af_vars", "af_vars", |
|
462 |
- "af_vars", "af_vars", "af_vars", "af_vars", "af_vars", |
|
463 |
- "af_vars", "af_vars", "af_vars", "iss_vars", "iss_vars") |
|
464 |
- ) |
|
389 |
+ "vispa_concatenate", "tag_seq" |
|
390 |
+ ), |
|
391 |
+ needed_in = list( |
|
392 |
+ c( |
|
393 |
+ "top_targeted_genes", |
|
394 |
+ "CIS_grubbs", |
|
395 |
+ "compute_near_integrations" |
|
396 |
+ ), |
|
397 |
+ c( |
|
398 |
+ "top_targeted_genes", |
|
399 |
+ "CIS_grubbs", |
|
400 |
+ "compute_near_integrations" |
|
401 |
+ ), |
|
402 |
+ c( |
|
403 |
+ "CIS_grubbs", |
|
404 |
+ "compute_near_integrations" |
|
405 |
+ ), |
|
406 |
+ c( |
|
407 |
+ "top_targeted_genes", |
|
408 |
+ "CIS_grubbs", |
|
409 |
+ "compute_near_integrations", |
|
410 |
+ "CIS_volcano_plot" |
|
411 |
+ ), |
|
412 |
+ c( |
|
413 |
+ "top_targeted_genes", |
|
414 |
+ "CIS_grubbs" |
|
415 |
+ ), |
|
416 |
+ c( |
|
417 |
+ "generate_default_folder_structure", |
|
418 |
+ "import_Vispa2_stats", "remove_collisions", |
|
419 |
+ "generate_Vispa2_launch_AF", "import_association_file", |
|
420 |
+ "import_parallel_Vispa2Matrices" |
|
421 |
+ ), |
|
422 |
+ c( |
|
423 |
+ "generate_Vispa2_launch_AF", "remove_collisions", |
|
424 |
+ "import_association_file" |
|
425 |
+ ), |
|
426 |
+ c("generate_Vispa2_launch_AF"), |
|
427 |
+ c( |
|
428 |
+ "generate_default_folder_structure", |
|
429 |
+ "import_association_file", "import_Vispa2_stats" |
|
430 |
+ ), |
|
431 |
+ c( |
|
432 |
+ "import_association_file", |
|
433 |
+ "HSC_population_size_estimate" |
|
434 |
+ ), |
|
435 |
+ c("generate_Vispa2_launch_AF"), |
|
436 |
+ c( |
|
437 |
+ "generate_Vispa2_launch_AF", "import_association_file", |
|
438 |
+ "HSC_population_size_estimate" |
|
439 |
+ ), |
|
440 |
+ c("generate_Vispa2_launch_AF", "import_association_file"), |
|
441 |
+ c(), |
|
442 |
+ c( |
|
443 |
+ "generate_Vispa2_launch_AF", "import_association_file", |
|
444 |
+ "HSC_population_size_estimate" |
|
445 |
+ ), |
|
446 |
+ c("generate_Vispa2_launch_AF"), |
|
447 |
+ c(), |
|
448 |
+ c(), |
|
449 |
+ c("import_association_file", "remove_collisions"), |
|
450 |
+ c(), |
|
451 |
+ c( |
|
452 |
+ "import_association_file", "generate_Vispa2_launch_AF", |
|
453 |
+ "generate_default_folder_structure", |
|
454 |
+ "import_Vispa2_stats", "import_parallel_Vispa2Matrices" |
|
455 |
+ ), |
|
456 |
+ c( |
|
457 |
+ "pcr_id_column", "generate_Vispa2_launch_AF", |
|
458 |
+ "import_association_file", "import_Vispa2_stats" |
|
459 |
+ ), |
|
460 |
+ c("import_association_file"), |
|
461 |
+ c(), |
|
462 |
+ c( |
|
463 |
+ "import_association_file", "generate_Vispa2_launch_AF", |
|
464 |
+ "generate_default_folder_structure", |
|
465 |
+ "import_Vispa2_stats", "import_parallel_Vispa2Matrices" |
|
466 |
+ ), |
|
467 |
+ c( |
|
468 |
+ "generate_default_folder_structure", |
|
469 |
+ "import_association_file", "import_Vispa2_stats" |
|
470 |
+ ) |
|
471 |
+ ), |
|
472 |
+ description = c( |
|
473 |
+ paste("Number of the chromosome"), |
|
474 |
+ paste("The locus at which the integration occurs"), |
|
475 |
+ paste("The DNA strand in which the integration occurs"), |
|
476 |
+ paste("The symbol of the gene"), |
|
477 |
+ paste("The strand of the gene"), |
|
478 |
+ paste("Unique identifier of a project"), |
|
479 |
+ paste("Unique identifier of a sequencing pool"), |
|
480 |
+ paste( |
|
481 |
+ "Identification code/number of the", |
|
482 |
+ "barcoded (SLiM-)PCR product included in the", |
|
483 |
+ "sequencing library" |
|
484 |
+ ), |
|
485 |
+ paste("The barcode tag sequence"), |
|
486 |
+ paste( |
|
487 |
+ "Unique identifier of a study subject", |
|
488 |
+ "(usually a patient)" |
|
489 |
+ ), |
|
490 |
+ paste("Unique identifier of the vector used"), |
|
491 |
+ paste("The biological tissue the sample belongs to"), |
|
492 |
+ paste("The time point expressed in days"), |
|
493 |
+ paste("The PCR method used"), |
|
494 |
+ paste( |
|
495 |
+ "Cell marker associated with isolated", |
|
496 |
+ "cells carrying the IS" |
|
497 |
+ ), |
|
498 |
+ paste( |
|
499 |
+ "Unique identifier of the barcode tag, as specified", |
|
500 |
+ "in VISPA2 requirements" |
|
501 |
+ ), |
|
502 |
+ paste("Technology used for next generation sequencing"), |
|
503 |
+ paste( |
|
504 |
+ "Identification code/number of the DNA extraction", |
|
505 |
+ "from a specific biological sample" |
|
506 |
+ ), |
|
507 |
+ paste("Number of the PCR replicate"), |
|
508 |
+ paste("Vector copy number"), |
|
509 |
+ paste("Unique identifier of a pool as specified in VISPA2"), |
|
510 |
+ paste( |
|
511 |
+ "Unique identifier of the pcr replicate, used as", |
|
512 |
+ "key to join data and metadata" |
|
513 |
+ ), |
|
514 |
+ paste( |
|
515 |
+ "Path on disk containing the standard VISPA2 folder", |
|
516 |
+ "structure of the project" |
|
517 |
+ ), |
|
518 |
+ paste("The reference genome (e.g. 'hg19')"), |
|
519 |
+ paste("Unique identifier of a pool as specified in VISPA2"), |
|
520 |
+ paste("The barcode tag sequence") |
|
521 |
+ ), |
|
522 |
+ dyn_vars_tbl = c( |
|
523 |
+ "mand_vars", "mand_vars", "mand_vars", |
|
524 |
+ "annot_vars", "annot_vars", |
|
525 |
+ "af_vars", "af_vars", "af_vars", "af_vars", "af_vars", |
|
526 |
+ "af_vars", "af_vars", "af_vars", "af_vars", "af_vars", |
|
527 |
+ "af_vars", "af_vars", "af_vars", "af_vars", "af_vars", |
|
528 |
+ "af_vars", "af_vars", "af_vars", "af_vars", |
|
529 |
+ "iss_vars", "iss_vars" |
|
530 |
+ ) |
|
531 |
+ ) |
|
465 | 532 |
} |
... | ... |
@@ -27,12 +27,12 @@ |
27 | 27 |
tibble::tribble( |
28 | 28 |
~ names, ~ types, ~ transform, ~ flag, ~ tag, |
29 | 29 |
"ProjectID", "char", NULL, "required", "project_id", |
30 |
- "FUSIONID", "char", NULL, "optional", NA_character_, |
|
30 |
+ "FUSIONID", "char", NULL, "optional", "fusion_id", |
|
31 | 31 |
"PoolID", "char", NULL, "required", "pool_id", |
32 | 32 |
"TagSequence", "char", NULL, "required", "tag_seq", |
33 | 33 |
"SubjectID", "char", NULL, "required", "subject", |
34 | 34 |
"VectorType", "char", NULL, "optional", NA_character_, |
35 |
- "VectorID", "char", NULL, "required", NA_character_, |
|
35 |
+ "VectorID", "char", NULL, "required", "vector_id", |
|
36 | 36 |
"ExperimentID", "char", NULL, "optional", NA_character_, |
37 | 37 |
"Tissue", "char", NULL, "required", "tissue", |
38 | 38 |
"TimePoint", "char", ~ stringr::str_pad(.x, 4, side = "left", pad = "0"), |
... | ... |
@@ -42,7 +42,7 @@ |
42 | 42 |
"TagIDextended", "char", NULL, "optional", NA_character_, |
43 | 43 |
"Keywords","char", NULL, "optional", NA_character_, |
44 | 44 |
"CellMarker", "char", NULL, "required", "cell_marker", |
45 |
- "TagID", "char", NULL, "required", NA_character_, |
|
45 |
+ "TagID", "char", NULL, "required", "tag_id", |
|
46 | 46 |
"NGSProvider", "char", NULL, "optional", NA_character_, |
47 | 47 |
"NGSTechnology", "char", NULL, "required", "ngs_tech", |
48 | 48 |
"ConverrtedFilesDir", "char", NULL, "optional", NA_character_, |
... | ... |
@@ -266,11 +266,36 @@ |
266 | 266 |
#' @examples |
267 | 267 |
#' reduced_AF_columns() |
268 | 268 |
reduced_AF_columns <- function() { |
269 |
- c( |
|
270 |
- "TagID", "Tissue", "SubjectID", "TimePoint", "FUSIONID", |
|
271 |
- "CompleteAmplificationID", "CellMarker", "ProjectID", "VectorID", |
|
272 |
- "PoolID" |
|
273 |
- ) |
|
269 |
+ required <- list( |
|
270 |
+ tag_id = "char", |
|
271 |
+ tissue = "char", |
|
272 |
+ subject = "char", |
|
273 |
+ tp_days = c("char", "numeric", "integer"), |
|
274 |
+ fusion_id = "char", |
|
275 |
+ pcr_repl_id = "char", |
|
276 |
+ cell_marker = "char", |
|
277 |
+ project_id = "char", |
|
278 |
+ vector_id = "char", |
|
279 |
+ pool_id = "char" |
|
280 |
+ ) |
|
281 |
+ politics <- list( |
|
282 |
+ tag_id = "error", |
|
283 |
+ tissue = "error", |
|
284 |
+ subject = "error", |
|
285 |
+ tp_days = "first", |
|
286 |
+ fusion_id = "error", |
|
287 |
+ pcr_repl_id = "error", |
|
288 |
+ cell_marker = "error", |
|
289 |
+ project_id = "error", |
|
290 |
+ vector_id = "error", |
|
291 |
+ pool_id = "error" |
|
292 |
+ ) |
|
293 |
+ tag_cols <- .check_required_cols(required_tags = required, |
|
294 |
+ vars_df = association_file_columns(TRUE), |
|
295 |
+ duplicate_politic = politics) %>% |
|
296 |
+ dplyr::select(.data$names, .data$tag) |
|
297 |
+ data.table::setDT(tag_cols) |
|
298 |
+ return(tag_cols) |
|
274 | 299 |
} |
275 | 300 |
|
276 | 301 |
# Names of the columns of iss stats considered for aggregation |
... | ... |
@@ -334,20 +359,107 @@ refGene_table_cols <- function() { |
334 | 359 |
} |
335 | 360 |
|
336 | 361 |
|
337 |
-available_column_tags <- function() { |
|
338 |
- list( |
|
339 |
- critical = list(af = c( |
|
340 |
- "project_id", "pool_id", "tag_seq", "subject", "tissue", |
|
341 |
- "cell_marker", "pcr_replicate", "vispa_concatenate", |
|
342 |
- "pcr_repl_id", "proj_folder" |
|
343 |
- ), |
|
344 |