Browse code

[U] For next version - AF and file system fix

Giulia Pais authored on 13/12/2021 13:42:19
Showing 3 changed files

... ...
@@ -366,7 +366,7 @@ import_association_file <- function(path,
366 366
     import_stats_rep <- NULL
367 367
     missing_stats_rep <- NULL
368 368
     if (import_iss) {
369
-        dots <- rlang::dots_list(.named = TRUE)
369
+        dots <- rlang::dots_list(..., .named = TRUE)
370 370
         dots <- dots[!names(dots) %in% c(
371 371
             "association_file",
372 372
             "report_path",
... ...
@@ -274,7 +274,7 @@
274 274
             function(x) {
275 275
                 el <- getElement(t_readr, x)
276 276
                 el <- if (is.null(el)) {
277
-                  "guess"
277
+                    "guess"
278 278
                 } else if (el == "c") {
279 279
                     "text"
280 280
                 } else if (el %in% c("i", "d")) {
... ...
@@ -357,11 +357,6 @@
357 357
 # @param df The imported association file (data.frame or tibble)
358 358
 # @param root_folder Path to the root folder
359 359
 # @keywords internal
360
-#' @importFrom dplyr select distinct mutate
361
-#' @importFrom fs dir_ls path dir_exists
362
-#' @importFrom purrr pmap_dfr is_empty
363
-#' @importFrom stringr str_replace_all
364
-#' @importFrom tibble tibble
365 360
 #' @importFrom rlang .data `:=`
366 361
 #
367 362
 # @return A data frame containing, for each ProjectID and
... ...
@@ -379,70 +374,90 @@
379 374
         ) %>%
380 375
         dplyr::distinct()
381 376
     path_cols <- .path_cols_names()
382
-    results_df <- purrr::pmap_dfr(
383
-        temp_df,
384
-        function(...) {
385
-            cur <- tibble::tibble(...)
386
-            if (is.na(cur$PathToFolderProjectID)) {
387
-                return(cur %>%
388
-                    dplyr::mutate(
389
-                        !!path_cols$project := NA_character_,
390
-                        !!path_cols$quant := NA_character_,
391
-                        !!path_cols$iss := NA_character_
392
-                    ))
393
-            }
394
-            project_folder <- fs::path(
395
-                fs::path(root_folder),
396
-                cur$PathToFolderProjectID
397
-            )
398
-            quant_folder <- paste0(fs::path(
377
+    proj_folders_exist <- temp_df %>%
378
+        dplyr::select(.data$PathToFolderProjectID) %>%
379
+        dplyr::distinct() %>%
380
+        dplyr::mutate(Found := !is.na(
381
+            .data$PathToFolderProjectID
382
+        ) &
383
+            unname(fs::dir_exists(
384
+                fs::path(
385
+                    fs::path(root_folder),
386
+                    .data$PathToFolderProjectID
387
+                )
388
+            )))
389
+    partial_check <- temp_df %>%
390
+        dplyr::left_join(proj_folders_exist, by = "PathToFolderProjectID")
391
+    temp_df <- partial_check %>%
392
+        dplyr::filter(Found == TRUE)
393
+    partial_check <- partial_check %>%
394
+        dplyr::filter(Found == FALSE) %>%
395
+        dplyr::mutate(
396
+            !!path_cols$project := NA_character_,
397
+            !!path_cols$quant := NA_character_,
398
+            !!path_cols$iss := NA_character_
399
+        )
400
+    FUN <- function(...) {
401
+        cur <- tibble::tibble(...)
402
+        project_folder <- fs::path(
403
+            fs::path(root_folder),
404
+            cur$PathToFolderProjectID
405
+        )
406
+        quant_folder <- if (!is.na(cur$concatenatePoolIDSeqRun)) {
407
+            paste0(fs::path(
399 408
                 "quantification",
400 409
                 fs::path(cur$concatenatePoolIDSeqRun)
401 410
             ), "$")
402
-            iss_folder <- paste0(fs::path(
411
+        } else {
412
+            if (getOption("ISAnalytics.verbose") == TRUE) {
413
+                rlang::inform(c(paste(
414
+                    "Warning: found NA",
415
+                    "concatenatePoolIDSeqRun field"
416
+                ),
417
+                i = "Check association file for possible issues"
418
+                ))
419
+            }
420
+            NA_character_
421
+        }
422
+        iss_folder <- if (!is.na(cur$concatenatePoolIDSeqRun)) {
423
+            paste0(fs::path(
403 424
                 "iss",
404 425
                 fs::path(cur$concatenatePoolIDSeqRun)
405 426
             ), "$")
406
-            dirExists <- fs::dir_exists(project_folder)
407
-            if (!dirExists) {
408
-                return(cur %>%
409
-                    dplyr::mutate(
410
-                        !!path_cols$project := NA_character_,
411
-                        !!path_cols$quant := NA_character_,
412
-                        !!path_cols$iss := NA_character_
413
-                    ))
414
-            }
415
-            quant_found <- fs::dir_ls(
416
-                path = project_folder, recurse = TRUE,
417
-                type = "directory", fail = FALSE,
418
-                regexp = quant_folder
419
-            )
420
-            if (length(quant_found) == 0) {
421
-                quant_found <- NA_character_
422
-            }
423
-            iss_found <- fs::dir_ls(
424
-                path = project_folder, recurse = TRUE,
425
-                type = "directory", fail = FALSE,
426
-                regexp = iss_folder
427
-            )
428
-            if (length(iss_found) == 0) {
429
-                iss_found <- NA_character_
430
-            }
431
-            return(
432
-                cur %>%
433
-                    dplyr::mutate(
434
-                        !!path_cols$project := project_folder,
435
-                        !!path_cols$quant := quant_found,
436
-                        !!path_cols$iss := iss_found
437
-                    )
438
-            )
427
+        } else {
428
+            NA_character_
429
+        }
430
+        quant_found <- unique(fs::dir_ls(
431
+            path = project_folder, recurse = TRUE,
432
+            type = "directory", fail = FALSE,
433
+            regexp = quant_folder
434
+        ))
435
+        if (length(quant_found) == 0 || all(is.na(quant_found))) {
436
+            quant_found <- NA_character_
437
+        }
438
+        iss_found <- unique(fs::dir_ls(
439
+            path = project_folder, recurse = TRUE,
440
+            type = "directory", fail = FALSE,
441
+            regexp = iss_folder
442
+        ))
443
+        if (length(iss_found) == 0 || all(is.na(iss_found))) {
444
+            iss_found <- NA_character_
439 445
         }
446
+        return(
447
+            cur %>%
448
+                dplyr::mutate(
449
+                    !!path_cols$project := project_folder,
450
+                    !!path_cols$quant := quant_found,
451
+                    !!path_cols$iss := iss_found
452
+                )
453
+        )
454
+    }
455
+    results_df <- purrr::pmap_dfr(
456
+        temp_df,
457
+        FUN
440 458
     )
441 459
     checker_df <- results_df %>%
442
-        dplyr::mutate(
443
-            Found = ifelse(!is.na(.data[[path_cols$project]]), TRUE, FALSE),
444
-            .before = .data[[path_cols$project]]
445
-        )
460
+        dplyr::bind_rows(partial_check)
446 461
     checker_df
447 462
 }
448 463
 
... ...
@@ -2313,19 +2328,20 @@
2313 2328
     exceeding <- nrow(nested) %% max_workers
2314 2329
     ampl <- trunc(nrow(nested) / max_workers)
2315 2330
     chunks <- unlist(lapply(seq_len(max_workers),
2316
-                            FUN = rep_len,
2317
-                            length.out = ampl))
2331
+        FUN = rep_len,
2332
+        length.out = ampl
2333
+    ))
2318 2334
     if (exceeding > 0) {
2319
-      chunks <- c(chunks, rep_len(
2320
-        x = tail(max_workers, n = 1),
2321
-        length.out = exceeding
2322
-      ))
2335
+        chunks <- c(chunks, rep_len(
2336
+            x = tail(max_workers, n = 1),
2337
+            length.out = exceeding
2338
+        ))
2323 2339
     }
2324 2340
     chunks <- tibble::as_tibble_col(chunks, column_name = "chunk")
2325 2341
     nested <- nested %>% tibble::add_column(chunks)
2326 2342
     split_data <- nested %>%
2327
-      dplyr::group_by(.data$chunk) %>%
2328
-      dplyr::group_split(.keep = FALSE)
2343
+        dplyr::group_by(.data$chunk) %>%
2344
+        dplyr::group_split(.keep = FALSE)
2329 2345
 
2330 2346
     # Register backend according to platform
2331 2347
     if (.Platform$OS.type == "windows") {
... ...
@@ -97,6 +97,27 @@ test_that(paste(func_name[2], "finds missing projects for incorrect fs"), {
97 97
     )
98 98
 })
99 99
 
100
+test_that(paste(func_name[2], "no error for NA concat"), {
101
+    withr::with_dir(tempdir(), {
102
+        fake_pj <- fs::path("Project")
103
+        fs::dir_create(fake_pj)
104
+        fs::dir_create(fs::path(fake_pj, "fold_1_NA"))
105
+        fs::dir_create(fs::path(fake_pj, "fold_2_NA"))
106
+        fs::dir_create(fs::path(fake_pj, "fold_3_NA"))
107
+        test_df <- tibble::tribble(
108
+            ~ProjectID, ~PoolID, ~concatenatePoolIDSeqRun, ~PathToFolderProjectID,
109
+            "Project", "POOL1", NA_character_, "/Project",
110
+            "Project", "POOL2", NA_character_, "/Project",
111
+            "Project", "POOL3", NA_character_, "/Project",
112
+        )
113
+        expect_error(
114
+            {
115
+                checks <- .check_file_system_alignment(test_df, ".")
116
+            },
117
+            regexp = NA
118
+        )
119
+    })
120
+})
100 121
 
101 122
 #------------------------------------------------------------------------------#
102 123
 # Tests .update_af_after_alignment