Bioconductor Code: POMA

Browse code

xx

pcastellanoescuder authored on 04/08/2024 02:02:22
Showing 7 changed files

NEWS.md index 2b5b6f0..246517d 100644
R/PomaGSEA.R index 0000000..0543619
R/PomaImpute.R index 4ef5b29..209e2ed 100644
R/PomaNorm.R index 643328d..2bf8710 100644
R/PomaPCA.R index 2045992..fa19b5f 100644
R/PomaUnivariate.R index 0d2bd4b..aa337c9 100644
tests/testthat/test-PomaNorm.R index 7da6ce9..f13ebbe 100644

History View file @ 7617d15

@@ -3,6 +3,7 @@
                      * Analyzing data with replicates in `PomaLimma`
                      * Select outcome factor in `PomaBoxplots`, `PomaDensity`, and `PomaOutliers`
                      * Documentation improvements
                     +* Introduces `PomaORA` and `PomaGSEA` for enrichment analysis
                      # POMA 1.14.0

R/PomaGSEA.R

History View file @ 7617d15

                     new file mode 100644
@@ -0,0 +1,32 @@
+                    +
                     +#' Gene Set Enrichment Analysis
                     +#'
                     +#' @description `PomaGSEA` prepares a ranked feature table from `data`: the input is coerced with `as.data.frame()`, and its first and second columns are selected and renamed to `feature` and `rank`.
                     +#'
                     +#' @details NOTE(review): as written, this function only builds the ranked table; no enrichment statistic is computed in the body.
                     +#' The name and title suggest this table is meant to feed a later enrichment step -- confirm with the author.
                     +#' Columns are picked by position (1 and 2), not by name, so the column order of `data` matters.
                     +#'
                     +#' @param data An object that `as.data.frame()` can coerce to a data frame with at least two columns. Presumably the first column holds feature identifiers and the second holds the ranking values (inferred from the renaming to `feature` and `rank` -- TODO confirm).
                     +#'
                     +#' @export
                     +#'
                     +#' @return A data frame with the columns `feature` and `rank`. It is returned invisibly because the last expression in the body is an assignment.
                     +#'
                     +#' @author Pol Castellano-Escuder
                     +#'
                     +#' @importFrom magrittr %>%
                     +#'
                     +#' @examples
                     +#' # Two-column input: identifiers first, ranking values second
                     +#' ranking <- data.frame(id = c("A", "B", "C"), score = c(2.1, 0.3, -1.7))
                     +#'
                     +#' PomaGSEA(ranking)
                     +PomaGSEA <- function(data) {
+                    +
                     +  ranked_data <- data %>%
                     +    as.data.frame() %>%
                     +    dplyr::select(feature = 1, rank = 2)
+                    +
                     +}
+                    +

R/PomaImpute.R

History View file @ 7617d15

@@ -35,10 +35,12 @@ PomaImpute <- function(data,
                        if (!(method %in% c("none", "half_min", "median", "mean", "min", "knn", "random_forest"))) {
                          stop("Incorrect value for method argument")
+                       }
                     -  if (missing(method)) {
                     -    message("method argument is empty. KNN will be used")
                     -  }
                     +  # if (missing(method)) {
                     +  #   message("method argument is empty. KNN will be used")
                     +  # }
                     +  n_features_raw <- length(rownames(data))
+                    +
                        to_impute <- t(SummarizedExperiment::assay(data)) %>%
                          as.data.frame()
@@ -107,9 +109,11 @@ PomaImpute <- function(data,
+                       }
                        else if (method == "knn"){
                     -    imputed_t <- t(to_impute)
                     -    imputed_res <- impute::impute.knn(imputed_t)
                     -    imputed <- t(imputed_res$data)
                     +    suppressWarnings({
                     +      imputed_t <- t(to_impute)
                     +      imputed_res <- impute::impute.knn(imputed_t)
                     +      imputed <- t(imputed_res$data)
                     +    })
+                       }
                        else if (method == "random_forest"){
@@ -128,7 +132,11 @@ PomaImpute <- function(data,
                        } else {
                          data <- SummarizedExperiment::SummarizedExperiment(assays = t(imputed))
+                       }
+                    -
+                    +
                     +  n_features_imputed <- length(rownames(data))
+                    +
                     +  message(paste0(n_features_raw - n_features_imputed, " features removed."))
+                    +
                        if (validObject(data))
                          return(data)
+                     }

R/PomaNorm.R

History View file @ 7617d15

@@ -52,7 +52,7 @@ box_cox_transformation <- function(data) {
                      #'
                      #' @param data A `SummarizedExperiment` object.
                      #' @param sample_norm Character. Sample normalization method. Options include "none" (default), "sum", or "quantile".
                     -#' @param method Character. The normalization method to use. Options include "none" (no normalization), "auto_scaling" (autoscaling normalization, i.e., Z-score normalization), "level_scaling" (level scaling normalization), "log_scaling" (log scaling normalization), "log_transform" (log transformation normalization), "vast_scaling" (vast scaling normalization), "log_pareto" (log Pareto scaling normalization), "min_max" (min-max normalization), and "box_cox" (Box-Cox transformation).
                     +#' @param method Character. The normalization method to use. Options include "none" (no normalization), "auto_scaling" (autoscaling, i.e., Z-score normalization), "level_scaling" (level scaling), "log_scaling" (log scaling), "log" (log transformation), "vast_scaling" (vast scaling), "log_pareto" (log Pareto scaling), "min_max" (min-max), and "box_cox" (Box-Cox transformation).
                      #'
                      #' @export
                      #'
@@ -71,13 +71,13 @@ PomaNorm <- function(data,
                        if(!is(data, "SummarizedExperiment")){
                          stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment")
+                       }
                     -  if (!(method %in% c("none", "auto_scaling", "level_scaling", "log_scaling", "log_transform",
                     +  if (!(method %in% c("none", "auto_scaling", "level_scaling", "log_scaling", "log",
                                            "vast_scaling", "log_pareto", "min_max", "box_cox"))) {
                          stop("Incorrect value for method argument")
+                       }
                     -  if (missing(method)) {
                     -    message("method argument is empty. log Pareto will be used")
                     -  }
                     +  # if (missing(method)) {
                     +  #   message("method argument is empty. log Pareto will be used")
                     +  # }
                        to_norm <- t(SummarizedExperiment::assay(data)) %>%
                          as.data.frame()
@@ -122,11 +122,11 @@ PomaNorm <- function(data,
+                       }
                        else if (method == "log_scaling"){
                     -    normalized <- apply(to_norm, 2, function(x) (log10(x + 1) - mean(log10(x + 1), na.rm = TRUE)) / sd(log10(x + 1), na.rm = TRUE))
                     +    normalized <- apply(to_norm, 2, function(x) (log(x + 1) - mean(log(x + 1), na.rm = TRUE)) / sd(log(x + 1), na.rm = TRUE))
+                       }
                     -  else if (method == "log_transform"){
                     -    normalized <- apply(to_norm, 2, function(x) (log10(x + 1)))
                     +  else if (method == "log"){
                     +    normalized <- apply(to_norm, 2, function(x) (log(x + 1)))
+                       }
                        else if (method == "vast_scaling"){
@@ -134,7 +134,7 @@ PomaNorm <- function(data,
+                       }
                        else if (method == "log_pareto"){
                     -    normalized <- apply(to_norm, 2, function(x) (log10(x + 1) - mean(log10(x + 1), na.rm = TRUE)) / sqrt(sd(log10(x + 1), na.rm = TRUE)))
                     +    normalized <- apply(to_norm, 2, function(x) (log(x + 1) - mean(log(x + 1), na.rm = TRUE)) / sqrt(sd(log(x + 1), na.rm = TRUE)))
+                       }
                        else if (method == "min_max") {

R/PomaPCA.R

History View file @ 7617d15

@@ -73,7 +73,7 @@ PomaPCA <- function(data,
                        # eigenvalues
                        eigenvalues <- data.frame(comp = paste0("PC", 1:ncomp),
                     -                            var_exp = round(100*(((pca_res$sdev[1:ncomp]^2)) / sum(pca_res$sdev[1:ncomp]^2)), 2)) %>%
                     +                            var_exp = 100*(((pca_res$sdev[1:ncomp]^2)) / sum(pca_res$sdev[1:ncomp]^2))) %>%
                          dplyr::as_tibble()
                        # eigenvalues plot

R/PomaUnivariate.R

History View file @ 7617d15

@@ -130,8 +130,8 @@ PomaUnivariate <- function(data,
                            tibble::rownames_to_column("feature") %>%
                            dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
                            dplyr::bind_cols(group_means, group_sd) %>%
                     -      dplyr::mutate(fold_change = as.numeric(round(group_means[,2] / group_means[,1], 3)),
                     -                    diff_means = as.numeric(round(group_means[,2] - group_means[,1], 3))) %>%
                     +      dplyr::mutate(fold_change = as.numeric(group_means[,2] / group_means[,1]),
                     +                    diff_means = as.numeric(group_means[,2] - group_means[,1])) %>%
                            dplyr::select(feature, fold_change, diff_means, pvalue, adj_pvalue, dplyr::everything()) %>%
                            dplyr::arrange(pvalue) %>%
                            dplyr::as_tibble()
@@ -252,8 +252,8 @@ PomaUnivariate <- function(data,
                              tibble::rownames_to_column("feature") %>%
                              dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
                              dplyr::bind_cols(group_means, group_sd) %>%
                     -        dplyr::mutate(fold_change = as.numeric(round(group_means[,2]/group_means[,1], 3)),
                     -                      diff_means = as.numeric(round(group_means[,2] - group_means[,1], 3))) %>%
                     +        dplyr::mutate(fold_change = as.numeric(group_means[,2]/group_means[,1]),
                     +                      diff_means = as.numeric(group_means[,2] - group_means[,1])) %>%
                              dplyr::select(feature, fold_change, diff_means, pvalue, adj_pvalue, dplyr::everything()) %>%
                              dplyr::arrange(pvalue) %>%
                              dplyr::as_tibble()

tests/testthat/test-PomaNorm.R

History View file @ 7617d15

@@ -20,7 +20,7 @@ test_that("PomaNorm handles sample normalization methods correctly", {
                      test_that("PomaNorm handles different normalization methods correctly", {
                        data <- create_mock_summarized_experiment()
                     -  for (method in c("none", "auto_scaling", "level_scaling", "log_scaling", "log_transform",
                     +  for (method in c("none", "auto_scaling", "level_scaling", "log_scaling", "log",
                                         "vast_scaling", "log_pareto", "min_max", "box_cox")) {
                          normalized_data <- PomaNorm(data, method = method)
                          expect_is(normalized_data, "SummarizedExperiment")