pcastellanoescuder authored on 04/08/2024 02:02:22
Showing 7 changed files

... ...
@@ -3,6 +3,7 @@
3 3
 * Analyzing data with replicates in `PomaLimma`
4 4
 * Select outcome factor in `PomaBoxplots`, `PomaDensity`, and `PomaOutliers`
5 5
 * Documentation improvements
6
+* Introduces `PomaORA` and `PomaGSEA` for enrichment analysis
6 7
 
7 8
 # POMA 1.14.0
8 9
 
9 10
new file mode 100644
... ...
@@ -0,0 +1,32 @@
1
+
2
+#' Gene Set Enrichment Analysis
3
+#'
4
+#' @description `PomaGSEA` prepares a ranked feature table for gene set enrichment analysis. At present it only coerces `data` to a data frame and keeps its first two columns, renamed to `feature` and `rank`; no enrichment statistics are computed.
5
+#'
6
+#' @param data An object that can be coerced with `as.data.frame()`. Columns are selected by position, not by name: the first column is taken as the feature identifier and the second column as the ranking statistic.
7
+#'
8
+#' @export
9
+#'
10
+#' @return A data frame with two columns, `feature` and `rank`.
11
+#' @author Pol Castellano-Escuder
12
+#'
13
+#' @importFrom magrittr %>%
14
+#' 
15
+#' @examples 
16
+#' # Minimal ranked table: identifiers in column 1, ranking statistic in column 2
17
+#' ranked <- data.frame(id = c("f1", "f2", "f3"), stat = c(2.1, -0.4, 1.3))
18
+#' 
19
+#' PomaGSEA(ranked)
20
+PomaGSEA <- function(data) {
21
+  
22
+  # Keep only the first two columns, renamed by position to `feature` and `rank`
23
+  ranked_data <- data %>% 
24
+    as.data.frame() %>% 
25
+    dplyr::select(feature = 1, rank = 2)
26
+  
27
+  # Return the table explicitly: a function body that ends on an assignment
28
+  # returns its value invisibly, so nothing would print at the console
29
+  ranked_data
30
+  
31
+}
32
+
... ...
@@ -35,10 +35,12 @@ PomaImpute <- function(data,
35 35
   if (!(method %in% c("none", "half_min", "median", "mean", "min", "knn", "random_forest"))) {
36 36
     stop("Incorrect value for method argument")
37 37
   }
38
-  if (missing(method)) {
39
-    message("method argument is empty. KNN will be used")
40
-  }
38
+  # if (missing(method)) {
39
+  #   message("method argument is empty. KNN will be used")
40
+  # }
41 41
 
42
+  n_features_raw <- length(rownames(data))
43
+  
42 44
   to_impute <- t(SummarizedExperiment::assay(data)) %>% 
43 45
     as.data.frame()
44 46
   
... ...
@@ -107,9 +109,11 @@ PomaImpute <- function(data,
107 109
   }
108 110
 
109 111
   else if (method == "knn"){
110
-    imputed_t <- t(to_impute)
111
-    imputed_res <- impute::impute.knn(imputed_t)
112
-    imputed <- t(imputed_res$data)
112
+    suppressWarnings({
113
+      imputed_t <- t(to_impute)
114
+      imputed_res <- impute::impute.knn(imputed_t)
115
+      imputed <- t(imputed_res$data)
116
+    })
113 117
   }
114 118
   
115 119
   else if (method == "random_forest"){
... ...
@@ -128,7 +132,11 @@ PomaImpute <- function(data,
128 132
   } else {
129 133
     data <- SummarizedExperiment::SummarizedExperiment(assays = t(imputed))
130 134
   }
131
-    
135
+  
136
+  n_features_imputed <- length(rownames(data))
137
+  
138
+  message(paste0(n_features_raw - n_features_imputed, " features removed."))
139
+  
132 140
   if (validObject(data))
133 141
     return(data)
134 142
 }
... ...
@@ -52,7 +52,7 @@ box_cox_transformation <- function(data) {
52 52
 #'
53 53
 #' @param data A `SummarizedExperiment` object.
54 54
 #' @param sample_norm Character. Sample normalization method. Options include "none" (default), "sum", or "quantile".
55
-#' @param method Character. The normalization method to use. Options include "none" (no normalization), "auto_scaling" (autoscaling normalization, i.e., Z-score normalization), "level_scaling" (level scaling normalization), "log_scaling" (log scaling normalization), "log_transform" (log transformation normalization), "vast_scaling" (vast scaling normalization), "log_pareto" (log Pareto scaling normalization), "min_max" (min-max normalization), and "box_cox" (Box-Cox transformation).
55
+#' @param method Character. The normalization method to use. Options include "none" (no normalization), "auto_scaling" (autoscaling, i.e., Z-score normalization), "level_scaling" (level scaling), "log_scaling" (log scaling), "log" (natural log transformation, i.e., `log(x + 1)`), "vast_scaling" (vast scaling), "log_pareto" (log Pareto scaling), "min_max" (min-max), and "box_cox" (Box-Cox transformation).
56 56
 #'
57 57
 #' @export
58 58
 #'
... ...
@@ -71,13 +71,13 @@ PomaNorm <- function(data,
71 71
   if(!is(data, "SummarizedExperiment")){
72 72
     stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment")
73 73
   }
74
-  if (!(method %in% c("none", "auto_scaling", "level_scaling", "log_scaling", "log_transform",
74
+  if (!(method %in% c("none", "auto_scaling", "level_scaling", "log_scaling", "log",
75 75
                       "vast_scaling", "log_pareto", "min_max", "box_cox"))) {
76 76
     stop("Incorrect value for method argument")
77 77
   }
78
-  if (missing(method)) {
79
-    message("method argument is empty. log Pareto will be used")
80
-  }
78
+  # if (missing(method)) {
79
+  #   message("method argument is empty. log Pareto will be used")
80
+  # }
81 81
 
82 82
   to_norm <- t(SummarizedExperiment::assay(data)) %>% 
83 83
     as.data.frame()
... ...
@@ -122,11 +122,11 @@ PomaNorm <- function(data,
122 122
   }
123 123
 
124 124
   else if (method == "log_scaling"){
125
-    normalized <- apply(to_norm, 2, function(x) (log10(x + 1) - mean(log10(x + 1), na.rm = TRUE)) / sd(log10(x + 1), na.rm = TRUE))
125
+    normalized <- apply(to_norm, 2, function(x) (log(x + 1) - mean(log(x + 1), na.rm = TRUE)) / sd(log(x + 1), na.rm = TRUE))
126 126
   }
127 127
 
128
-  else if (method == "log_transform"){
129
-    normalized <- apply(to_norm, 2, function(x) (log10(x + 1)))
128
+  else if (method == "log"){
129
+    normalized <- apply(to_norm, 2, function(x) (log(x + 1)))
130 130
   }
131 131
 
132 132
   else if (method == "vast_scaling"){
... ...
@@ -134,7 +134,7 @@ PomaNorm <- function(data,
134 134
   }
135 135
 
136 136
   else if (method == "log_pareto"){
137
-    normalized <- apply(to_norm, 2, function(x) (log10(x + 1) - mean(log10(x + 1), na.rm = TRUE)) / sqrt(sd(log10(x + 1), na.rm = TRUE)))
137
+    normalized <- apply(to_norm, 2, function(x) (log(x + 1) - mean(log(x + 1), na.rm = TRUE)) / sqrt(sd(log(x + 1), na.rm = TRUE)))
138 138
   }
139 139
   
140 140
   else if (method == "min_max") {
... ...
@@ -73,7 +73,7 @@ PomaPCA <- function(data,
73 73
 
74 74
   # eigenvalues
75 75
   eigenvalues <- data.frame(comp = paste0("PC", 1:ncomp),
76
-                            var_exp = round(100*(((pca_res$sdev[1:ncomp]^2)) / sum(pca_res$sdev[1:ncomp]^2)), 2)) %>% 
76
+                            var_exp = 100*(((pca_res$sdev[1:ncomp]^2)) / sum(pca_res$sdev[1:ncomp]^2))) %>% 
77 77
     dplyr::as_tibble()
78 78
   
79 79
   # eigenvalues plot
... ...
@@ -130,8 +130,8 @@ PomaUnivariate <- function(data,
130 130
       tibble::rownames_to_column("feature") %>%
131 131
       dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
132 132
       dplyr::bind_cols(group_means, group_sd) %>%
133
-      dplyr::mutate(fold_change = as.numeric(round(group_means[,2] / group_means[,1], 3)),
134
-                    diff_means = as.numeric(round(group_means[,2] - group_means[,1], 3))) %>%
133
+      dplyr::mutate(fold_change = as.numeric(group_means[,2] / group_means[,1]),
134
+                    diff_means = as.numeric(group_means[,2] - group_means[,1])) %>%
135 135
       dplyr::select(feature, fold_change, diff_means, pvalue, adj_pvalue, dplyr::everything()) %>% 
136 136
       dplyr::arrange(pvalue) %>% 
137 137
       dplyr::as_tibble()
... ...
@@ -252,8 +252,8 @@ PomaUnivariate <- function(data,
252 252
         tibble::rownames_to_column("feature") %>%
253 253
         dplyr::mutate(adj_pvalue = p.adjust(pvalue, method = adjust)) %>%
254 254
         dplyr::bind_cols(group_means, group_sd) %>%
255
-        dplyr::mutate(fold_change = as.numeric(round(group_means[,2]/group_means[,1], 3)),
256
-                      diff_means = as.numeric(round(group_means[,2] - group_means[,1], 3))) %>% 
255
+        dplyr::mutate(fold_change = as.numeric(group_means[,2]/group_means[,1]),
256
+                      diff_means = as.numeric(group_means[,2] - group_means[,1])) %>% 
257 257
         dplyr::select(feature, fold_change, diff_means, pvalue, adj_pvalue, dplyr::everything()) %>% 
258 258
         dplyr::arrange(pvalue) %>% 
259 259
         dplyr::as_tibble()
... ...
@@ -20,7 +20,7 @@ test_that("PomaNorm handles sample normalization methods correctly", {
20 20
 
21 21
 test_that("PomaNorm handles different normalization methods correctly", {
22 22
   data <- create_mock_summarized_experiment()
23
-  for (method in c("none", "auto_scaling", "level_scaling", "log_scaling", "log_transform",
23
+  for (method in c("none", "auto_scaling", "level_scaling", "log_scaling", "log",
24 24
                    "vast_scaling", "log_pareto", "min_max", "box_cox")) {
25 25
     normalized_data <- PomaNorm(data, method = method)
26 26
     expect_is(normalized_data, "SummarizedExperiment")