5 | 5 |
deleted file mode 100644 |
... | ... |
@@ -1,49 +0,0 @@ |
1 |
- |
|
2 |
-#' Automatic Exploratory Data Analysis HTML Report |
|
3 |
-#' |
|
4 |
-#' @description This function automatically generates a HTML report with different exploratory plots and tables from an `SummarizedExperiment` object. |
|
5 |
-#' |
|
6 |
-#' @param data A `SummarizedExperiment` object. |
|
7 |
-#' @param imputation Imputation method. See `?POMA::PomaImpute()`. |
|
8 |
-#' @param normalization Normalization method. See `?POMA::PomaNorm()`. |
|
9 |
-#' @param clean_outliers Logical. If it's set to TRUE, outliers will be removed from EDA. |
|
10 |
-#' @param coeff_outliers This value corresponds to the classical 1.5 in \eqn{Q3 + 1.5*IQR} formula to detect outliers. See `?POMA::PomaOutliers()`. |
|
11 |
-#' @param username Author name in the report. |
|
12 |
-#' @param institution Institution name in the report. |
|
13 |
-#' |
|
14 |
-#' @export |
|
15 |
-#' |
|
16 |
-#' @return An exploratory data analysis HTML report. |
|
17 |
-#' @author Pol Castellano-Escuder |
|
18 |
-PomaEDA <- function(data, # nocov start |
|
19 |
- imputation = "knn", |
|
20 |
- normalization = "log_pareto", |
|
21 |
- clean_outliers = TRUE, |
|
22 |
- coeff_outliers = 1.5, |
|
23 |
- username = NULL, |
|
24 |
- institution = NULL){ |
|
25 |
- |
|
26 |
- .Deprecated(new = return(NULL), msg = "This function has been deprecated") |
|
27 |
- |
|
28 |
- if (missing(data)) { |
|
29 |
- stop("data argument is empty!") |
|
30 |
- } |
|
31 |
- if(!is(data, "SummarizedExperiment")){ |
|
32 |
- stop("data is not a SummarizedExperiment object. \nSee POMA::PomaSummarizedExperiment or SummarizedExperiment::SummarizedExperiment") |
|
33 |
- } |
|
34 |
- if (!(imputation %in% c("none", "half_min", "median", "mean", "min", "knn"))) { |
|
35 |
- stop("Incorrect value for imputation argument!") |
|
36 |
- } |
|
37 |
- if (!(normalization %in% c("none", "auto_scaling", "level_scaling", "log_scaling", |
|
38 |
- "log_transformation", "vast_scaling", "log_pareto"))) { |
|
39 |
- stop("Incorrect value for normalization argument!") |
|
40 |
- } |
|
41 |
- if(!is(SummarizedExperiment::colData(data)[,1], "character") & |
|
42 |
- !is(SummarizedExperiment::colData(data)[,1], "factor")){ |
|
43 |
- stop("PomaEDA expects the first column of your target to be a factor or character. More report options coming soon...") |
|
44 |
- } |
|
45 |
- |
|
46 |
- rmarkdown::render(system.file("rmd", "POMA_EDA_report.Rmd", package = "POMA"), "html_document") |
|
47 |
- |
|
48 |
-} # nocov end |
|
49 |
- |
... | ... |
@@ -48,8 +48,7 @@ PomaLMM <- function(data, |
48 | 48 |
x = NULL, |
49 | 49 |
y = NULL, |
50 | 50 |
adjust = "fdr", |
51 |
- clean_plot = FALSE, |
|
52 |
- ...) { |
|
51 |
+ clean_plot = FALSE) { |
|
53 | 52 |
|
54 | 53 |
if (!is(data, "SummarizedExperiment")){ |
55 | 54 |
stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment") |
... | ... |
@@ -35,9 +35,9 @@ PomaOddsRatio <- function(data, |
35 | 35 |
} |
36 | 36 |
if (!is.factor(SummarizedExperiment::colData(data)[,1])) { |
37 | 37 |
stop("Grouping factor must be a factor (first column of the metadata file)") |
38 |
- if (length(table(SummarizedExperiment::colData(data)[,1])[table(SummarizedExperiment::colData(data)[,1]) != 0]) != 2) { |
|
39 |
- stop("Grouping factor must have exactly 2 levels (first column of the metadata file)") |
|
40 |
- } |
|
38 |
+ } |
|
39 |
+ if (length(table(SummarizedExperiment::colData(data)[,1])[table(SummarizedExperiment::colData(data)[,1]) != 0]) != 2) { |
|
40 |
+ stop("Grouping factor must have exactly 2 levels (first column of the metadata file)") |
|
41 | 41 |
} |
42 | 42 |
if (!is.null(feature_name)) { |
43 | 43 |
if(!any(feature_name %in% rownames(SummarizedExperiment::assay(data)))) { |
... | ... |
@@ -99,7 +99,7 @@ PomaOddsRatio <- function(data, |
99 | 99 |
ggplot2::geom_point(size = 3, pch = 21, fill = "orange") + |
100 | 100 |
ggplot2::labs(x = "Odds Ratio", |
101 | 101 |
y = NULL) + |
102 |
- POMA::theme_poma() |
|
102 |
+ theme_poma() |
|
103 | 103 |
|
104 | 104 |
return(list(odds_ratio_table = odds, |
105 | 105 |
odds_ratio_plot = ORPlot)) |
... | ... |
@@ -324,7 +324,7 @@ PomaPLS <- function(data, |
324 | 324 |
test.keepX = c(1:num_features), nrepeat = nrepeat) |
325 | 325 |
|
326 | 326 |
opt_ncomp <- tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests |
327 |
- select_keepX <- tune_splsda$choice.keepX[1:ncomp] # optimal number of variables to select |
|
327 |
+ select_keepX <- tune_splsda$choice.keepX[1:ncomp] # optimal number of variables to select |
|
328 | 328 |
|
329 | 329 |
errors_splsda_sd <- data.frame(tune_splsda$error.rate.sd) %>% |
330 | 330 |
tibble::rownames_to_column("feature_sd") %>% |
... | ... |
@@ -75,12 +75,8 @@ PomaRankProd <- function(data, |
75 | 75 |
|
76 | 76 |
one <- as.data.frame(top_rank$Table1) |
77 | 77 |
two <- as.data.frame(top_rank$Table2) |
78 |
- |
|
79 |
- if(nrow(one) == 0 & nrow(two) == 0){ |
|
80 |
- stop("No significant features found...") |
|
81 |
- } |
|
82 | 78 |
|
83 |
- if(nrow(one) != 0){ |
|
79 |
+ if (nrow(one) != 0){ |
|
84 | 80 |
|
85 | 81 |
one <- one %>% |
86 | 82 |
tibble::rownames_to_column("feature") %>% |
... | ... |
@@ -92,7 +88,7 @@ PomaRankProd <- function(data, |
92 | 88 |
colnames(one)[4] <- paste0("FC_", class1, "_", class2) |
93 | 89 |
} |
94 | 90 |
|
95 |
- if(nrow(two) != 0){ |
|
91 |
+ if (nrow(two) != 0){ |
|
96 | 92 |
|
97 | 93 |
two <- two %>% |
98 | 94 |
tibble::rownames_to_column("feature") %>% |
... | ... |
@@ -132,14 +128,14 @@ PomaRankProd <- function(data, |
132 | 128 |
|
133 | 129 |
plot1 <- ggplot2::ggplot(rp_plot, ggplot2::aes(x = rank1, y = pfp1)) + |
134 | 130 |
ggplot2::geom_point(size = 1.5, alpha=0.9) + |
135 |
- ggplot2::theme_bw() + |
|
131 |
+ theme_poma() + |
|
136 | 132 |
ggplot2::labs(x = "Number of identified features", |
137 | 133 |
y = "Estimated PFP", |
138 | 134 |
title = paste0("Up-regulated features in ", class2)) |
139 | 135 |
|
140 | 136 |
plot2 <- ggplot2::ggplot(rp_plot, ggplot2::aes(x = rank2, y = pfp2)) + |
141 | 137 |
ggplot2::geom_point(size = 1.5, alpha=0.9) + |
142 |
- ggplot2::theme_bw() + |
|
138 |
+ theme_poma() + |
|
143 | 139 |
ggplot2::labs(x = "Number of identified features", |
144 | 140 |
y = "Estimated PFP", |
145 | 141 |
title = paste0("Down-regulated features in ", class2)) |
... | ... |
@@ -148,6 +144,5 @@ PomaRankProd <- function(data, |
148 | 144 |
down_regulated = two, |
149 | 145 |
up_regulated_plot = plot1, |
150 | 146 |
down_regulated_plot = plot2)) |
151 |
- |
|
152 | 147 |
} |
153 | 148 |
|
... | ... |
@@ -48,6 +48,8 @@ PomaUMAP <- function(data, |
48 | 48 |
stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment") |
49 | 49 |
} |
50 | 50 |
|
51 |
+ if (hdbscan_minpts < 2) {hdbscan_minpts <- 2} |
|
52 |
+ |
|
51 | 53 |
to_umap <- t(SummarizedExperiment::assay(data)) |
52 | 54 |
|
53 | 55 |
umap_res <- uwot::umap(to_umap, |
... | ... |
@@ -42,8 +42,7 @@ PomaVolcano <- function(data, |
42 | 42 |
log2fc_cutoff = NULL, |
43 | 43 |
labels = FALSE, |
44 | 44 |
paired = FALSE, |
45 |
- var_equal = FALSE, |
|
46 |
- ...) { |
|
45 |
+ var_equal = FALSE) { |
|
47 | 46 |
|
48 | 47 |
if(!is(data, "SummarizedExperiment")){ |
49 | 48 |
stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment") |
... | ... |
@@ -43,12 +43,30 @@ make_legend <- function(fun, |
43 | 43 |
return(legend) |
44 | 44 |
} |
45 | 45 |
|
46 |
# Build a small random data set for unit tests and hand it to PomaCreateObject().
#
# binary   - if TRUE, group labels are drawn from "A"/"B"; otherwise from
#            "A"/"B"/"C".
# paired   - only consulted when `binary` is TRUE: the random labels are then
#            replaced by ten "A" followed by ten "B".
# integers - if TRUE, features are a 20 x 10 matrix of values sampled from
#            1:100; otherwise a 20-row matrix filled with 100 runif() draws.
#
# Returns the result of PomaCreateObject(features = ..., metadata = ...).
create_mock_summarized_experiment <- function(binary = FALSE, paired = FALSE, integers = FALSE) {

  label_pool <- if (!binary) c("A", "B", "C") else c("A", "B")

  # The random draw is always performed, even when it is overwritten below,
  # so the random number stream is consumed in the same order in every case.
  g_labels <- sample(label_pool, 20, replace = TRUE)
  if (binary && paired) {
    g_labels <- rep(c("A", "B"), each = 10)
  }

  matrix_data <- if (integers) {
    matrix(sample(1:100, 20 * 10, replace = TRUE), nrow = 20, ncol = 10)
  } else {
    matrix(runif(100), nrow = 20)
  }

  col_data <- data.frame(sample = paste0("Sample", 1:20), group = g_labels)
  PomaCreateObject(features = matrix_data, metadata = col_data)
}
54 | 66 |
|
67 |
# Build in-memory mock inputs for tests.
#
# Returns a list with two elements:
#   features - a 10 x 10 data frame filled with 100 runif() draws
#   metadata - a data frame with ID = 1:10 and a two-level factor Group
#              (five "A" followed by five "B")
create_mock_data <- function() {
  random_values <- matrix(runif(100), ncol = 10)
  group <- factor(rep(c("A", "B"), each = 5))

  list(
    features = as.data.frame(random_values),
    metadata = data.frame(ID = 1:10, Group = group)
  )
}
|
72 |
+ |
... | ... |
@@ -16,7 +16,7 @@ output: github_document |
16 | 16 |
| _BioC_ branch | Status | Version | Dependencies | Rank | |
17 | 17 |
|- |- |- |- |- | |
18 | 18 |
| [Release](http://bioconductor.org/packages/release/bioc/html/POMA.html) | [](https://bioconductor.org/checkResults/release/bioc-LATEST/POMA/) | [](https://www.bioconductor.org/packages/POMA) | [](http://bioconductor.org/packages/release/bioc/html/POMA.html#since) | [](https://bioconductor.org/packages/stats/bioc/POMA) | |
19 |
-| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) | [](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) | [](https://bioconductor.org/packages/stats/bioc/POMA) | |
|
19 |
+| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) | [](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) | [](https://bioconductor.org/packages/stats/bioc/POMA) | |
|
20 | 20 |
|
21 | 21 |
<!-- badges: end --> |
22 | 22 |
|
... | ... |
@@ -18,7 +18,7 @@ v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/li |
18 | 18 |
| *BioC* branch | Status | Version | Dependencies | Rank | |
19 | 19 |
|-------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------| |
20 | 20 |
| [Release](http://bioconductor.org/packages/release/bioc/html/POMA.html) | [](https://bioconductor.org/checkResults/release/bioc-LATEST/POMA/) | [](https://www.bioconductor.org/packages/POMA) | [](http://bioconductor.org/packages/release/bioc/html/POMA.html#since) | [](https://bioconductor.org/packages/stats/bioc/POMA) | |
21 |
-| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) | [](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) | [](https://bioconductor.org/packages/stats/bioc/POMA) | |
|
21 |
+| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) | [](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) | [](https://bioconductor.org/packages/stats/bioc/POMA) | |
|
22 | 22 |
|
23 | 23 |
<!-- badges: end --> |
24 | 24 |
|
25 | 25 |
deleted file mode 100644 |
... | ... |
@@ -1,239 +0,0 @@ |
1 |
-title: "Exploratory Data Analysis Report" |
|
2 |
-subtitle: "Generated with POMA `r paste0('(', packageVersion('POMA'), ')')`" |
|
3 |
-author: '`r paste0(username, institution)`' |
|
4 |
-date: '`r Sys.Date()`' |
|
5 |
-output: |
|
6 |
- html_document: |
|
7 |
- toc: true |
|
8 |
- number_sections: true |
|
9 |
- |
|
10 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
11 |
-# This file is part of POMA. |
|
12 |
- |
|
13 |
-# POMA is free software: you can redistribute it and/or modify |
|
14 |
-# it under the terms of the GNU General Public License as published by |
|
15 |
-# the Free Software Foundation, either version 3 of the License, or |
|
16 |
-# (at your option) any later version. |
|
17 |
- |
|
18 |
-# POMA is distributed in the hope that it will be useful, |
|
19 |
-# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
20 |
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
21 |
-# GNU General Public License for more details. |
|
22 |
- |
|
23 |
-# You should have received a copy of the GNU General Public License |
|
24 |
-# along with POMA. If not, see <https://www.gnu.org/licenses/>. |
|
25 |
- |
|
26 |
-library(POMA) |
|
27 |
- |
|
28 |
-e <- t(SummarizedExperiment::assay(data)) |
|
29 |
-target <- SummarizedExperiment::colData(data) %>% |
|
30 |
- as.data.frame() %>% |
|
31 |
- tibble::rownames_to_column("ID") %>% |
|
32 |
- dplyr::rename(Group = 2) %>% |
|
33 |
- dplyr::select(ID, Group) |
|
34 |
-``` |
|
35 |
- |
|
36 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
37 |
-if(clean_outliers){ |
|
38 |
- imputed <- PomaImpute(data, method = imputation) |
|
39 |
- pre_processed <- PomaNorm(imputed, method = normalization) %>% |
|
40 |
- PomaOutliers(coef = coeff_outliers) |
|
41 |
-} else { |
|
42 |
- imputed <- PomaImpute(data, method = imputation) |
|
43 |
- pre_processed <- PomaNorm(imputed, method = normalization) |
|
44 |
-} |
|
45 |
-``` |
|
46 |
- |
|
47 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
48 |
-# zeros |
|
49 |
-zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>% |
|
50 |
- tibble::rownames_to_column("names") %>% |
|
51 |
- dplyr::filter(number != 0) |
|
52 |
- |
|
53 |
-all_zero <- zeros %>% |
|
54 |
- dplyr::filter(number == nrow(e)) |
|
55 |
- |
|
56 |
-# missing values |
|
57 |
-nas <- data.frame(number = colSums(is.na(e))) %>% |
|
58 |
- tibble::rownames_to_column("names") %>% |
|
59 |
- dplyr::filter(number != 0) |
|
60 |
- |
|
61 |
-# zero variance |
|
62 |
-var_zero <- e %>% |
|
63 |
- as.data.frame() %>% |
|
64 |
- dplyr::summarise_all(~ var(., na.rm = TRUE)) %>% |
|
65 |
- t() %>% |
|
66 |
- as.data.frame() %>% |
|
67 |
- tibble::rownames_to_column("names") %>% |
|
68 |
- dplyr::filter(V1 == 0) |
|
69 |
-``` |
|
70 |
- |
|
71 |
-# Know your data |
|
72 |
- |
|
73 |
- + Your data have **`r nrow(e)`** samples, **`r ncol(e)`** features and **`r length(table(target$Group))`** groups, that are **`r noquote(paste(shQuote(levels(as.factor(target$Group))), collapse=", "))`**. `r ifelse(ncol(SummarizedExperiment::colData(data)) > 1, paste0("Furthermore, **", ncol(SummarizedExperiment::colData(data)) - 1,"** covariates have been found in your data. These covariates are **",noquote(paste(shQuote(paste0(colnames(SummarizedExperiment::colData(data))[2:ncol(SummarizedExperiment::colData(data))])), collapse=", ")),"**."), "")` |
|
74 |
- |
|
75 |
- + A **`r round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2)`%** of values in your data are NAs (missing values). `r ifelse(nrow(nas) >= 1, paste0("Variables that have NA values are **",noquote(paste(shQuote(paste0(nas$names," (",nas$number,")")), collapse=", ")),"**."), "")` |
|
76 |
- |
|
77 |
- + A **`r round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2)`%** of values in your data are zeros. `r ifelse(nrow(zeros) >= 1, paste0("Variables that have zeros are **",noquote(paste(shQuote(paste0(zeros$names," (",zeros$number,")")), collapse=", ")),"**."), "")` |
|
78 |
- |
|
79 |
- + Removed from the exploratory data analysis **`r nrow(all_zero)`** features that only have zeros. `r ifelse(nrow(all_zero) >= 1, paste0("These variables are **",noquote(paste(shQuote(all_zero$names), collapse=", ")),"**."), "")` |
|
80 |
- |
|
81 |
- + Removed from the exploratory data analysis **`r nrow(var_zero)`** features that have zero variance. `r ifelse(nrow(var_zero) >= 1, paste0("These variables are **",noquote(paste(shQuote(var_zero$names), collapse=", ")),"**."), "")` |
|
82 |
- |
|
83 |
-## Summary Tables |
|
84 |
- |
|
85 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
86 |
-summary_table1 <- data.frame(Samples = nrow(e), |
|
87 |
- Features = ncol(e), |
|
88 |
- Covariates = ncol(SummarizedExperiment::colData(data)) - 1) |
|
89 |
- |
|
90 |
-summary_table2 <- data.frame(Number_Zeros = sum(zeros$number), |
|
91 |
- Percentage_Zeros = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%")) |
|
92 |
- |
|
93 |
-summary_table3 <- data.frame(Number_Missings = sum(is.na(e)), |
|
94 |
- Percentage_Missings = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%")) |
|
95 |
- |
|
96 |
-summary_table1 |
|
97 |
-summary_table2 |
|
98 |
-summary_table3 |
|
99 |
-``` |
|
100 |
- |
|
101 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
102 |
-if (nrow(nas) >= 1){ |
|
103 |
- ggplot2::ggplot(nas, ggplot2::aes(reorder(names, number), number, fill = number)) + |
|
104 |
- ggplot2::geom_col() + |
|
105 |
- ggplot2::labs(x = NULL, |
|
106 |
- y = "Missing values", |
|
107 |
- title = "Missing Value Plot") + |
|
108 |
- ggplot2::theme_bw() + |
|
109 |
- ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1), |
|
110 |
- legend.position = "none") + |
|
111 |
- ggplot2::scale_fill_viridis_c(begin = 0, end = 0.8) |
|
112 |
-} |
|
113 |
-``` |
|
114 |
- |
|
115 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
116 |
-if (nrow(zeros) >= 1){ |
|
117 |
- ggplot2::ggplot(zeros, ggplot2::aes(reorder(names, number), number, fill = number)) + |
|
118 |
- ggplot2::geom_col() + |
|
119 |
- ggplot2::labs(x = NULL, |
|
120 |
- y = "Zeros", |
|
121 |
- title = "Zeros Plot") + |
|
122 |
- ggplot2::theme_bw() + |
|
123 |
- ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1), |
|
124 |
- legend.position = "none") + |
|
125 |
- ggplot2::scale_fill_viridis_c(begin = 0, end = 0.8) |
|
126 |
-} |
|
127 |
-``` |
|
128 |
- |
|
129 |
-## Samples by Group |
|
130 |
- |
|
131 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
132 |
-counts <- data.frame(table(target$Group)) |
|
133 |
-colnames(counts) <- c("Group", "Counts") |
|
134 |
- |
|
135 |
-ggplot2::ggplot(counts, ggplot2::aes(reorder(Group, Counts), Counts, fill = Group)) + |
|
136 |
- ggplot2::geom_col() + |
|
137 |
- ggplot2::labs(x = NULL, |
|
138 |
- y = "Counts") + |
|
139 |
- ggplot2::theme_bw() + |
|
140 |
- ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1), |
|
141 |
- legend.position = "none") + |
|
142 |
- ggplot2::scale_fill_viridis_d(begin = 0, end = 0.8) |
|
143 |
-``` |
|
144 |
- |
|
145 |
-# Normalization Plots |
|
146 |
- |
|
147 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
148 |
-indNum <- nrow(SummarizedExperiment::colData(pre_processed)) |
|
149 |
-jttr <- ifelse(indNum <= 10, TRUE, FALSE) |
|
150 |
- |
|
151 |
-p1 <- PomaBoxplots(imputed, |
|
152 |
- jitter = jttr, |
|
153 |
- label_size = 8, |
|
154 |
- legend_position = "bottom") + |
|
155 |
- ggplot2::labs(x = "Samples", |
|
156 |
- y = "Value", |
|
157 |
- title = "Not Normalized") |
|
158 |
- |
|
159 |
-p2 <- PomaBoxplots(pre_processed, |
|
160 |
- jitter = jttr, |
|
161 |
- label_size = 8, |
|
162 |
- legend_position = "bottom") + |
|
163 |
- ggplot2::labs(x = "Samples", |
|
164 |
- y = "Value", |
|
165 |
- title = paste0("Normalized (", normalization, ")")) |
|
166 |
- |
|
167 |
-p1 |
|
168 |
-p2 |
|
169 |
-``` |
|
170 |
- |
|
171 |
-# Group Distribution Plots |
|
172 |
- |
|
173 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
174 |
-p3 <- PomaDensity(imputed) + |
|
175 |
- ggplot2::ggtitle("Not Normalized") |
|
176 |
- |
|
177 |
-p4 <- PomaDensity(pre_processed) + |
|
178 |
- ggplot2::ggtitle(paste0("Normalized (", normalization, ")")) |
|
179 |
- |
|
180 |
-p3 |
|
181 |
-p4 |
|
182 |
-``` |
|
183 |
- |
|
184 |
-# Outlier Detection |
|
185 |
- |
|
186 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
187 |
-outliers <- data %>% |
|
188 |
- PomaImpute(method = imputation) %>% |
|
189 |
- PomaNorm(method = normalization) %>% |
|
190 |
- PomaOutliers(do = "analyze", coef = coeff_outliers) |
|
191 |
-outliers$polygon_plot |
|
192 |
-``` |
|
193 |
- |
|
194 |
-**`r nrow(outliers$outliers)`** possible outliers detected in your data. `r ifelse(nrow(outliers$outliers) >= 1, paste0("These outliers are **",noquote(paste(shQuote(paste0(outliers$outliers$sample)), collapse=", ")),"**."), "")` |
|
195 |
- |
|
196 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
197 |
-if(nrow(outliers$outliers) >= 1){ |
|
198 |
- outliers$outliers |
|
199 |
- } |
|
200 |
-``` |
|
201 |
- |
|
202 |
-# High Correlated Features (r > 0.97) |
|
203 |
- |
|
204 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE} |
|
205 |
-correlations <- PomaCorr(pre_processed, |
|
206 |
- label_size = 8) |
|
207 |
- |
|
208 |
-high_correlations <- correlations$correlations %>% |
|
209 |
- dplyr::filter(abs(corr) > 0.97) |
|
210 |
-``` |
|
211 |
- |
|
212 |
-There are **`r nrow(high_correlations)`** high correlated feature pairs in your data. `r ifelse(nrow(high_correlations) >= 1, paste0("These features are **",noquote(paste(shQuote(paste0(high_correlations$feature1, " - " , high_correlations$feature2)), collapse=", ")),"**."), "")` |
|
213 |
- |
|
214 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300, fig.align = 'center'} |
|
215 |
-correlations$corrplot |
|
216 |
-``` |
|
217 |
- |
|
218 |
-# Heatmap and Clustering |
|
219 |
- |
|
220 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
221 |
-PomaHeatmap(pre_processed, sample_names = FALSE) |
|
222 |
-``` |
|
223 |
- |
|
224 |
-# Principal Component Analysis |
|
225 |
- |
|
226 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
227 |
-PomaMultivariate(pre_processed, method = "pca", ellipse = FALSE)$scoresplot |
|
228 |
-``` |
|
229 |
- |
|
230 |
-# Uniform Manifold Approximation and Projection Clustering |
|
231 |
- |
|
232 |
-```{r, echo = FALSE, warning = FALSE, comment = NA, message = FALSE, dpi = 300} |
|
233 |
-PomaUMAP(pre_processed, |
|
234 |
- hdbscan_minpts = 5, |
|
235 |
- show_clusters = TRUE)$umap_plot |
|
236 |
-``` |
|
237 |
- |
238 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,40 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/PomaEDA.R |
|
3 |
-\name{PomaEDA} |
|
4 |
-\alias{PomaEDA} |
|
5 |
-\title{Automatic Exploratory Data Analysis HTML Report} |
|
6 |
-\usage{ |
|
7 |
-PomaEDA( |
|
8 |
- data, |
|
9 |
- imputation = "knn", |
|
10 |
- normalization = "log_pareto", |
|
11 |
- clean_outliers = TRUE, |
|
12 |
- coeff_outliers = 1.5, |
|
13 |
- username = NULL, |
|
14 |
- institution = NULL |
|
15 |
-) |
|
16 |
-} |
|
17 |
-\arguments{ |
|
18 |
-\item{data}{A \code{SummarizedExperiment} object.} |
|
19 |
- |
|
20 |
-\item{imputation}{Imputation method. See \code{?POMA::PomaImpute()}.} |
|
21 |
- |
|
22 |
-\item{normalization}{Normalization method. See \code{?POMA::PomaNorm()}.} |
|
23 |
- |
|
24 |
-\item{clean_outliers}{Logical. If it's set to TRUE, outliers will be removed from EDA.} |
|
25 |
- |
|
26 |
-\item{coeff_outliers}{This value corresponds to the classical 1.5 in \eqn{Q3 + 1.5*IQR} formula to detect outliers. See \code{?POMA::PomaOutliers()}.} |
|
27 |
- |
|
28 |
-\item{username}{Author name in the report.} |
|
29 |
- |
|
30 |
-\item{institution}{Institution name in the report.} |
|
31 |
-} |
|
32 |
-\value{ |
|
33 |
-An exploratory data analysis HTML report. |
|
34 |
-} |
|
35 |
-\description{ |
|
36 |
-This function automatically generates a HTML report with different exploratory plots and tables from an \code{SummarizedExperiment} object. |
|
37 |
-} |
|
38 |
-\author{ |
|
39 |
-Pol Castellano-Escuder |
|
40 |
-} |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
\alias{PomaLMM} |
5 | 5 |
\title{Linear Mixed Models} |
6 | 6 |
\usage{ |
7 |
-PomaLMM(data, x = NULL, y = NULL, adjust = "fdr", clean_plot = FALSE, ...) |
|
7 |
+PomaLMM(data, x = NULL, y = NULL, adjust = "fdr", clean_plot = FALSE) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{data}{A \code{SummarizedExperiment} object.} |
... | ... |
@@ -1,68 +1,42 @@ |
1 |
-context("PomaBoxplots") |
|
2 | 1 |
|
3 |
-test_that("PomaBoxplots works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- |
|
7 |
- norm_none <- PomaNorm(st000284, method = "none") |
|
8 |
- norm_ls <- PomaNorm(st000284, method = "log_scaling") |
|
9 |
- |
|
10 |
- a <- PomaBoxplots(norm_none, label_size = 12) |
|
11 |
- b <- PomaBoxplots(norm_ls, label_size = 10) |
|
12 |
- c <- PomaBoxplots(norm_none, x = "features") |
|
13 |
- d <- PomaBoxplots(norm_ls, x = "features") |
|
14 |
- |
|
15 |
- e <- PomaBoxplots(norm_none, x = "samples") |
|
16 |
- |
|
17 |
- f <- PomaBoxplots(norm_none, x = "samples", violin = TRUE) |
|
18 |
- g <- PomaBoxplots(norm_none, x = "samples", violin = FALSE) |
|
19 |
- h <- PomaBoxplots(norm_none, x = "features", violin = TRUE) |
|
20 |
- i <- PomaBoxplots(norm_none, x = "features", violin = FALSE) |
|
21 |
- |
|
22 |
- j <- PomaBoxplots(norm_ls, x = "features", feature_name = "methyl_succinate") |
|
23 |
- k <- PomaBoxplots(norm_ls, x = "features", feature_name = c("methyl_succinate", "linoleic_acid")) |
|
24 |
- |
|
25 |
- |
|
26 |
- df_a <- ggplot2::layer_data(a) |
|
27 |
- df_b <- ggplot2::layer_data(b) |
|
28 |
- df_c <- ggplot2::layer_data(c) |
|
29 |
- df_d <- ggplot2::layer_data(d) |
|
30 |
- df_e <- ggplot2::layer_data(e) |
|
31 |
- |
|
32 |
- df_f <- ggplot2::layer_data(f) |
|
33 |
- df_g <- ggplot2::layer_data(g) |
|
34 |
- df_h <- ggplot2::layer_data(h) |
|
35 |
- df_i <- ggplot2::layer_data(i) |
|
36 |
- |
|
37 |
- df_j <- ggplot2::layer_data(j) |
|
38 |
- df_k <- ggplot2::layer_data(k) |
|
39 |
- |
|
40 |
- #### |
|
41 |
- |
|
42 |
- expect_true(min(df_a$ymin) == min(df_c$ymin)) |
|
43 |
- expect_true(min(df_b$ymin) != min(df_d$ymin)) |
|
44 |
- expect_false(all(df_a$ymin == df_b$ymin)) |
|
45 |
- expect_false(all(df_c$ymin == df_d$ymin)) |
|
46 |
- |
|
47 |
- expect_equal(df_a, df_e) |
|
48 |
- |
|
49 |
- expect_false(nrow(df_j) == nrow(df_k)) |
|
50 |
- expect_false(nrow(df_j) == nrow(df_h)) |
|
51 |
- expect_false(nrow(df_k) == nrow(df_i)) |
|
52 |
- |
|
53 |
- ## |
|
54 |
- |
|
55 |
- expect_error(PomaBoxplots(norm_ls, x = "samp")) |
|
56 |
- expect_error(PomaBoxplots(x = "sample")) |
|
57 |
- expect_error(PomaBoxplots(iris, x = "sample")) |
|
58 |
- |
|
59 |
- ## |
|
60 |
- |
|
61 |
- expect_error(PomaBoxplots(norm_ls, x = "features", feature_name = "hello")) |
|
62 |
- expect_error(PomaBoxplots(norm_ls, x = "features", feature_name = "methyl_succina")) |
|
63 |
- |
|
64 |
- expect_message(PomaBoxplots(norm_ls, x = "features", feature_name = c("methyl_succina", "linoleic_acid"))) |
|
65 |
- expect_message(PomaBoxplots(norm_ls, x = "features", feature_name = c("methyl_succinate", "linoleic_aci"))) |
|
66 |
- |
|
2 |
test_that("PomaBoxplots handles valid SummarizedExperiment objects", {
  mock_se <- create_mock_summarized_experiment()

  # Exercise both values of `x` used by this suite.
  plot_samples <- PomaBoxplots(mock_se, x = "samples")
  plot_features <- PomaBoxplots(mock_se, x = "features")

  # expect_s3_class() replaces expect_is(), which is deprecated in the
  # 3rd edition of testthat.
  expect_s3_class(plot_samples, "ggplot")
  expect_s3_class(plot_features, "ggplot")
})
|
9 |
+ |
|
10 |
test_that("PomaBoxplots stops with non-SummarizedExperiment objects", {
  # A plain data.frame is passed instead of a SummarizedExperiment; the call
  # is expected to fail with the class-check message.
  not_an_se <- data.frame(matrix(runif(100), ncol = 10))
  expect_error(
    PomaBoxplots(not_an_se),
    "data is not a SummarizedExperiment object"
  )
})
|
14 |
+ |
|
15 |
test_that("PomaBoxplots handles violin plot option correctly", {
  mock_se <- create_mock_summarized_experiment()
  plot_violin <- PomaBoxplots(mock_se, violin = TRUE)

  # expect_s3_class() replaces expect_is(), which is deprecated in the
  # 3rd edition of testthat.
  expect_s3_class(plot_violin, "ggplot")
})
|
20 |
+ |
|
21 |
test_that("PomaBoxplots stops with incorrect x argument", {
  mock_se <- create_mock_summarized_experiment()

  # An unsupported `x` value is expected to raise the argument-validation error.
  expect_error(
    PomaBoxplots(mock_se, x = "invalid_option"),
    "Incorrect value for x argument"
  )
})
|
25 |
+ |
|
26 |
test_that("PomaBoxplots handles feature_name parameter correctly", {
  mock_se <- create_mock_summarized_experiment()

  # Restrict the feature-wise plot to the single feature named "V2".
  plot_specific_feature <- PomaBoxplots(mock_se, x = "features", feature_name = "V2")

  # expect_s3_class() replaces expect_is(), which is deprecated in the
  # 3rd edition of testthat.
  expect_s3_class(plot_specific_feature, "ggplot")
})
|
31 |
+ |
|
32 |
test_that("PomaBoxplots stops with non-existing feature names", {
  mock_se <- create_mock_summarized_experiment()

  # A feature name absent from the mock data is expected to raise an error.
  expect_error(
    PomaBoxplots(mock_se, x = "features", feature_name = "non_existing_feature"),
    "Features not found"
  )
})
|
36 |
+ |
|
37 |
test_that("PomaBoxplots applies theme parameters correctly", {
  mock_se <- create_mock_summarized_experiment()
  plot_with_theme <- PomaBoxplots(mock_se, theme_params = list(legend_title = TRUE))

  # expect_s3_class() replaces expect_is(), which is deprecated in the
  # 3rd edition of testthat.
  expect_s3_class(plot_with_theme, "ggplot")
})
68 | 42 |
|
... | ... |
@@ -1,64 +1,34 @@ |
1 |
-context("PomaClust") |
|
2 | 1 |
|
3 |
-test_that("PomaClust works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- data("st000336") |
|
7 |
- imp_st000336 <- PomaImpute(st000336, method = "knn") |
|
8 |
- |
|
9 |
- a <- PomaClust(st000284) |
|
10 |
- b <- PomaClust(imp_st000336) |
|
11 |
- |
|
12 |
- c <- PomaClust(st000284, method = "maximum", k = 5, show_clusters = FALSE, labels = TRUE) |
|
13 |
- d <- PomaClust(imp_st000336, method = "manhattan", k = 2, show_clusters = FALSE, labels = TRUE) |
|
14 |
- |
|
15 |
- e <- PomaClust(st000284, method = "canberra", k = 6, show_clusters = FALSE, labels = TRUE, show_group = TRUE) |
|
16 |
- f <- PomaClust(imp_st000336, method = "minkowski", k = 4, show_clusters = TRUE, labels = TRUE, show_group = TRUE) |
|
17 |
- |
|
18 |
- g <- PomaClust(st000284) |
|
19 |
- h <- PomaClust(st000284, k = 3) |
|
20 |
- |
|
21 |
- ## table |
|
22 |
- |
|
23 |
- expect_equal(nrow(a$mds_values), nrow(c$mds_values)) |
|
24 |
- expect_equal(nrow(b$mds_values), nrow(d$mds_values)) |
|
25 |
- expect_equal(nrow(e$mds_values), nrow(a$mds_values)) |
|
26 |
- expect_equal(nrow(f$mds_values), nrow(b$mds_values)) |
|
27 |
- |
|
28 |
- expect_equal(5, ncol(a$mds_values)) |
|
29 |
- expect_equal(5, ncol(b$mds_values)) |
|
30 |
- expect_equal(5, ncol(c$mds_values)) |
|
31 |
- expect_equal(5, ncol(d$mds_values)) |
|
32 |
- expect_equal(5, ncol(e$mds_values)) |
|
33 |
- expect_equal(5, ncol(f$mds_values)) |
|
34 |
- |
|
35 |
- ## plot |
|
36 |
- |
|
37 |
- expect_equal(class(a$mds_plot)[2], "ggplot") |
|
38 |
- expect_equal(class(b$mds_plot)[2], "ggplot") |
|
39 |
- expect_equal(class(c$mds_plot)[2], "ggplot") |
|
40 |
- expect_equal(class(d$mds_plot)[2], "ggplot") |
|
41 |
- expect_equal(class(e$mds_plot)[2], "ggplot") |
|
42 |
- expect_equal(class(f$mds_plot)[2], "ggplot") |
|
43 |
- |
|
44 |
- ## optimum clusters |
|
45 |
- |
|
46 |
- expect_equal(class(g$optimum_cluster_plot)[2], "ggplot") |
|
47 |
- expect_equal(class(h$optimum_cluster_plot)[2], "ggplot") |
|
48 |
- |
|
49 |
- # expect_equal(g$optimum_cluster_num, h$optimum_cluster_num) |
|
50 |
- |
|
51 |
- expect_equal(5, ncol(g$mds_values)) |
|
52 |
- expect_equal(5, ncol(h$mds_values)) |
|
53 |
- |
|
54 |
- expect_false(length(levels(g$mds_values$clust)) == length(levels(h$mds_values$clust))) |
|
55 |
- |
|
56 |
- ## errors |
|
57 |
- |
|
58 |
- expect_error(PomaClust()) |
|
59 |
- expect_error(PomaClust(iris)) |
|
60 |
- expect_error(PomaClust(st000284, method = "euclid")) |
|
61 |
- expect_error(PomaClust(st000284, method = "max")) |
|
62 |
- |
|
2 |
+test_that("PomaClust handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ result <- PomaClust(data) |
|
5 |
+ expect_is(result, "list") |
|
6 |
+ expect_true(all(c("mds_coordinates", "mds_plot", "optimal_clusters_number", "optimal_clusters_plot") %in% names(result))) |
|
7 |
+}) |
|
8 |
+ |
|
9 |
+test_that("PomaClust stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaClust(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
13 |
+ |
|
14 |
+test_that("PomaClust handles different methods correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment() |
|
16 |
+ for (method in c("euclidean", "maximum", "manhattan", "canberra", "minkowski")) { |
|
17 |
+ result <- PomaClust(data, method = method) |
|
18 |
+ expect_is(result, "list") |
|
19 |
+ } |
|
20 |
+}) |
|
21 |
+ |
|
22 |
+test_that("PomaClust stops with incorrect method argument", { |
|
23 |
+ data <- create_mock_summarized_experiment() |
|
24 |
+ expect_error(PomaClust(data, method = "invalid_method"), "Incorrect value for method argument") |
|
25 |
+}) |
|
26 |
+ |
|
27 |
+test_that("PomaClust handles show_clusters and labels parameters correctly", { |
|
28 |
+ data <- create_mock_summarized_experiment() |
|
29 |
+ result_with_clusters <- PomaClust(data, show_clusters = TRUE) |
|
30 |
+ result_with_labels <- PomaClust(data, labels = TRUE) |
|
31 |
+ expect_is(result_with_clusters, "list") |
|
32 |
+ expect_is(result_with_labels, "list") |
|
63 | 33 |
}) |
64 | 34 |
|
... | ... |
@@ -1,72 +1,33 @@ |
1 |
-context("PomaCorr") |
|
2 | 1 |
|
3 |
-test_that("PomaCorr works", { |
|
4 |
- |
|
5 |
- library(ggraph) |
|
6 |
- |
|
7 |
- data("st000284") |
|
8 |
- data("st000336") |
|
9 |
- imp_st000336 <- PomaImpute(st000336, method = "knn") |
|
10 |
- |
|
11 |
- a <- PomaCorr(st000284) |
|
12 |
- b <- PomaCorr(imp_st000336) |
|
13 |
- |
|
14 |
- c <- PomaCorr(st000284, corr_type = "glasso", coeff = 0.3, method = "spearman") |
|
15 |
- d <- PomaCorr(imp_st000336 , corr_type = "glasso", coeff = 0.5) |
|
16 |
- |
|
17 |
- e <- PomaCorr(st000284, corr_type = "glasso", coeff = 0.5) |
|
18 |
- f <- PomaCorr(st000284, coeff = 0.5) |
|
19 |
- |
|
20 |
- ## table |
|
21 |
- |
|
22 |
- expect_equal(((113*113)-113)/2, nrow(a$correlations)) |
|
23 |
- expect_equal(((30*30)-30)/2, nrow(b$correlations)) |
|
24 |
- expect_equal(((113*113)-113)/2, nrow(c$correlations)) |
|
25 |
- expect_equal(((30*30)-30)/2, nrow(d$correlations)) |
|
26 |
- |
|
27 |
- expect_equal(5, ncol(a$correlations)) |
|
28 |
- expect_equal(5, ncol(b$correlations)) |
|
29 |
- expect_equal(5, ncol(c$correlations)) |
|
30 |
- expect_equal(5, ncol(d$correlations)) |
|
31 |
- |
|
32 |
- expect_equal(class(e$data_glasso)[1], "tbl_df") |
|
33 |
- expect_equal(ncol(e$data_glasso), 3) |
|
34 |
- expect_equal(ncol(e$data_glasso) + 2, ncol(a$correlations)) |
|
35 |
- expect_equal(class(f$data_glasso), "NULL") |
|
36 |
- |
|
37 |
- ## corrplot |
|
38 |
- |
|
39 |
- expect_equal(class(a$corrplot)[2], "ggplot") |
|
40 |
- expect_equal(class(b$corrplot)[2], "ggplot") |
|
41 |
- expect_equal(class(c$corrplot)[2], "ggplot") |
|
42 |
- expect_equal(class(d$corrplot)[2], "ggplot") |
|
43 |
- |
|
44 |
- ## networks |
|
45 |
- |
|
46 |
- expect_true(class(a$graph)[1] == "ggraph") |
|
47 |
- expect_true(class(b$graph)[1] == "ggraph") |
|
48 |
- expect_true(class(c$graph)[1] == "ggraph") |
|
49 |
- expect_true(class(d$graph)[1] == "ggraph") |
|
50 |
- |
|
51 |
- expect_true(113 > nrow(a$graph$data)) |
|
52 |
- expect_equal(6, ncol(a$graph$data)) |
|
53 |
- |
|
54 |
- expect_true(30 > nrow(b$graph$data)) |
|
55 |
- expect_equal(6, ncol(b$graph$data)) |
|
56 |
- |
|
57 |
- expect_true(113 > nrow(c$graph$data)) |
|
58 |
- expect_true(30 > nrow(d$graph$data)) |
|
59 |
- |
|
60 |
- ## errors |
|
61 |
- |
|
62 |
- expect_error(PomaCorr()) |
|
63 |
- expect_error(PomaCorr(iris)) |
|
64 |
- expect_error(PomaCorr(st000284, corr_type = "co")) |
|
65 |
- expect_error(PomaCorr(st000284, coeff = 2)) |
|
66 |
- expect_error(PomaCorr(st000284, coeff = -0.2)) |
|
67 |
- expect_error(PomaCorr(st000284, method = "pear")) |
|
68 |
- expect_error(PomaCorr(st000284, corr_type = "cor", coeff = 1)) |
|
69 |
- expect_error(PomaCorr(st000284, corr_type = "glasso", coeff = 1)) |
|
70 |
- |
|
2 |
+test_that("PomaCorr handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ result <- PomaCorr(data) |
|
5 |
+ expect_is(result, "list") |
|
6 |
+ expect_true(all(c("correlations", "corrplot") %in% names(result))) |
|
7 |
+}) |
|
8 |
+ |
|
9 |
+test_that("PomaCorr stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaCorr(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
13 |
+ |
|
14 |
+test_that("PomaCorr handles different methods correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment() |
|
16 |
+ for (method in c("pearson", "kendall", "spearman")) { |
|
17 |
+ result <- PomaCorr(data, method = method) |
|
18 |
+ expect_is(result, "list") |
|
19 |
+ expect_true(all(c("correlations", "corrplot") %in% names(result))) |
|
20 |
+ } |
|
21 |
+}) |
|
22 |
+ |
|
23 |
+test_that("PomaCorr stops with incorrect method argument", { |
|
24 |
+ data <- create_mock_summarized_experiment() |
|
25 |
+ expect_error(PomaCorr(data, method = "invalid_method"), "Incorrect value for method argument") |
|
26 |
+}) |
|
27 |
+ |
|
28 |
+test_that("PomaCorr applies label_size and theme_params correctly", { |
|
29 |
+ data <- create_mock_summarized_experiment() |
|
30 |
+ result <- PomaCorr(data, label_size = 10, theme_params = list(base_size = 10)) |
|
31 |
+ expect_is(result, "list") |
|
71 | 32 |
}) |
72 | 33 |
|
... | ... |
@@ -1,54 +1,22 @@ |
1 |
-context("PomaCreateObject") |
|
2 | 1 |
|
3 |
-test_that("PomaCreateObject works", { |
|
4 |
- |
|
5 |
- target <- data.frame(ID = c("One", "Two", "Three", "Four"), |
|
6 |
- Group = c("Trtd", "Ctrl", "Trtd", "Ctrl"), |
|
7 |
- Smoking = c(1,0,0,1)) |
|
8 |
- |
|
9 |
- target2 <- data.frame(ID = c("Five", "One", "Three", "Two"), |
|
10 |
- Group = c("Ctrl", "Trtd", "Trtd", "Ctrl"), |
|
11 |
- Smoking = c(0,0,0,1)) |
|
12 |
- |
|
13 |
- target_error <- as.matrix(target) |
|
14 |
- |
|
15 |
- target_error_2 <- data.frame(ID = c("Five", "One", "Three", "Two"), |
|
16 |
- Group = c("Ctrl", "Trtd", "Trtd", "Ctrl"), |
|
17 |
- Smoking = c(0,0,NA,1)) |
|
18 |
- |
|
19 |
- features <- data.frame(Feat.1 = c(1,2,3,4), Feat.2 = c(6,3,7,3), Feat.3 = c(3,5,23,24)) |
|
20 |
- features_error <- data.frame(Feat.1 = c(1,2,3,4,5), Feat.2 = c(6,3,7,4,3), Feat.3 = c(3,4,5,23,24)) |
|
21 |
- |
|
22 |
- a <- PomaCreateObject(target, features) |
|
23 |
- b <- PomaCreateObject(target2, features) |
|
24 |
- |
|
25 |
- ## |
|
26 |
- |
|
27 |
- expect_true(validObject(a)) |
|
28 |
- expect_true(validObject(b)) |
|
29 |
- |
|
30 |
- ## |
|
31 |
- |
|
32 |
- expect_false(all(rownames(SummarizedExperiment::colData(a)) == rownames(SummarizedExperiment::colData(b)))) |
|
33 |
- |
|
34 |
- ## |
|
2 |
+test_that("PomaCreateObject handles features and metadata correctly", { |
|
3 |
+ mock_data <- create_mock_data() |
|
4 |
+ se_object <- PomaCreateObject(metadata = mock_data$metadata, features = mock_data$features) |
|
5 |
+ expect_is(se_object, "SummarizedExperiment") |
|
6 |
+}) |
|
35 | 7 |
|
36 |
- expect_error(PomaCreateObject(target_error, features)) |
|
37 |
- expect_error(PomaCreateObject(target, features_error)) |
|
38 |
- expect_error(PomaCreateObject(target)) |
|
39 |
- # expect_error(PomaCreateObject(features)) |
|
40 |
- |
|
41 |
- ## |
|
42 |
- |
|
43 |
- expect_false(all(colnames(features) == rownames(SummarizedExperiment::assay(a)))) |
|
44 |
- expect_false(all(colnames(target)[2:3] == names(SummarizedExperiment::colData(a)))) |
|
45 |
- expect_false(all(colnames(target2)[2:3] == names(SummarizedExperiment::colData(b)))) |
|
8 |
+test_that("PomaCreateObject handles missing metadata", { |
|
9 |
+ mock_data <- create_mock_data() |
|
10 |
+ se_object <- PomaCreateObject(features = mock_data$features) |
|
11 |
+ expect_is(se_object, "SummarizedExperiment") |
|
12 |
+}) |
|
46 | 13 |
|
47 |
- expect_true(ncol(target2) != ncol(SummarizedExperiment::colData(a))) |
|
48 |
- |
|
49 |
- ## |
|
50 |
- |
|
51 |
- expect_error(PomaCreateObject(target_error_2, features)) |
|
52 |
- |
|
14 |
+test_that("PomaCreateObject handles factor_levels parameter correctly", { |
|
15 |
+ mock_data <- create_mock_data() |
|
16 |
+ mock_data$metadata$numeric_var <- 1:10 |
|
17 |
+ se_object <- PomaCreateObject(metadata = mock_data$metadata, features = mock_data$features, factor_levels = 5) |
|
18 |
+ expect_is(se_object, "SummarizedExperiment") |
|
19 |
+ expect_true("numeric_var" %in% colnames(SummarizedExperiment::colData(se_object))) |
|
20 |
+ expect_true(is.numeric(SummarizedExperiment::colData(se_object)$numeric_var)) |
|
53 | 21 |
}) |
54 | 22 |
|
55 | 23 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+ |
|
2 |
+test_that("PomaDESeq handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment(integers = TRUE) |
|
4 |
+ DESeq_results <- PomaDESeq(data) |
|
5 |
+ expect_is(DESeq_results, "tbl_df") |
|
6 |
+}) |
|
7 |
+ |
|
8 |
+test_that("PomaDESeq stops with non-SummarizedExperiment objects", { |
|
9 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
10 |
+ expect_error(PomaDESeq(data), "data is not a SummarizedExperiment object") |
|
11 |
+}) |
|
12 |
+ |
|
13 |
+test_that("PomaDESeq handles different adjust methods correctly", { |
|
14 |
+ data <- create_mock_summarized_experiment(integers = TRUE) |
|
15 |
+ for (adjust_method in c("fdr", "holm", "hochberg", "hommel", "bonferroni", "BH", "BY")) { |
|
16 |
+ DESeq_results <- PomaDESeq(data, adjust = adjust_method) |
|
17 |
+ expect_is(DESeq_results, "tbl_df") |
|
18 |
+ } |
|
19 |
+}) |
|
20 |
+ |
|
21 |
+test_that("PomaDESeq requires metadata", { |
|
22 |
+ data <- create_mock_summarized_experiment(integers = TRUE) |
|
23 |
+ metadata_removed_data <- SummarizedExperiment::SummarizedExperiment(assays = SummarizedExperiment::assay(data)) |
|
24 |
+ expect_error(PomaDESeq(metadata_removed_data), "metadata file required") |
|
25 |
+}) |
|
26 |
+ |
... | ... |
@@ -1,55 +1,36 @@ |
1 |
-context("PomaDensity") |
|
2 | 1 |
|
3 |
-test_that("PomaDensity works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- |
|
7 |
- norm_none <- PomaNorm(st000284, method = "none") |
|
8 |
- norm_ls <- PomaNorm(st000284, method = "log_scaling") |
|
9 |
- |
|
10 |
- a <- PomaDensity(norm_none) |
|
11 |
- b <- PomaDensity(norm_ls) |
|
12 |
- c <- PomaDensity(norm_none, x = "features") |
|
13 |
- d <- PomaDensity(norm_ls, x = "features") |
|
14 |
- |
|
15 |
- e <- PomaDensity(norm_none, x = "samples") |
|
16 |
- |
|
17 |
- f <- PomaDensity(norm_ls, x = "features", feature_name = "methyl_succinate") |
|
18 |
- g <- PomaDensity(norm_ls, x = "features", feature_name = c("methyl_succinate", "linoleic_acid")) |
|
19 |
- |
|
20 |
- df_a <- ggplot2::layer_data(a) |
|
21 |
- df_b <- ggplot2::layer_data(b) |
|
22 |
- df_c <- ggplot2::layer_data(c) |
|
23 |
- df_d <- ggplot2::layer_data(d) |
|
24 |
- df_e <- ggplot2::layer_data(e) |
|
25 |
- df_f <- ggplot2::layer_data(f) |
|
26 |
- df_g <- ggplot2::layer_data(g) |
|
27 |
- |
|
28 |
- #### |
|
2 |
+test_that("PomaDensity handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ plot_samples <- PomaDensity(data, x = "samples") |
|
5 |
+ plot_features <- PomaDensity(data, x = "features") |
|
6 |
+ expect_is(plot_samples, "ggplot") |
|
7 |
+ expect_is(plot_features, "ggplot") |
|
8 |
+}) |
|
29 | 9 |
|
30 |
- expect_false(min(df_a$y) != min(df_c$y)) |
|
31 |
- expect_false(min(df_b$y) != min(df_d$y)) |
|
32 |
- expect_false(all(df_a$y == df_b$y)) |
|
33 |
- expect_false(all(df_c$y == df_d$y)) |
|
10 |
+test_that("PomaDensity stops with non-SummarizedExperiment objects", { |
|
11 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
12 |
+ expect_error(PomaDensity(data), "data is not a SummarizedExperiment object") |
|
13 |
+}) |
|
34 | 14 |
|
35 |
- expect_false(all(df_f$y == df_g$y)) |
|
36 |
- expect_false(all(df_f$y == df_d$y)) |
|
15 |
+test_that("PomaDensity stops with incorrect x argument", { |
|
16 |
+ data <- create_mock_summarized_experiment() |
|
17 |
+ expect_error(PomaDensity(data, x = "invalid_option"), "Incorrect value for x argument") |
|
18 |
+}) |
|
37 | 19 |
|
38 |
- expect_equal(df_a, df_e) |
|
20 |
+test_that("PomaDensity handles feature_name parameter correctly", { |
|
21 |
+ data <- create_mock_summarized_experiment() |
|
22 |
+ plot_specific_feature <- PomaDensity(data, x = "features", feature_name = c("V1")) |
|
23 |
+ expect_is(plot_specific_feature, "ggplot") |
|
24 |
+}) |
|
39 | 25 |
|
40 |
- expect_error(PomaDensity(norm_ls, x = "samp")) |
|
41 |
- expect_error(PomaDensity(norm_ls, x = "features", feature_name = "hello")) |
|
26 |
+test_that("PomaDensity stops with non-existing feature names", { |
|
27 |
+ data <- create_mock_summarized_experiment() |
|
28 |
+ expect_error(PomaDensity(data, x = "features", feature_name = c("non_existing_feature")), "Features not found") |
|
29 |
+}) |
|
42 | 30 |
|
43 |
- expect_error(PomaDensity(norm_ls, feature_name = "hello")) |
|
44 |
- expect_error(PomaDensity(norm_ls, feature_name = "methyl_succinat")) |
|
45 |
- |
|
46 |
- ## |
|
47 |
- |
|
48 |
- expect_error(PomaDensity(x = "sample")) |
|
49 |
- expect_error(PomaDensity(iris, x = "sample")) |
|
50 |
- |
|
51 |
- expect_message(PomaDensity(norm_ls, x = "features", feature_name = c("methyl_succinate", "linoleic_aci"))) |
|
52 |
- expect_message(PomaDensity(norm_ls, feature_name = c("methyl_succinat", "linoleic_acid"))) |
|
53 |
- |
|
31 |
+test_that("PomaDensity applies theme parameters correctly", { |
|
32 |
+ data <- create_mock_summarized_experiment() |
|
33 |
+ plot_with_theme <- PomaDensity(data, theme_params = list(legend_title = TRUE)) |
|
34 |
+ expect_is(plot_with_theme, "ggplot") |
|
54 | 35 |
}) |
55 | 36 |
|
... | ... |
@@ -1,29 +1,30 @@ |
1 |
-context("PomaHeatmap") |
|
2 | 1 |
|
3 |
-test_that("PomaHeatmap works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- data("st000336") |
|
7 |
- |
|
8 |
- a <- PomaHeatmap(st000284, sample_names = TRUE, feature_names = FALSE, show_legend = TRUE) |
|
9 |
- b <- PomaHeatmap(st000284, sample_names = FALSE, feature_names = FALSE, show_legend = FALSE) |
|
10 |
- |
|
11 |
- c <- PomaHeatmap(st000336, sample_names = TRUE, feature_names = TRUE, show_legend = TRUE) |
|
12 |
- d <- PomaHeatmap(st000336, sample_names = FALSE, feature_names = TRUE, show_legend = FALSE) |
|
13 |
- |
|
14 |
- ## |
|
15 |
- |
|
16 |
- expect_equal(class(a), class(b)) |
|
17 |
- expect_equal(class(b), class(d)) |
|
18 |
- |
|
19 |
- expect_false(length(a@matrix) == length(d@matrix)) |
|
20 |
- |
|
21 |
- expect_equal(length(a@row_order), ncol(t(SummarizedExperiment::assay(st000284)))) |
|
22 |
- expect_equal(length(d@row_order), ncol(t(SummarizedExperiment::assay(st000336)))) |
|
23 |
- |
|
24 |
- ## |
|
25 |
- |
|
26 |
- expect_error(PomaHeatmap()) |
|
27 |
- expect_error(PomaHeatmap(iris)) |
|
28 |
- |
|
29 |
-}) |
|
30 | 2 |
\ No newline at end of file |
3 |
+test_that("PomaHeatmap handles valid SummarizedExperiment objects", { |
|
4 |
+ data <- create_mock_summarized_experiment() |
|
5 |
+ heatmap_plot <- PomaHeatmap(data) |
|
6 |
+ expect_is(heatmap_plot, "Heatmap") |
|
7 |
+}) |
|
8 |
+ |
|
9 |
+test_that("PomaHeatmap stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaHeatmap(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
13 |
+ |
|
14 |
+test_that("PomaHeatmap handles covariates correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment() |
|
16 |
+ heatmap_plot_with_covs <- PomaHeatmap(data, covs = c("group")) |
|
17 |
+ expect_is(heatmap_plot_with_covs, "Heatmap") |
|
18 |
+}) |
|
19 |
+ |
|
20 |
+test_that("PomaHeatmap handles sample_names and feature_names parameters correctly", { |
|
21 |
+ data <- create_mock_summarized_experiment() |
|
22 |
+ heatmap_plot_with_names <- PomaHeatmap(data, sample_names = FALSE, feature_names = TRUE) |
|
23 |
+ expect_is(heatmap_plot_with_names, "Heatmap") |
|
24 |
+}) |
|
25 |
+ |
|
26 |
+test_that("PomaHeatmap handles show_legend parameter correctly", { |
|
27 |
+ data <- create_mock_summarized_experiment() |
|
28 |
+ heatmap_plot_no_legend <- PomaHeatmap(data, show_legend = FALSE) |
|
29 |
+ expect_is(heatmap_plot_no_legend, "Heatmap") |
|
30 |
+}) |
|
31 |
+ |
... | ... |
@@ -1,170 +1,37 @@ |
1 |
-context("PomaImpute") |
|
2 | 1 |
|
3 |
-test_that("PomaImpute works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- |
|
7 |
- data <- t(SummarizedExperiment::assay(st000284)) |
|
8 |
- |
|
9 |
- data <- data*round(runif(n = 1, min = 0.01, max = 0.99), 3) # just to create decimals |
|
10 |
- data[1:4, 5] <- 0 # create some zeros in the first group |
|
11 |
- data[73:77, 5] <- 0 # create some zeros in the second group |
|
12 |
- |
|
13 |
- data[10:14, 5] <- NA # create some NA in the first group |
|
14 |
- data[78:81, 5] <- NA # create some NA in the second group |
|
15 |
- |
|
16 |
- colnames(data) <- gsub("_", ":", colnames(data)) |
|
17 |
- |
|
18 |
- target <- SummarizedExperiment::colData(st000284) %>% |
|
19 |
- as.data.frame() %>% |
|
20 |
- tibble::rownames_to_column() |
|
21 |
- testimput <- PomaCreateObject(features = data, metadata = target) |
|
22 |
- |
|
23 |
- a <- ncol(t(SummarizedExperiment::assay(PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = FALSE, cutoff = 8)))) |
|
24 |
- b <- ncol(t(SummarizedExperiment::assay(PomaImpute(testimput, method = "knn", zeros_as_na = TRUE, remove_na = FALSE, cutoff = 8)))) |
|
25 |
- c <- ncol(t(SummarizedExperiment::assay(PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 8)))) |
|
26 |
- d <- ncol(t(SummarizedExperiment::assay(PomaImpute(testimput, method = "knn", zeros_as_na = TRUE, remove_na = TRUE, cutoff = 8)))) |
|
27 |
- |
|
28 |
- e <- ncol(t(SummarizedExperiment::assay(PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 20)))) |
|
29 |
- f <- ncol(t(SummarizedExperiment::assay(PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 10)))) |
|
30 |
- |
|
31 |
- g <- PomaImpute(testimput, method = "half_min", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 20) |
|
32 |
- h <- PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 20) |
|
33 |
- |
|
34 |
- i <- PomaImpute(testimput, method = "half_min", zeros_as_na = FALSE, remove_na = FALSE, cutoff = 1) |
|
35 |
- j <- PomaImpute(testimput, method = "mean", zeros_as_na = FALSE, remove_na = FALSE, cutoff = 1) |
|
36 |
- k <- PomaImpute(testimput, method = "median", zeros_as_na = FALSE, remove_na = FALSE, cutoff = 1) |
|
37 |
- |
|
38 |
- l <- PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 20) |
|
39 |
- m <- PomaImpute(testimput, method = "knn") |
|
40 |
- |
|
41 |
- n <- PomaImpute(testimput, method = "none", remove_na = FALSE, cutoff = 2) |
|
42 |
- o <- PomaImpute(testimput, method = "none", remove_na = FALSE, cutoff = 5) |
|
43 |
- p <- PomaImpute(testimput, method = "none", cutoff = 20) |
|
44 |
- q <- PomaImpute(testimput, method = "min", cutoff = 20) |
|
45 |
- |
|
46 |
- data2 <- t(SummarizedExperiment::assay(testimput)) |
|
47 |
- |
|
48 |
- data2[1:4, 5] <- 1000 |
|
49 |
- data2[73:77, 5] <- 1000 |
|
50 |
- |
|
51 |
- testimput2 <- PomaCreateObject(features = data2, target = target) |
|
52 |
- |
|
53 |
- r <- PomaImpute(testimput2, method = "half_min") |
|
54 |
- s <- PomaImpute(testimput2, method = "median") |
|
55 |
- t <- PomaImpute(testimput2, method = "mean") |
|
56 |
- u <- PomaImpute(testimput2, method = "min") |
|
57 |
- v <- PomaImpute(testimput2, method = "knn") |
|
58 |
- |
|
59 |
- SummarizedExperiment::assay(testimput)[5, 175:190] <- NA |
|
60 |
- h_1 <- PomaImpute(testimput, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 1) |
|
61 |
- |
|
62 |
- ## |
|
63 |
- |
|
64 |
- expect_equal(testimput@NAMES[1], h@NAMES[1]) |
|
65 |
- expect_equal(length(testimput@NAMES), length(h@NAMES)) |
|
66 |
- expect_false(length(testimput@NAMES) == length(h_1@NAMES)) |
|
67 |
- |
|
68 |
- expect_equal(testimput@NAMES[1], n@NAMES[1]) |
|
69 |
- expect_equal(length(testimput@NAMES), length(n@NAMES)) |
|
70 |
- |
|
71 |
- ## |
|
72 |
- |
|
73 |
- expect_equal(a, b) |
|
74 |
- expect_equal(b, c) |
|
75 |
- expect_equal(a, c) |
|
76 |
- expect_equal(a, d) |
|
77 |
- expect_equal(b, d) |
|
78 |
- expect_equal(c, d) |
|
79 |
- |
|
80 |
- expect_equal(d, e) |
|
81 |
- expect_equal(c, e) |
|
82 |
- expect_equal(e, f) |
|
83 |
- |
|
84 |
- expect_false(all(SummarizedExperiment::assay(g) == SummarizedExperiment::assay(h))) |
|
85 |
- expect_equal(dim(g), dim(h)) |
|
86 |
- |
|
87 |
- expect_equal(dim(i), dim(j)) |
|
88 |
- expect_equal(dim(j), dim(k)) |
|
89 |
- |
|
90 |
- expect_false(all(SummarizedExperiment::assay(i) == SummarizedExperiment::assay(j))) |
|
91 |
- expect_false(all(SummarizedExperiment::assay(j) == SummarizedExperiment::assay(k))) |
|
92 |
- expect_false(all(SummarizedExperiment::assay(k) == SummarizedExperiment::assay(i))) |
|
93 |
- |
|
94 |
- expect_equal(SummarizedExperiment::assay(l), SummarizedExperiment::assay(m)) |
|
95 |
- expect_equal(SummarizedExperiment::assay(n), SummarizedExperiment::assay(o)) |
|
96 |
- expect_true(all(SummarizedExperiment::assay(p) == SummarizedExperiment::assay(q))) |
|
97 |
- |
|
98 |
- expect_equal(dim(r), dim(s)) |
|
99 |
- expect_equal(dim(s), dim(t)) |
|
100 |
- expect_equal(dim(t), dim(u)) |
|
101 |
- expect_equal(dim(u), dim(v)) |
|
102 |
- |
|
103 |
- #### |
|
104 |
- |
|
105 |
- expect_false(all(SummarizedExperiment::assay(r) == SummarizedExperiment::assay(s))) |
|
106 |
- expect_false(all(SummarizedExperiment::assay(r) == SummarizedExperiment::assay(t))) |
|
107 |
- expect_false(all(SummarizedExperiment::assay(r) == SummarizedExperiment::assay(u))) |
|
108 |
- expect_false(all(SummarizedExperiment::assay(r) == SummarizedExperiment::assay(v))) |
|
109 |
- |
|
110 |
- expect_false(all(SummarizedExperiment::assay(s) == SummarizedExperiment::assay(t))) |
|
111 |
- expect_false(all(SummarizedExperiment::assay(s) == SummarizedExperiment::assay(u))) |
|
112 |
- expect_false(all(SummarizedExperiment::assay(s) == SummarizedExperiment::assay(v))) |
|
113 |
- |
|
114 |
- expect_false(all(SummarizedExperiment::assay(t) == SummarizedExperiment::assay(u))) |
|
115 |
- expect_false(all(SummarizedExperiment::assay(t) == SummarizedExperiment::assay(v))) |
|
116 |
- |
|
117 |
- expect_false(all(SummarizedExperiment::assay(u) == SummarizedExperiment::assay(v))) |
|
2 |
+test_that("PomaImpute handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ imputed_data <- PomaImpute(data, method = "mean") |
|
5 |
+ expect_is(imputed_data, "SummarizedExperiment") |
|
6 |
+}) |
|
118 | 7 |
|
119 |
- #### |
|
8 |
+test_that("PomaImpute stops with non-SummarizedExperiment objects", { |
|
9 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
10 |
+ expect_error(PomaImpute(data), "data is not a SummarizedExperiment object") |
|
11 |
+}) |
|
120 | 12 |
|
121 |
- expect_error(PomaImpute(testimput, method = "non")) |
|
122 |
- expect_message(PomaImpute(testimput)) |
|
123 |
- expect_message(PomaImpute(testimput2)) |
|
13 |
+test_that("PomaImpute handles zeros_as_na parameter correctly", { |
|
14 |
+ data <- create_mock_summarized_experiment() |
|
15 |
+ imputed_data <- PomaImpute(data, zeros_as_na = TRUE) |
|
16 |
+ expect_is(imputed_data, "SummarizedExperiment") |
|
17 |
+}) |
|
124 | 18 |
|
125 |
- #### |
|
126 |
- |
|
127 |
- expect_message(PomaImpute(st000284, method = "knn")) |
|
128 |
- expect_message(PomaImpute(st000284, method = "random_forest")) |
|
129 |
- |
|
130 |
- ## |
|
131 |
- |
|
132 |
- expect_error(PomaImpute(method = "knn")) |
|
133 |
- expect_error(PomaImpute(iris, method = "knn")) |
|
134 |
- |
|
19 |
+test_that("PomaImpute handles remove_na and cutoff parameters correctly", { |
|
20 |
+ data <- create_mock_summarized_experiment() |
|
21 |
+ imputed_data <- PomaImpute(data, remove_na = TRUE, cutoff = 50) |
|
22 |
+ expect_is(imputed_data, "SummarizedExperiment") |
|
135 | 23 |
}) |
136 | 24 |
|
137 |
-################################################################## |
|
138 |
-################################################################## |
|
25 |
+test_that("PomaImpute handles different imputation methods correctly", { |
|
26 |
+ data <- create_mock_summarized_experiment() |
|
27 |
+ for (method in c("none", "half_min", "median", "mean", "min", "knn")) { # "random_forest" |
|
28 |
+ imputed_data <- PomaImpute(data, method = method) |
|
29 |
+ expect_is(imputed_data, "SummarizedExperiment") |
|
30 |
+ } |
|
31 |
+}) |
|
139 | 32 |
|
140 |
-# rfImpute fails many times in virtual machines because it generates a |
|
141 |
-# huge proximity matrix that sometimes needs >2 cores to run |
|
142 |
-# |
|
143 |
-# test_that("PomaImpute works skip on Appveyor", { |
|
144 |
-# |
|
145 |
-# skip_on_appveyor() # rfImpute needs more than 2 cores to run and Appveyor only have 2 |
|
146 |
-# |
|
147 |
-# data("st000336") |
|
148 |
-# |
|
149 |
-# a_2 <- PomaImpute(st000336, method = "half_min") |
|
150 |
-# b_2 <- PomaImpute(st000336, method = "median") |
|
151 |
-# c_2 <- PomaImpute(st000336, method = "mean") |
|
152 |
-# d_2 <- PomaImpute(st000336, method = "min") |
|
153 |
-# e_2 <- PomaImpute(st000336, method = "knn") |
|
154 |
-# f_2 <- PomaImpute(st000336, method = "rf") |
|
155 |
-# |
|
156 |
-# ## |
|
157 |
-# |
|
158 |
-# expect_false(all(SummarizedExperiment::assay(a_2) == SummarizedExperiment::assay(b_2))) |
|
159 |
-# expect_false(all(SummarizedExperiment::assay(b_2) == SummarizedExperiment::assay(c_2))) |
|
160 |
-# expect_false(all(SummarizedExperiment::assay(c_2) == SummarizedExperiment::assay(d_2))) |
|
161 |
-# expect_false(all(SummarizedExperiment::assay(d_2) == SummarizedExperiment::assay(e_2))) |
|
162 |
-# expect_false(all(SummarizedExperiment::assay(e_2) == SummarizedExperiment::assay(f_2))) |
|
163 |
-# expect_false(all(SummarizedExperiment::assay(f_2) == SummarizedExperiment::assay(a_2))) |
|
164 |
-# |
|
165 |
-# ## |
|
166 |
-# |
|
167 |
-# expect_error(PomaImpute(st000284, method = "rf")) |
|
168 |
-# |
|
169 |
-# }) |
|
33 |
+test_that("PomaImpute stops with incorrect method argument", { |
|
34 |
+ data <- create_mock_summarized_experiment() |
|
35 |
+ expect_error(PomaImpute(data, method = "invalid_method"), "Incorrect value for method argument") |
|
36 |
+}) |
|
170 | 37 |
|
171 | 38 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,37 @@ |
1 |
+ |
|
2 |
+test_that("PomaLM handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ data$NumericVariable <- 1:20 |
|
5 |
+ lm_results <- PomaLM(data) |
|
6 |
+ expect_is(lm_results, "list") |
|
7 |
+ expect_true("lm_table" %in% names(lm_results)) |
|
8 |
+ expect_true("regression_plot" %in% names(lm_results)) |
|
9 |
+}) |
|
10 |
+ |
|
11 |
+test_that("PomaLM stops with non-SummarizedExperiment objects", { |
|
12 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
13 |
+ expect_error(PomaLM(data), "data is not a SummarizedExperiment object") |
|
14 |
+}) |
|
15 |
+ |
|
16 |
+test_that("PomaLM handles specific independent variables", { |
|
17 |
+ data <- create_mock_summarized_experiment() |
|
18 |
+ data$NumericVariable <- 1:20 |
|
19 |
+ lm_results <- PomaLM(data, x = c("V1", "V2")) |
|
20 |
+ expect_is(lm_results, "list") |
|
21 |
+}) |
|
22 |
+ |
|
23 |
+test_that("PomaLM handles specific dependent variables", { |
|
24 |
+ data <- create_mock_summarized_experiment() |
|
25 |
+ data$NumericVariable <- 1:20 |
|
26 |
+ lm_results <- PomaLM(data, y = "NumericVariable") |
|
27 |
+ expect_is(lm_results, "list") |
|
28 |
+}) |
|
29 |
+ |
|
30 |
+test_that("PomaLM provides expected output structure", { |
|
31 |
+ data <- create_mock_summarized_experiment() |
|
32 |
+ data$NumericVariable <- 1:20 |
|
33 |
+ lm_results <- PomaLM(data) |
|
34 |
+ expect_is(lm_results$lm_table, "tbl_df") |
|
35 |
+ expect_true(all(c("feature", "estimate", "std_err", "statistic", "pvalue", "adj_pvalue") %in% names(lm_results$lm_table))) |
|
36 |
+}) |
|
37 |
+ |
0 | 38 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,40 @@ |
1 |
+ |
|
2 |
+test_that("PomaLMM handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ lmm_results <- PomaLMM(data) |
|
5 |
+ expect_is(lmm_results, "list") |
|
6 |
+ expect_true("variances" %in% names(lmm_results)) |
|
7 |
+ expect_true("variances_plot" %in% names(lmm_results)) |
|
8 |
+}) |
|
9 |
+ |
|
10 |
+test_that("PomaLMM stops with non-SummarizedExperiment objects", { |
|
11 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
12 |
+ expect_error(PomaLMM(data), "data is not a SummarizedExperiment object") |
|
13 |
+}) |
|
14 |
+ |
|
15 |
+test_that("PomaLMM handles specific independent variables", { |
|
16 |
+ data <- create_mock_summarized_experiment() |
|
17 |
+ data$NumericVariable <- 1:20 |
|
18 |
+ data$FactorVariable <- factor(c(rep("s", 10), rep("d", 10))) |
|
19 |
+ lmm_results <- PomaLMM(data, x = c("NumericVariable", "FactorVariable")) |
|
20 |
+ expect_is(lmm_results, "list") |
|
21 |
+}) |
|
22 |
+ |
|
23 |
+test_that("PomaLMM handles specific dependent variables", { |
|
24 |
+ data <- create_mock_summarized_experiment() |
|
25 |
+ lmm_results <- PomaLMM(data, y = c("V1", "V2")) |
|
26 |
+ expect_is(lmm_results, "list") |
|
27 |
+}) |
|
28 |
+ |
|
29 |
+test_that("PomaLMM stops with incorrect adjustment method", { |
|
30 |
+ data <- create_mock_summarized_experiment() |
|
31 |
+ expect_error(PomaLMM(data, adjust = "invalid"), "Incorrect value for adjust argument") |
|
32 |
+}) |
|
33 |
+ |
|
34 |
+test_that("PomaLMM provides expected output structure", { |
|
35 |
+ data <- create_mock_summarized_experiment() |
|
36 |
+ lmm_results <- PomaLMM(data) |
|
37 |
+ expect_is(lmm_results$variances, "tbl_df") |
|
38 |
+ expect_true(all(c("feature", "Residual", "(Intercept)") %in% names(lmm_results$variances))) |
|
39 |
+}) |
|
40 |
+ |
... | ... |
@@ -1,55 +1,37 @@ |
1 |
-context("PomaLasso") |
|
2 | 1 |
|
3 |
-test_that("PomaLasso works", { |
|
4 |
- |
|
5 |
- data("st000336") |
|
6 |
- |
|
7 |
- normalized <- st000336 %>% |
|
8 |
- POMA::PomaImpute(method = "knn") %>% |
|
9 |
- POMA::PomaNorm(method = "log_scaling") |
|
10 |
- |
|
11 |
- normalized_test <- normalized |
|
12 |
- normalized_test_less <- normalized |
|
13 |
- SummarizedExperiment::colData(normalized_test)[,1] <- c(rep("C", 30), rep("G", 20), rep("P", 7)) |
|
14 |
- SummarizedExperiment::colData(normalized_test_less)[,1] <- "Control" |
|
15 |
- |
|
16 |
- lasso_res <- PomaLasso(normalized, alpha = 1, ntest = NULL, nfolds = 3, labels = TRUE) |
|
17 |
- ridge_res <- PomaLasso(normalized, alpha = 0, ntest = NULL, nfolds = 10) |
|
18 |
- enet_res <- PomaLasso(normalized, alpha = 0.5, ntest = NULL, nfolds = 5, labels = TRUE) |
|
19 |
- lasso_self_lambda <- PomaLasso(normalized, alpha = 1, ntest = NULL, nfolds = 10, lambda = seq(0.002, 20, length.out = 100)) |
|
20 |
- |
|
21 |
- ## TABLES |
|
22 |
- |
|
23 |
- expect_false(nrow(lasso_res$coefficients) == nrow(ridge_res$coefficients)) |
|
24 |
- expect_false(nrow(lasso_res$coefficients) == nrow(enet_res$coefficients)) |
|
25 |
- |
|
26 |
- expect_equal(ncol(lasso_res$coefficients), ncol(ridge_res$coefficients)) |
|
27 |
- expect_equal(ncol(ridge_res$coefficients), ncol(enet_res$coefficients)) |
|
28 |
- expect_equal(ncol(lasso_res$coefficients), ncol(lasso_self_lambda$coefficients)) |
|
29 |
- |
|
30 |
- ## PLOTS |
|
2 |
+test_that("PomaLasso handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
4 |
+ lasso_results <- PomaLasso(data, alpha = 1) |
|
5 |
+ expect_is(lasso_results, "list") |
|
6 |
+ expect_true(all(c("coefficients", "coefficients_plot", "cv_plot", "model") %in% names(lasso_results))) |
|
7 |
+}) |
|
31 | 8 |
|
32 |
- df_a <- ggplot2::layer_data(lasso_res$coefficientPlot) |
|
33 |
- df_b <- ggplot2::layer_data(ridge_res$coefficientPlot) |
|
34 |
- df_e <- ggplot2::layer_data(enet_res$coefficientPlot) |
|
9 |
+test_that("PomaLasso stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaLasso(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
35 | 13 |
|
36 |
- df_c <- ggplot2::layer_data(lasso_res$cvLassoPlot) |
|
37 |
- df_d <- ggplot2::layer_data(ridge_res$cvLassoPlot) |
|
14 |
+test_that("PomaLasso handles different alpha values correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
16 |
+ for (alpha in c(0, 0.5, 1)) { # Testing Ridge, Elasticnet, and Lasso |
|
17 |
+ lasso_results <- PomaLasso(data, alpha = alpha) |
|
18 |
+ expect_is(lasso_results, "list") |
|
19 |
+ expect_true(all(c("coefficients", "coefficients_plot", "cv_plot", "model") %in% names(lasso_results))) |
|
20 |
+ } |
|
21 |
+}) |
|
38 | 22 |
|
39 |
- expect_false(length(df_a$y) == length(df_b$y)) |
|
40 |
- expect_false(length(df_c$y) == length(df_d$y)) |
|
41 |
- expect_false(length(df_a$y) == length(df_e$y)) |
|
23 |
+test_that("PomaLasso stops with incorrect alpha argument", { |
|
24 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
25 |
+ expect_error(PomaLasso(data, alpha = -1), "alpha must be a number between 0 and 1") |
|
26 |
+ expect_error(PomaLasso(data, alpha = 2), "alpha must be a number between 0 and 1") |
|
27 |
+}) |
|
42 | 28 |
|
43 |
- ## ERRORS |
|
44 |
- |
|
45 |
- expect_error(PomaLasso(normalized, alpha = 2)) |
|
46 |
- expect_error(PomaLasso(normalized, alpha = -0.5)) |
|
47 |
- expect_error(PomaLasso(iris, alpha = 1)) |
|
48 |
- expect_error(PomaLasso(normalized_test, alpha = 1)) |
|
49 |
- expect_error(PomaLasso(normalized_test_less, alpha = 1)) |
|
50 |
- expect_error(PomaLasso()) |
|
51 |
- expect_error(PomaLasso(normalized, ntest = 60)) |
|
52 |
- expect_error(PomaLasso(normalized, ntest = 2)) |
|
53 |
- |
|
29 |
+test_that("PomaLasso handles ntest and nfolds parameters correctly", { |
|
30 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
31 |
+ lasso_results_with_ntest <- PomaLasso(data, ntest = 10) |
|
32 |
+ lasso_results_with_nfolds <- PomaLasso(data, nfolds = 5) |
|
33 |
+ expect_is(lasso_results_with_ntest, "list") |
|
34 |
+ expect_true("confusion_matrix" %in% names(lasso_results_with_ntest)) |
|
35 |
+ expect_is(lasso_results_with_nfolds, "list") |
|
54 | 36 |
}) |
55 | 37 |
|
... | ... |
@@ -1,46 +1,27 @@ |
1 |
-context("PomaLimma") |
|
2 | 1 |
|
3 |
-test_that("PomaLimma works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- data("st000336") |
|
7 |
- |
|
8 |
- a <- PomaLimma(st000284, contrast = "CRC-Polyp", covariates = FALSE, adjust = "fdr") |
|
9 |
- b <- PomaLimma(st000284, contrast = "CRC-Polyp", covariates = FALSE, adjust = "bonferroni") |
|
10 |
- c <- PomaLimma(st000284, contrast = "CRC-Polyp", covariates = TRUE, adjust = "fdr") |
|
11 |
- d <- PomaLimma(st000284, contrast = "CRC-Polyp", covariates = TRUE, adjust = "bonferroni") |
|
12 |
- |
|
13 |
- e <- PomaLimma(st000336, contrast = "Controls-DMD", covariates = FALSE, adjust = "fdr") |
|
14 |
- f <- PomaLimma(st000336, contrast = "Controls-DMD", covariates = TRUE, adjust = "fdr") |
|
15 |
- |
|
16 |
- #### |
|
17 |
- |
|
18 |
- expect_error(PomaLimma(st000284, covariates = FALSE, adjust = "fdr")) |
|
19 |
- expect_error(PomaLimma(st000284, contrast = NULL)) |
|
20 |
- |
|
21 |
- #### |
|
22 |
- |
|
23 |
- expect_equal(dim(a), dim(b)) |
|
24 |
- expect_equal(dim(b), dim(c)) |
|
25 |
- expect_equal(dim(c), dim(d)) |
|
26 |
- |
|
27 |
- expect_false(all(a == b)) |
|
28 |
- expect_false(all(a == c)) |
|
2 |
+test_that("PomaLimma handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ limma_results <- PomaLimma(data, contrast = "A-B") |
|
5 |
+ expect_is(limma_results, "tbl_df") |
|
6 |
+}) |
|
29 | 7 |
|
30 |
- expect_false(all(b == d)) |
|
8 |
+test_that("PomaLimma stops with non-SummarizedExperiment objects", { |
|
9 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
10 |
+ expect_error(PomaLimma(data, contrast = "A-B"), "data is not a SummarizedExperiment object") |
|
11 |
+}) |
|
31 | 12 |
|
32 |
- expect_equal(dim(e), dim(f)) |
|
33 |
- |
|
34 |
- #### |
|
13 |
+test_that("PomaLimma handles different contrasts correctly", { |
|
14 |
+ data <- create_mock_summarized_experiment() |
|
15 |
+ contrast <- levels(SummarizedExperiment::colData(data)[,1]) |
|
16 |
+ limma_results <- PomaLimma(data, contrast = paste(contrast[1], contrast[2], sep = "-")) |
|
17 |
+ expect_is(limma_results, "tbl_df") |
|
18 |
+}) |
|
35 | 19 |
|
36 |
- SummarizedExperiment::colData(st000284) <- SummarizedExperiment::colData(st000284)[1] |
|
37 |
- expect_error(PomaLimma(st000284, contrast = "CRC-Polyp", covariates = TRUE, adjust = "fdr")) |
|
38 |
- expect_error(PomaLimma(st000284, contrast = "CRC-Polyp", covariates = TRUE, adjust = "fd")) |
|
39 |
- |
|
40 |
- ## |
|
41 |
- |
|
42 |
- expect_error(PomaLimma(contrast = "CRC-Polyp")) |
|
43 |
- expect_error(PomaLimma(iris, contrast = "CRC-Polyp")) |
|
44 |
- |
|
20 |
+test_that("PomaLimma handles parameters correctly", { |
|
21 |
+ data <- create_mock_summarized_experiment() |
|
22 |
+ limma_results_with_covs <- PomaLimma(data, contrast = "A-B") |
|
23 |
+ limma_results_adjusted <- PomaLimma(data, contrast = "A-B", adjust = "holm") |
|
24 |
+ expect_is(limma_results_with_covs, "tbl_df") |
|
25 |
+ expect_is(limma_results_adjusted, "tbl_df") |
|
45 | 26 |
}) |
46 | 27 |
|
47 | 28 |
deleted file mode 100644 |
... | ... |
@@ -1,112 +0,0 @@ |
1 |
-context("PomaMultivariate") |
|
2 |
- |
|
3 |
-test_that("PomaMultivariate works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- |
|
7 |
- #### PCA |
|
8 |
- |
|
9 |
- multivariate_pca_1 <- PomaMultivariate(st000284, method = "pca", components = 4, |
|
10 |
- center = FALSE, scale = FALSE, labels = TRUE) |
|
11 |
- |
|
12 |
- multivariate_pca_2 <- PomaMultivariate(st000284, method = "pca", components = 5, |
|
13 |
- center = TRUE, scale = TRUE) |
|
14 |
- |
|
15 |
- ## |
|
16 |
- |
|
17 |
- expect_equal(nrow(multivariate_pca_1$scores), nrow(multivariate_pca_2$scores)) |
|
18 |
- expect_false(ncol(multivariate_pca_1$scores) == ncol(multivariate_pca_2$scores)) |
|
19 |
- expect_equal(ncol(multivariate_pca_1$scores), 4) |
|
20 |
- expect_equal(ncol(multivariate_pca_2$scores), 5) |
|
21 |
- |
|
22 |
- expect_equal(ncol(multivariate_pca_1$eigenvalues), ncol(multivariate_pca_2$eigenvalues)) |
|
23 |
- expect_false(nrow(multivariate_pca_1$eigenvalues) == nrow(multivariate_pca_2$eigenvalues)) |
|
24 |
- |
|
25 |
- ## |
|
26 |
- |
|
27 |
- df_a <- ggplot2::layer_data(multivariate_pca_1$screeplot) |
|
28 |
- df_b <- ggplot2::layer_data(multivariate_pca_1$scoresplot) |
|
29 |
- |
|
30 |
- df_c <- ggplot2::layer_data(multivariate_pca_2$screeplot) |
|
31 |
- df_d <- ggplot2::layer_data(multivariate_pca_2$scoresplot) |
|
32 |
- |
|
33 |
- expect_false(length(df_a$y) == length(df_c$y)) |
|
34 |
- expect_false(length(df_b$y) == length(df_d$y)) |
|
35 |
- |
|
36 |
- #### PLSDA |
|
37 |
- |
|
38 |
- multivariate_plsda_1 <- PomaMultivariate(st000284, method = "plsda", components = 3, |
|
39 |
- center = TRUE, scale = TRUE, |
|
40 |
- validation = "Mfold", folds = 5, nrepeat = 10, labels = TRUE) |
|
41 |
- |
|
42 |
- multivariate_plsda_2 <- PomaMultivariate(st000284, method = "plsda", components = 4, |
|
43 |
- center = TRUE, scale = TRUE, |
|
44 |
- validation = "loo", folds = 5, nrepeat = 1, vip = 1) |
|
45 |
- |
|
46 |
- ## |
|
47 |
- |
|
48 |
- expect_equal(ncol(multivariate_plsda_1$errors_plsda), ncol(multivariate_plsda_2$errors_plsda)) |
|
49 |
- expect_false(nrow(multivariate_plsda_1$errors_plsda) == nrow(multivariate_plsda_2$errors_plsda)) |
|
50 |
- |
|
51 |
- expect_false(ncol(multivariate_plsda_1$vip_plsda) == ncol(multivariate_plsda_2$vip_plsda)) |
|
52 |
- expect_equal(nrow(multivariate_plsda_1$vip_plsda), nrow(multivariate_plsda_2$vip_plsda)) |
|
53 |
- |
|
54 |
- expect_false(ncol(multivariate_plsda_1$scores) == ncol(multivariate_plsda_2$scores)) |
|
55 |
- |
|
56 |
- ## |
|
57 |
- |
|
58 |
- df_a <- ggplot2::layer_data(multivariate_plsda_1$scoresplot) |
|
59 |
- df_b <- ggplot2::layer_data(multivariate_plsda_1$errors_plsda_plot) |
|
60 |
- df_c <- ggplot2::layer_data(multivariate_plsda_1$vip_plsda_plot) |
|
61 |
- |
|
62 |
- df_d <- ggplot2::layer_data(multivariate_plsda_2$scoresplot) |
|
63 |
- df_e <- ggplot2::layer_data(multivariate_plsda_2$errors_plsda_plot) |
|
64 |
- df_f <- ggplot2::layer_data(multivariate_plsda_2$vip_plsda_plot) |
|
65 |
- |
|
66 |
- expect_false(ncol(df_a) == ncol(df_d)) |
|
67 |
- expect_equal(ncol(df_b$y), ncol(df_e$y)) |
|
68 |
- expect_false(length(df_c$y) == length(df_f$y)) |
|
69 |
- |
|
70 |
- #### SPLSDA |
|
71 |
- |
|
72 |
- multivariate_splsda_1 <- PomaMultivariate(st000284, method = "splsda", components = 3, |
|
73 |
- center = TRUE, scale = TRUE, |
|
74 |
- validation = "Mfold", folds = 5, nrepeat = 10, |
|
75 |
- num_features = 10, labels = TRUE) |
|
76 |
- |
|
77 |
- multivariate_splsda_2 <- PomaMultivariate(st000284, method = "splsda", components = 4, |
|
78 |
- center = TRUE, scale = TRUE, |
|
79 |
- validation = "Mfold", folds = 5, nrepeat = 10, |
|
80 |
- num_features = 5) |
|
81 |
- |
|
82 |
- ## |
|
83 |
- |
|
84 |
- expect_false(nrow(multivariate_splsda_1$selected_variables) == |
|
85 |
- nrow(multivariate_splsda_2$selected_variables)) |
|
86 |
- expect_false(nrow(multivariate_splsda_1$errors_splsda) == |
|
87 |
- nrow(multivariate_splsda_2$errors_splsda)) |
|
88 |
- |
|
89 |
- expect_true(is.numeric(multivariate_splsda_1$ncomp)) |
|
90 |
- expect_true(is.numeric(multivariate_splsda_2$ncomp)) |
|
91 |
- |
|
92 |
- df_a <- ggplot2::layer_data(multivariate_splsda_1$errors_splsda_plot) |
|
93 |
- df_b <- ggplot2::layer_data(multivariate_splsda_2$errors_splsda_plot) |
|
94 |
- df_c <- ggplot2::layer_data(multivariate_splsda_1$scoresplot) |
|
95 |
- df_d <- ggplot2::layer_data(multivariate_splsda_2$scoresplot) |
|
96 |
- |
|
97 |
- expect_false(nrow(df_a) == nrow(df_b)) |
|
98 |
- expect_false(length(df_c$y) == length(df_d$y)) |
|
99 |
- |
|
100 |
- ## ERRORS AND WARNINGS |
|
101 |
- |
|
102 |
- expect_error(PomaMultivariate(method = "splsda")) |
|
103 |
- expect_error(PomaMultivariate(iris, method = "splsda")) |
|
104 |
- expect_error(PomaMultivariate(st000284, method = "pc", components = 5)) |
|
105 |
- expect_error(PomaMultivariate(st000284)) |
|
106 |
- expect_error(PomaMultivariate(st000284, method = "plsda", validation = "Mfo")) |
|
107 |
- expect_message(PomaMultivariate(st000284, method = "plsda")) |
|
108 |
- expect_error(PomaMultivariate(st000284, method = "pca", load_length = 2.1)) |
|
109 |
- expect_error(PomaMultivariate(st000284, method = "pca", load_length = 0.9)) |
|
110 |
- |
|
111 |
-}) |
|
112 |
- |
... | ... |
@@ -1,53 +1,34 @@ |
1 |
-context("PomaNorm") |
|
2 | 1 |
|
3 |
-test_that("PomaNorm works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- |
|
7 |
- data <- t(SummarizedExperiment::assay(st000284)) |
|
8 |
- |
|
9 |
- data <- data*round(runif(n=1, min = 0.01, max = 0.99), 3) # just to create decimals |
|
10 |
- data[1:4, 5] <- 0 # create some zeros in one group |
|
11 |
- data[73:77, 5] <- 0 # create some zeros in the other group |
|
12 |
- |
|
13 |
- data[10:14, 5] <- NA # create some NA in one group (5/66 = 7.6% of NA) |
|
14 |
- data[78:81, 5] <- NA # create some NA in the other group (4/66 = 6.1% of NA) |
|
15 |
- |
|
16 |
- data[,1] <- 0 # create column of only zeros |
|
17 |
- data[,2] <- 100 # create feature with var = 0 |
|
18 |
- |
|
19 |
- target <- SummarizedExperiment::colData(st000284) %>% |
|
20 |
- as.data.frame() %>% |
|
21 |
- tibble::rownames_to_column() |
|
22 |
- |
|
23 |
- testnorm <- POMA::PomaCreateObject(features = data, metadata = target) |
|
24 |
- |
|
25 |
- newdata <- POMA::PomaImpute(testnorm, method = "knn", zeros_as_na = FALSE, remove_na = TRUE, cutoff = 2) |
|
26 |
- newdata2 <- POMA::PomaNorm(newdata, method = "log_pareto") |
|
27 |
- |
|
28 |
- #### |
|
2 |
+test_that("PomaNorm handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ normalized_data <- PomaNorm(data, method = "auto_scaling") |
|
5 |
+ expect_is(normalized_data, "SummarizedExperiment") |
|
6 |
+}) |
|
29 | 7 |
|
30 |
- a <- dim(PomaNorm(newdata, method = "auto_scaling")) |
|
31 |
- b <- dim(PomaNorm(newdata, method = "level_scaling")) |
|
32 |
- c <- dim(PomaNorm(newdata, method = "log_scaling")) |
|
33 |
- d <- dim(PomaNorm(newdata, method = "log_transform")) |
|
34 |
- e <- dim(PomaNorm(newdata, method = "vast_scaling")) |
|
35 |
- f <- dim(PomaNorm(newdata, method = "log_pareto")) |
|
36 |
- g <- dim(PomaNorm(newdata, method = "none")) |
|
8 |
+test_that("PomaNorm stops with non-SummarizedExperiment objects", { |
|
9 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
10 |
+ expect_error(PomaNorm(data), "data is not a SummarizedExperiment object") |
|
11 |
+}) |
|
37 | 12 |
|
38 |
- expect_equal(a, b) |
|
39 |
- expect_equal(b, c) |
|
40 |
- expect_equal(c, d) |
|
41 |
- expect_equal(e, f) |
|
42 |
- expect_equal(f, g) |
|
13 |
+test_that("PomaNorm handles sample normalization methods correctly", { |
|
14 |
+ data <- create_mock_summarized_experiment() |
|
15 |
+ normalized_data_sum <- PomaNorm(data, sample_norm = "sum") |
|
16 |
+ normalized_data_quantile <- PomaNorm(data, sample_norm = "quantile") |
|
17 |
+ expect_is(normalized_data_sum, "SummarizedExperiment") |
|
18 |
+ expect_is(normalized_data_quantile, "SummarizedExperiment") |
|
19 |
+}) |
|
43 | 20 |
|
44 |
- expect_error(PomaNorm(newdata, method = "log")) |
|
45 |
- expect_message(PomaNorm(newdata)) |
|
21 |
+test_that("PomaNorm handles different normalization methods correctly", { |
|
22 |
+ data <- create_mock_summarized_experiment() |
|
23 |
+ for (method in c("none", "auto_scaling", "level_scaling", "log_scaling", "log_transform", |
|
24 |
+ "vast_scaling", "log_pareto", "min_max", "box_cox")) { |
|
25 |
+ normalized_data <- PomaNorm(data, method = method) |
|
26 |
+ expect_is(normalized_data, "SummarizedExperiment") |
|
27 |
+ } |
|
28 |
+}) |
|
46 | 29 |
|
47 |
- ## |
|
48 |
- |
|
49 |
- expect_error(PomaNorm(method = "auto_scaling")) |
|
50 |
- expect_error(PomaNorm(iris, method = "auto_scaling")) |
|
51 |
- |
|
30 |
+test_that("PomaNorm stops with incorrect normalization method argument", { |
|
31 |
+ data <- create_mock_summarized_experiment() |
|
32 |
+ expect_error(PomaNorm(data, method = "invalid_method"), "Incorrect value for method argument") |
|
52 | 33 |
}) |
53 | 34 |
|
... | ... |
@@ -1,82 +1,34 @@ |
1 |
-context("PomaOddsRatio") |
|
2 | 1 |
|
3 |
-test_that("PomaOddsRatio works", { |
|
4 |
- |
|
5 |
- data("st000336") |
|
6 |
- |
|
7 |
- imputed <- POMA::PomaImpute(st000336, method = "knn") |
|
8 |
- |
|
9 |
- norm_none <- PomaNorm(imputed, method = "none") |
|
10 |
- norm_ls <- PomaNorm(imputed, method = "log_scaling") |
|
11 |
- |
|
12 |
- a <- PomaOddsRatio(norm_none, feature_name = c("glutamic_acid", "glutamine", "glycine", "histidine", "isoleucine", "leucine", "lysine"))$OddsRatioPlot |
|
13 |
- b <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "glutamine", "glycine", "histidine", "isoleucine", "leucine", "lysine"))$OddsRatioPlot |
|
14 |
- |
|
15 |
- c <- PomaOddsRatio(norm_ls, feature_name = "glutamic_acid")$OddsRatioPlot |
|
16 |
- d <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "arginine"))$OddsRatioPlot |
|
17 |
- |
|
18 |
- df_a <- ggplot2::layer_data(a) |
|
19 |
- df_b <- ggplot2::layer_data(b) |
|
20 |
- |
|
21 |
- ## |
|
22 |
- |
|
23 |
- expect_equal(nrow(df_a), nrow(df_b)) |
|
24 |
- |
|
25 |
- ## |
|
26 |
- |
|
27 |
- e <- PomaOddsRatio(norm_none)$OddsRatioTable |
|
28 |
- f <- PomaOddsRatio(norm_ls)$OddsRatioTable |
|
29 |
- g <- PomaOddsRatio(norm_ls, feature_name = "glutamic_acid")$OddsRatioTable |
|
30 |
- h <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "arginine"))$OddsRatioTable |
|
31 |
- h_1 <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "arginine"), covariates = TRUE)$OddsRatioTable |
|
32 |
- |
|
33 |
- ## |
|
34 |
- |
|
35 |
- expect_equal(dim(e), dim(f)) |
|
36 |
- expect_false(nrow(f) == nrow(g)) |
|
37 |
- expect_false(nrow(g) == nrow(h)) |
|
38 |
- expect_false(nrow(h) == nrow(h_1)) |
|
39 |
- |
|
40 |
- ## |
|
41 |
- |
|
42 |
- i <- PomaOddsRatio(norm_ls, feature_name = NULL, covariates = TRUE)$OddsRatioTable |
|
43 |
- j <- PomaOddsRatio(norm_ls, feature_name = NULL, covariates = FALSE)$OddsRatioTable |
|
44 |
- |
|
45 |
- ## |
|
46 |
- |
|
47 |
- expect_false(nrow(i) == nrow(j)) |
|
48 |
- |
|
49 |
- ## |
|
50 |
- |
|
51 |
- k <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "glutamine", "glycine", "histidine", "isoleucine", "leucine", "lysine"))$OddsRatioPlot |
|
52 |
- l <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "glutamine", "glycine", "histidine", "isoleucine", "leucine", "lysine"), showCI = FALSE)$OddsRatioPlot |
|
53 |
- m <- PomaOddsRatio(norm_ls, feature_name = c("glutamic_acid", "glutamine", "glycine", "histidine", "isoleucine", "leucine", "lysine"), showCI = FALSE, covariates = TRUE)$OddsRatioPlot |
|
54 |
- |
|
55 |
- df_k <- ggplot2::layer_data(k) |
|
56 |
- df_l <- ggplot2::layer_data(l) |
|
57 |
- df_m <- ggplot2::layer_data(m) |
|
58 |
- |
|
59 |
- ## |
|
60 |
- |
|
61 |
- expect_equal(nrow(df_k), nrow(df_l)) |
|
62 |
- |
|
63 |
- ## |
|
2 |
+test_that("PomaOddsRatio handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
4 |
+ odds_ratio_results <- PomaOddsRatio(data) |
|
5 |
+ expect_is(odds_ratio_results, "list") |
|
6 |
+ expect_true(all(c("odds_ratio_table", "odds_ratio_plot") %in% names(odds_ratio_results))) |
|
7 |
+}) |
|
64 | 8 |
|
65 |
- expect_error(PomaOddsRatio(norm_ls, feature_name = "hello")) |
|
66 |
- expect_error(PomaOddsRatio(norm_ls, feature_name = "glutamic_aci")) |
|
67 |
- |
|
68 |
- expect_warning(PomaOddsRatio(norm_ls, feature_name = c("glutamic_aci", "arginine"))) |
|
69 |
- expect_warning(PomaOddsRatio(norm_ls, feature_name = c("glutamic_aci", "arginine", "glutamine"))) |
|
9 |
+test_that("PomaOddsRatio stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaOddsRatio(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
70 | 13 |
|
71 |
- ## |
|
14 |
+test_that("PomaOddsRatio handles specific feature names correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
16 |
+ feature_name <- rownames(SummarizedExperiment::assay(data))[1:2] |
|
17 |
+ odds_ratio_results <- PomaOddsRatio(data, feature_name = feature_name) |
|
18 |
+ expect_is(odds_ratio_results, "list") |
|
19 |
+ expect_true(all(feature_name %in% odds_ratio_results$odds_ratio_table$feature)) |
|
20 |
+}) |
|
72 | 21 |
|
73 |
- SummarizedExperiment::colData(imputed) <- SummarizedExperiment::colData(imputed)[1] |
|
74 |
- expect_error(PomaOddsRatio(imputed, covariates = TRUE)) |
|
75 |
- |
|
76 |
- ## |
|
77 |
- |
|
78 |
- expect_error(PomaOddsRatio()) |
|
79 |
- expect_error(PomaOddsRatio(iris)) |
|
22 |
+test_that("PomaOddsRatio stops with incorrect feature names", { |
|
23 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
24 |
+ expect_error(PomaOddsRatio(data, feature_name = "non_existing_feature")) |
|
25 |
+}) |
|
80 | 26 |
|
27 |
+test_that("PomaOddsRatio handles covariates and show_ci parameters correctly", { |
|
28 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
29 |
+ odds_ratio_results_with_covs <- PomaOddsRatio(data, covs = c("Group")) |
|
30 |
+ odds_ratio_results_no_ci <- PomaOddsRatio(data, show_ci = FALSE) |
|
31 |
+ expect_is(odds_ratio_results_with_covs, "list") |
|
32 |
+ expect_is(odds_ratio_results_no_ci, "list") |
|
81 | 33 |
}) |
82 | 34 |
|
83 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,25 @@ |
1 |
+ |
|
2 |
+test_that("PomaPCA handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ pca_results <- PomaPCA(data) |
|
5 |
+ expect_is(pca_results, "list") |
|
6 |
+ expect_true(all(c("factors", "factors_plot", "eigenvalues", "eigenvalues_plot", "loadings", "loadings_plot", "biplot") %in% names(pca_results))) |
|
7 |
+}) |
|
8 |
+ |
|
9 |
+test_that("PomaPCA stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaPCA(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
13 |
+ |
|
14 |
+test_that("PomaPCA handles different parameter settings correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment() |
|
16 |
+ pca_results_center_scale <- PomaPCA(data, center = TRUE, scale = TRUE) |
|
17 |
+ pca_results_ncomp <- PomaPCA(data, ncomp = 2) |
|
18 |
+ pca_results_labels <- PomaPCA(data, labels = TRUE) |
|
19 |
+ pca_results_ellipse <- PomaPCA(data, ellipse = TRUE) |
|
20 |
+ expect_is(pca_results_center_scale, "list") |
|
21 |
+ expect_is(pca_results_ncomp, "list") |
|
22 |
+ expect_is(pca_results_labels, "list") |
|
23 |
+ expect_is(pca_results_ellipse, "list") |
|
24 |
+}) |
|
25 |
+ |
0 | 26 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,35 @@ |
1 |
+ |
|
2 |
+test_that("PomaPCR handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ data$NumericVariable <- 1:20 |
|
5 |
+ pcr_results <- PomaPCR(data, y = "NumericVariable") |
|
6 |
+ expect_is(pcr_results, "tbl_df") |
|
7 |
+ expect_true(all(c("component", "estimate", "std_err", "statistic", "pvalue", "adj_pvalue") %in% names(pcr_results))) |
|
8 |
+}) |
|
9 |
+ |
|
10 |
+test_that("PomaPCR stops with non-SummarizedExperiment objects", { |
|
11 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
12 |
+ expect_error(PomaPCR(data, y = "NumericVariable"), "data is not a SummarizedExperiment object") |
|
13 |
+}) |
|
14 |
+ |
|
15 |
+test_that("PomaPCR handles different number of components", { |
|
16 |
+ data <- create_mock_summarized_experiment() |
|
17 |
+ data$NumericVariable <- 1:20 |
|
18 |
+ pcr_results_2comp <- PomaPCR(data, ncomp = 2, y = "NumericVariable") |
|
19 |
+ pcr_results_5comp <- PomaPCR(data, ncomp = 5, y = "NumericVariable") |
|
20 |
+ expect_is(pcr_results_2comp, "tbl_df") |
|
21 |
+ expect_is(pcr_results_5comp, "tbl_df") |
|
22 |
+}) |
|
23 |
+ |
|
24 |
+test_that("PomaPCR stops with incorrect y variable", { |
|
25 |
+ data <- create_mock_summarized_experiment() |
|
26 |
+ data$InvalidVariable <- rep("a", 20) |
|
27 |
+ expect_error(PomaPCR(data, y = "InvalidVariable"), "No numeric variables to be used as dependent variable in metadata file") |
|
28 |
+}) |
|
29 |
+ |
|
30 |
+test_that("PomaPCR stops with incorrect adjust argument", { |
|
31 |
+ data <- create_mock_summarized_experiment() |
|
32 |
+ data$NumericVariable <- 1:20 |
|
33 |
+ expect_error(PomaPCR(data, y = "NumericVariable", adjust = "invalid"), "Incorrect value for adjust argument") |
|
34 |
+}) |
|
35 |
+ |
0 | 36 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,38 @@ |
1 |
+ |
|
2 |
+test_that("PomaPLS handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ data$numeric_var <- 1:20 |
|
5 |
+ pls_results <- PomaPLS(data, method = "pls") |
|
6 |
+ expect_is(pls_results, "list") |
|
7 |
+ expect_true(all(c("factors", "factors_plot", "loadings", "loadings_plot") %in% names(pls_results))) |
|
8 |
+}) |
|
9 |
+ |
|
10 |
+test_that("PomaPLS stops with non-SummarizedExperiment objects", { |
|
11 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
12 |
+ expect_error(PomaPLS(data, method = "pls"), "data is not a SummarizedExperiment object") |
|
13 |
+}) |
|
14 |
+ |
|
15 |
+test_that("PomaPLS handles different PLS methods correctly", { |
|
16 |
+ data <- create_mock_summarized_experiment() |
|
17 |
+ plsda_results <- PomaPLS(data, method = "plsda", ncomp = 3) |
|
18 |
+ splsda_results <- PomaPLS(data, method = "splsda", ncomp = 3, num_features = 3) |
|
19 |
+ expect_is(plsda_results, "list") |
|
20 |
+ expect_is(splsda_results, "list") |
|
21 |
+}) |
|
22 |
+ |
|
23 |
+test_that("PomaPLS handles different parameter settings correctly", { |
|
24 |
+ data <- create_mock_summarized_experiment() |
|
25 |
+ data$numeric_var <- 1:20 |
|
26 |
+ pls_results_ncomp <- PomaPLS(data, method = "pls", ncomp = 3) |
|
27 |
+ plsda_results_labels <- PomaPLS(data, method = "plsda", ncomp = 3, labels = TRUE) |
|
28 |
+ splsda_results_ellipse <- PomaPLS(data, method = "splsda", ncomp = 3, num_features = 3, ellipse = TRUE) |
|
29 |
+ expect_is(pls_results_ncomp, "list") |
|
30 |
+ expect_is(plsda_results_labels, "list") |
|
31 |
+ expect_is(splsda_results_ellipse, "list") |
|
32 |
+}) |
|
33 |
+ |
|
34 |
+test_that("PomaPLS stops with incorrect method argument", { |
|
35 |
+ data <- create_mock_summarized_experiment() |
|
36 |
+ expect_error(PomaPLS(data, method = "invalid_method"), "Incorrect value for method argument") |
|
37 |
+}) |
|
38 |
+ |
... | ... |
@@ -1,42 +1,28 @@ |
1 |
-context("PomaRankProd") |
|
2 | 1 |
|
3 |
-test_that("PomaRankProd works", { |
|
4 |
- |
|
5 |
- data("st000284") |
|
6 |
- |
|
7 |
- target <- SummarizedExperiment::colData(st000284)[1:100,] %>% |
|
8 |
- as.data.frame() %>% |
|
9 |
- tibble::rownames_to_column("ID") |
|
10 |
- e <- SummarizedExperiment::assay(st000284)[,1:100] |
|
11 |
- |
|
12 |
- data <- PomaSummarizedExperiment(target = target, features = t(e)) |
|
13 |
- |
|
14 |
- toy_data <- POMA::PomaNorm(data, method = "log_scaling") |
|
15 |
- SummarizedExperiment::colData(toy_data)$groups <- c(rep("C", 25), rep("G", 25)) |
|
16 |
- |
|
17 |
- ## |
|
18 |
- |
|
19 |
- RP_one <- PomaRankProd(data, logged = TRUE, logbase = 2) |
|
20 |
- RP_two <- PomaRankProd(data, logged = TRUE, logbase = 10) |
|
21 |
- |
|
22 |
- RP_five <- PomaRankProd(data, cutoff = 0.05, method = "pfp") |
|
23 |
- RP_six <- PomaRankProd(data, cutoff = 0.05, method = "pval") |
|
24 |
- |
|
25 |
- ## |
|
26 |
- |
|
27 |
- expect_error(PomaRankProd()) |
|
28 |
- expect_error(PomaRankProd(data, method = "pfd")) |
|
29 |
- expect_error(PomaRankProd(toy_data)) |
|
30 |
- expect_message(PomaRankProd(data)) |
|
31 |
- expect_error(PomaRankProd(iris)) |
|
2 |
+test_that("PomaRankProd handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
4 |
+ rank_prod_results <- PomaRankProd(data) |
|
5 |
+ expect_is(rank_prod_results, "list") |
|
6 |
+ expect_true(all(c("up_regulated", "down_regulated", "up_regulated_plot", "down_regulated_plot") %in% names(rank_prod_results))) |
|
7 |
+}) |
|
32 | 8 |
|
33 |
- ## |
|
9 |
+test_that("PomaRankProd stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaRankProd(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
34 | 13 |
|
35 |
- expect_equal(dim(RP_one$upregulated), dim(RP_two$upregulated)) |
|
36 |
- expect_equal(dim(RP_one$downregulated), dim(RP_two$downregulated)) |
|
14 |
+test_that("PomaRankProd handles different parameters correctly", { |
|
15 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
16 |
+ rank_prod_results_logged <- PomaRankProd(data, logged = TRUE) |
|
17 |
+ rank_prod_results_cutoff <- PomaRankProd(data, cutoff = 0.1) |
|
18 |
+ rank_prod_results_method <- PomaRankProd(data, method = "pval") |
|
19 |
+ expect_is(rank_prod_results_logged, "list") |
|
20 |
+ expect_is(rank_prod_results_cutoff, "list") |
|
21 |
+ expect_is(rank_prod_results_method, "list") |
|
22 |
+}) |
|
37 | 23 |
|
38 |
- expect_false(dim(RP_five$upregulated)[1] == dim(RP_six$upregulated)[1]) |
|
39 |
- expect_false(dim(RP_five$downregulated)[1] == dim(RP_six$downregulated)[1]) |
|
40 |
- |
|
24 |
+test_that("PomaRankProd stops with incorrect method argument", { |
|
25 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
26 |
+ expect_error(PomaRankProd(data, method = "invalid_method"), "Incorrect value for method argument") |
|
41 | 27 |
}) |
42 | 28 |
|
43 | 29 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
1 |
+ |
|
2 |
+test_that("PomaUMAP handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment() |
|
4 |
+ umap_results <- PomaUMAP(data) |
|
5 |
+ expect_is(umap_results, "list") |
|
6 |
+ expect_true("umap_embeddings" %in% names(umap_results)) |
|
7 |
+ expect_true("umap_plot" %in% names(umap_results)) |
|
8 |
+}) |
|
9 |
+ |
|
10 |
+test_that("PomaUMAP stops with non-SummarizedExperiment objects", { |
|
11 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
12 |
+ expect_error(PomaUMAP(data), "data is not a SummarizedExperiment object") |
|
13 |
+}) |
|
14 |
+ |
|
15 |
+test_that("PomaUMAP works with different n_neighbors values", { |
|
16 |
+ data <- create_mock_summarized_experiment() |
|
17 |
+ umap_results <- PomaUMAP(data, n_neighbors = 5) |
|
18 |
+ expect_is(umap_results, "list") |
|
19 |
+}) |
|
20 |
+ |
|
21 |
+test_that("PomaUMAP works with different n_components values", { |
|
22 |
+ data <- create_mock_summarized_experiment() |
|
23 |
+ umap_results <- PomaUMAP(data, n_components = 3) |
|
24 |
+ expect_is(umap_results, "list") |
|
25 |
+}) |
|
26 |
+ |
|
27 |
+test_that("PomaUMAP provides expected output structure", { |
|
28 |
+ data <- create_mock_summarized_experiment() |
|
29 |
+ umap_results <- PomaUMAP(data) |
|
30 |
+ expect_is(umap_results$umap_embeddings, "tbl_df") |
|
31 |
+ expect_true(all(c("sample", "UMAP1", "UMAP2", "clust", "member_prob") %in% names(umap_results$umap_embeddings))) |
|
32 |
+}) |
|
33 |
+ |
... | ... |
@@ -1,68 +1,48 @@ |
1 |
-context("PomaUnivariate") |
|
2 | 1 |
|
3 |
-test_that("PomaUnivariate works", { |
|
4 |
- |
|
5 |
- library(SummarizedExperiment) |
|
6 |
- |
|
7 |
- data("st000284") |
|
8 |
- data("st000336") |
|
9 |
- |
|
10 |
- # st000284_sub <- st000284[,st000284@colData$factors %in% c("CRC", "Healthy")] |
|
11 |
- st000336 <- POMA::PomaImpute(st000336, method = "knn") |
|
12 |
- |
|
13 |
- dims_for_ttest_and_mann <- c(ncol(t(SummarizedExperiment::assay(st000336))), 9) |
|
14 |
- dims_for_aov <- c(ncol(t(SummarizedExperiment::assay(st000284))), |
|
15 |
- length(levels(as.factor(SummarizedExperiment::colData(st000284)[,1]))) + 6) |
|
16 |
- dims_for_krusk <- c(ncol(t(SummarizedExperiment::assay(st000284))), |
|
17 |
- length(levels(as.factor(SummarizedExperiment::colData(st000284)[,1]))) + 7) |
|
18 |
- |
|
19 |
- univ_ttest <- PomaUnivariate(st000336, method = "ttest", adjust = "fdr") |
|
20 |
- univ_aov <- PomaUnivariate(st000284, covariates = FALSE, method = "anova", adjust = "fdr") |
|
21 |
- univ_aov_cov <- PomaUnivariate(st000284, covariates = TRUE, method = "anova", adjust = "fdr") |
|
22 |
- univ_aov_cov2 <- PomaUnivariate(st000284, covariates = TRUE, method = "anova", adjust = "bonferroni") |
|
2 |
+test_that("PomaUnivariate handles valid SummarizedExperiment objects", { |
|
3 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
4 |
+ univariate_results <- PomaUnivariate(data, method = "ttest") |
|
5 |
+ expect_is(univariate_results, "list") |
|
6 |
+ expect_true("result" %in% names(univariate_results)) |
|
7 |
+}) |
|
23 | 8 |
|
24 |
- univ_mann <- PomaUnivariate(st000336, method = "mann", adjust = "fdr") |
|
25 |
- univ_kruskal <- PomaUnivariate(st000284, method = "kruskal", adjust = "fdr") |
|
26 |
- univ_kruskal2 <- PomaUnivariate(st000284, method = "kruskal", adjust = "BY") |
|
9 |
+test_that("PomaUnivariate stops with non-SummarizedExperiment objects", { |
|
10 |
+ data <- data.frame(matrix(runif(100), ncol = 10)) |
|
11 |
+ expect_error(PomaUnivariate(data), "data is not a SummarizedExperiment object") |
|
12 |
+}) |
|
27 | 13 |
|
28 |
- one_cov1 <- PomaUnivariate(st000336, covariates = FALSE, method = "anova", adjust = "fdr") |
|
29 |
- one_cov2 <- PomaUnivariate(st000336, covariates = TRUE, method = "anova", adjust = "fdr") |
|
30 |
- |
|
31 |
- ## |
|
14 |
+test_that("PomaUnivariate handles different methods correctly for 2 groups", { |
|
15 |
+ data <- create_mock_summarized_experiment(binary = TRUE) |
|
16 |
+ for (method in c("ttest", "mann")) { |
|
17 |
+ univariate_results <- PomaUnivariate(data, method = method) |
|
18 |
+ expect_is(univariate_results, "list") |
|
19 |
+ expect_true("result" %in% names(univariate_results)) |
|
20 |
+ } |
|
21 |
+}) |
|
32 | 22 |
|
33 |
- expect_equal(dims_for_ttest_and_mann, dim(univ_ttest)) |
|
34 |
- expect_false(dims_for_aov[2] == dim(univ_aov_cov)[2]) |
|
35 |
- expect_false(dims_for_aov[2] == dim(univ_aov_cov2)[2]) |
|
36 |
- expect_equal(dims_for_aov[1], dim(univ_aov_cov)[1]) |
|
37 |
- expect_equal(dims_for_aov[1], dim(univ_aov_cov2)[1]) |
|
38 |
- expect_equal(dims_for_aov, dim(univ_aov)) |
|
39 |
- expect_false(all(univ_aov_cov == univ_aov_cov2)) |
|
23 |
+test_that("PomaUnivariate handles different methods correctly for more than 2 groups", { |
|
24 |
+ data <- create_mock_summarized_experiment() |
|
25 |
+ for (method in c("anova", "kruskal")) { |
|
26 |
+ univariate_results <- PomaUnivariate(data, method = method) |
|
27 |
+ expect_is(univariate_results, "list") |
|
28 |
+ expect_true("result" %in% names(univariate_results)) |
|
29 |
+ } |
|
30 |
+}) |
|
40 | 31 |
|
41 |
- ## |
|
32 |
+test_that("PomaUnivariate stops with incorrect method argument", { |
|
33 |
+ data <- create_mock_summarized_experiment() |
|
34 |
+ expect_error(PomaUnivariate(data, method = "invalid_method"), "Incorrect value for method argument") |
|
35 |
+}) |
|
42 | 36 |
|
43 |
- expect_equal(dims_for_ttest_and_mann, dim(univ_mann)) |
|
44 |
- expect_equal(dim(univ_kruskal), dim(univ_kruskal2)) |
|
45 |
- expect_false(all(univ_kruskal == univ_kruskal2)) |
|
46 |
- expect_equal(dims_for_krusk, dim(univ_kruskal)) |
|
47 |
- expect_equal(dims_for_krusk, dim(univ_kruskal2)) |
|
48 |
- |
|
49 |
- expect_false(dim(one_cov1)[2] == dim(one_cov2)[2]) |
|
50 |
- expect_equal(dim(one_cov1)[1], dim(one_cov2)[1]) |
|
51 |
- |
|
52 |
- ## |
|
53 |
- |
|
54 |
- expect_error(PomaUnivariate(st000284, covariates = TRUE, method = "anov", adjust = "fdr")) |
|
55 |
- expect_error(PomaUnivariate(st000284, covariates = TRUE, adjust = "fdr")) |
|
56 |
- |
|
57 |
- expect_error(PomaUnivariate(st000336, method = "ttest", adjust = "fd")) |
|
58 |
- |
|
59 |
- SummarizedExperiment::colData(st000284) <- SummarizedExperiment::colData(st000284)[1] |
|
60 |
- expect_error(PomaUnivariate(st000284, method = "anova", covariates = TRUE, adjust = "fdr")) |
|
61 |
- |
|
62 |
- ## |
|
63 |
- |
|
64 |
- expect_error(PomaUnivariate(method = "ttest")) |
|
65 |
- expect_error(PomaUnivariate(iris, method = "ttest")) |
|
37 |
+test_that("PomaUnivariate handles paired, var_equal, and adjust parameters correctly", { |
|
38 |
+ data_binary <- create_mock_summarized_experiment(binary = TRUE) |
|
39 |
+ data_binary_paired <- create_mock_summarized_experiment(binary = TRUE, paired = TRUE) |
|
66 | 40 |
|
41 |
+ univariate_results_paired <- PomaUnivariate(data_binary_paired, method = "ttest", paired = TRUE) |
|
42 |
+ univariate_results_var_equal <- PomaUnivariate(data_binary, method = "ttest", var_equal = TRUE) |
|
43 |
+ univariate_results_adjusted <- PomaUnivariate(data_binary, method = "ttest", adjust = "holm") |
|
44 |
+ expect_is(univariate_results_paired, "list") |
|
45 |
+ expect_is(univariate_results_var_equal, "list") |
|
46 |
+ expect_is(univariate_results_adjusted, "list") |
|
67 | 47 |
}) |
68 | 48 |
|
... | ... |
@@ -12,7 +12,7 @@ test_that("PomaVolcano stops with non-SummarizedExperiment objects", { |
12 | 12 |
|
13 | 13 |
test_that("PomaVolcano handles different methods correctly", { |
14 | 14 |
data <- create_mock_summarized_experiment(binary = TRUE) |
15 |
- for (method in c("ttest", "mann", "limma")) { # DESeq |
|
15 |
+ for (method in c("ttest", "mann", "limma")) { # "DESeq" |
|
16 | 16 |
plot <- PomaVolcano(data, method = method) |
17 | 17 |
expect_is(plot, "ggplot") |
18 | 18 |
} |
... | ... |
@@ -20,7 +20,7 @@ test_that("PomaVolcano handles different methods correctly", { |
20 | 20 |
|
21 | 21 |
test_that("PomaVolcano stops with incorrect method argument", { |
22 | 22 |
data <- create_mock_summarized_experiment(binary = TRUE) |
23 |
- expect_error(PomaVolcano(data, method = "invalid_method"), "Incorrect value for method argument") |
|
23 |
+ expect_error(PomaVolcano(data, method = "invalid_method")) |
|
24 | 24 |
}) |
25 | 25 |
|
26 | 26 |
test_that("PomaVolcano handles p-value adjustments correctly", { |
... | ... |
@@ -43,7 +43,7 @@ test_that("PomaVolcano handles different pval_cutoff and log2fc_cutoff values", |
43 | 43 |
}) |
44 | 44 |
|
45 | 45 |
test_that("PomaVolcano handles labels, paired, and var_equal parameters correctly", { |
46 |
- data <- create_mock_summarized_experiment(binary = TRUE) |
|
46 |
+ data <- create_mock_summarized_experiment(binary = TRUE, paired = TRUE) |
|
47 | 47 |
plot_with_labels <- PomaVolcano(data, labels = TRUE) |
48 | 48 |
plot_paired <- PomaVolcano(data, paired = TRUE) |
49 | 49 |
plot_var_equal <- PomaVolcano(data, var_equal = TRUE) |
50 | 50 |
deleted file mode 100644 |
... | ... |
@@ -1,288 +0,0 @@ |
1 |
-title: "POMA Workflow" |
|
2 |
-author: |
|
3 |
-- name: Pol Castellano-Escuder, Ph.D. |
|
4 |
- affiliation: Duke University |
|
5 |
- email: polcaes@gmail.com |
|
6 |
-date: "`r BiocStyle::doc_date()`" |
|
7 |
-output: |
|
8 |
- BiocStyle::html_document |
|
9 |
-vignette: > |
|
10 |
- %\VignetteIndexEntry{POMA Workflow} |
|
11 |
- %\VignetteEngine{knitr::rmarkdown} |
|
12 |
- %\usepackage[utf8]{inputenc} |
|
13 |
- %\VignetteEncoding{UTF-8} |
|
14 |
-bibliography: ["POMA.bib"] |
|
15 |
-biblio-style: apalike |
|
16 |
-link-citations: true |
|
17 |
- |
|
18 |
-**Compiled date**: `r Sys.Date()` |
|
19 |
- |
|
20 |
-**Last edited**: 2023-11-20 |
|
21 |
- |
|
22 |
-**License**: `r packageDescription("POMA")[["License"]]` |
|
23 |
- |
|
24 |
-```{r, include = FALSE} |
|
25 |
-knitr::opts_chunk$set( |
|
26 |
- collapse = TRUE, |
|
27 |
- # fig.align = "center", |
|
28 |
- comment = ">" |
|
29 |
-) |
|
30 |
-``` |
|
31 |
- |
|
32 |
-# Installation |
|
33 |
- |
|
34 |
-To install the Bioconductor version of the POMA package, run the following code: |
|
35 |
- |
|
36 |
-```{r, eval = FALSE} |
|
37 |
-# install.packages("BiocManager") |
|
38 |
-BiocManager::install("POMA") |
|
39 |
-``` |
|
40 |
- |
|
41 |
-# Load POMA |
|
42 |
- |
|
43 |
-```{r, warning = FALSE, message = FALSE} |
|
44 |
-library(POMA) |
|
45 |
-``` |
|
46 |
- |
|
47 |
-Additionally, you can load other useful packages for this vignette: |
|
48 |
- |
|
49 |
-```{r, warning = FALSE, message = FALSE} |
|
50 |
-library(ggplot2) |
|
51 |
-library(plotly) |
|
52 |
-``` |
|
53 |
- |
|
54 |
-# The POMA Workflow |
|
55 |
- |
|
56 |
-The `POMA` package functions are organized into three sequential, distinct blocks: Data Preparation, Pre-processing, and Statistical Analysis. |
|
57 |
- |
|
58 |
-## Data Preparation |
|
59 |
- |
|
60 |
-The `SummarizedExperiment` package from Bioconductor offers well-defined computational data structures for representing various types of omics experiment data [@SummarizedExperiment]. Utilizing these data structures can significantly improve data analysis. `POMA` leverages `SummarizedExperiment` objects, enhancing the reusability of existing methods for this class and contributing to more robust and reproducible workflows. |
|
61 |
- |
|
62 |
-The workflow begins with either loading or creating a `SummarizedExperiment` object. Typically, your data might be stored in separate matrices and/or data frames. The `PomaCreateObject` function simplifies this step by quickly building a SummarizedExperiment object for you. |
|
63 |
- |
|
64 |
-```{r, eval = FALSE} |
|
65 |
-# create an SummarizedExperiment object from two separated data frames |
|
66 |
-target <- readr::read_csv("your_target.csv") |
|
67 |
-features <- readr::read_csv("your_features.csv") |
|
68 |
- |
|
69 |
-data <- PomaCreateObject(metadata = target, features = features) |
|
70 |
-``` |
|
71 |
- |
|
72 |
-Alternatively, if your data is already in a `SummarizedExperiment` object, you can proceed directly to the pre-processing step. This vignette uses sample data provided in `POMA`. |
|
73 |
- |
|
74 |
-```{r, warning = FALSE, message = FALSE} |
|
75 |
-# load example data |
|
76 |
-data("st000336") |
|
77 |
-``` |
|
78 |
- |
|
79 |
-```{r, warning = FALSE, message = FALSE} |
|
80 |
-st000336 |
|
81 |
-``` |
|
82 |
- |
|
83 |
-### Brief Description of the Example Data |
|
84 |
- |
|
85 |
-This dataset comprises 57 samples, 31 metabolites, 1 covariate, and 2 experimental groups (Controls and DMD) from a targeted LC/MS study. |
|
86 |
- |
|
87 |
-_Duchenne Muscular Dystrophy (DMD) is an X-linked recessive form of muscular dystrophy that affects males via a mutation in the gene for the muscle protein, dystrophin. Progression of the disease results in severe muscle loss, ultimately leading to paralysis and death. Steroid therapy has been a commonly employed method for reducing the severity of symptoms. This study aims to quantify the urine levels of amino acids and organic acids in patients with DMD both with and without steroid treatment. Track the progression of DMD in patients who have provided multiple urine samples._ |
|
88 |
- |
|
89 |
-This data was obtained from [Metabolomics Workbench](https://www.metabolomicsworkbench.org/data/DRCCMetadata.php?Mode=Study&DataMode=AllData&StudyID=ST000336&StudyType=MS&ResultType=1#DataTabs). |
|
90 |
- |
|
91 |
-## Pre Processing |
|
92 |
- |
|
93 |
-This stage of the workflow is pivotal, as the decisions made here fundamentally influence the final statistical results. This phase is methodically segmented into three steps: Missing Value Imputation, Normalization, and Outlier Detection. |
|
94 |
- |
|
95 |
-### Missing Value Imputation |
|
96 |
- |
|
97 |
-In metabolomics studies, it's not uncommon for certain features to be undetectable or unquantifiable in some samples, owing to a variety of biological and technical factors [@imputation]. To address this prevalent issue, `POMA` provides a suite of seven distinct imputation methods, each designed to effectively handle missing data. The choice of method can significantly impact the subsequent analysis, so it's crucial to select the one that aligns best with the specific characteristics and requirements of your dataset. To perform imputation, simply execute the following code: |
|
98 |
- |
|
99 |
-```{r} |
|
100 |
-imputed <- st000336 %>% |
|
101 |
- PomaImpute(method = "knn", zeros_as_na = TRUE, remove_na = TRUE, cutoff = 20) |
|
102 |
- |
|
103 |
-imputed |
|
104 |
-``` |
|
105 |
- |
|
106 |
-### Normalization |
|
107 |
- |
|
108 |
-Following missing value imputation, the next crucial step in the process is Data Normalization. In mass spectrometry (MS) data, various factors can introduce significant variability that profoundly impacts the final statistical outcomes. Normalization is thus an essential step, as it corrects for these variations, ensuring that the data can be compared more reliably across samples. |
|
109 |
- |
|
110 |
-```{r} |
|
111 |
-normalized <- imputed %>% |
|
112 |
- PomaNorm(method = "log_pareto") |
|
113 |
- |
|
114 |
-normalized |
|
115 |
-``` |
|
116 |
- |
|
117 |
-#### Normalization effect |
|
118 |
- |
|
119 |
-Sometimes, you will be interested in _how the normalization process affects your data_. |
|
120 |
- |
|
121 |
-To answer this question, **POMA** offers two exploratory functions, `PomaBoxplots` and `PomaDensity`, that can help to understand the normalization process. |
|
122 |
- |
|
123 |
-`PomaBoxplots` generates boxplots for all samples or features (depending on the group factor) of a `SummarizedExperiment` object. Here, we can compare objects before and after normalization step. |
|
124 |
- |
|
125 |
-```{r, message = FALSE} |
|
126 |
-PomaBoxplots(imputed, group = "samples", |
|
127 |
- jitter = FALSE, |
|
128 |
- legend_position = "none") + |
|
129 |
- ggplot2::ggtitle("Not Normalized") # data before normalization |
|
130 |
-``` |
|
131 |
- |
|
132 |
-```{r, message = FALSE} |
|
133 |
-PomaBoxplots(normalized, |
|
134 |
- group = "samples", |
|
135 |
- jitter = FALSE, |
|
136 |
- legend_position = "none") + |
|
137 |
- ggplot2::ggtitle("Normalized") # data after normalization |
|
138 |
-``` |
|
139 |
- |
|
140 |
-On the other hand, `PomaDensity` shows the distribution of all features before and after the normalization process. |
|
141 |
- |
|
142 |
-```{r, message = FALSE} |
|
143 |
-PomaDensity(imputed, |
|
144 |
- group = "features", |
|
145 |
- legend_position = "none") + |
|
146 |
- ggplot2::ggtitle("Not Normalized") # data before normalization |
|
147 |
-``` |
|
148 |
- |
|
149 |
-```{r, message = FALSE} |
|
150 |
-PomaDensity(normalized, |
|
151 |
- group = "features") + |
|
152 |
- ggplot2::ggtitle("Normalized") # data after normalization |
|
153 |
-``` |
|
154 |
- |
|
155 |
-### Outlier Detection |
|
156 |
- |
|
157 |
-Finally, the last step of this block is the Outlier Detection. Outliers are defined as observations that are not concordant with those of the vast majority of the remaining data points. These values can have an enormous influence on the resultant statistical analysis, being a dangerous ground for all required assumptions in the most commonly applied parametric tests in mass spectrometry as well as for the assumptions required in many regression techniques and predictive modeling approaches. **POMA** allows the analysis of outliers as well as the possibility to remove them from the analysis using different modulable parameters. |
|
158 |
- |
|
159 |
-Analyze and remove outliers running the following two lines of code. |
|
160 |
- |
|
161 |
-```{r} |
|
162 |
-PomaOutliers(normalized, do = "analyze")$polygon_plot # to explore |
|
163 |
-pre_processed <- PomaOutliers(normalized, do = "clean") # to remove outliers |
|
164 |
-pre_processed |
|
165 |
-``` |
|
166 |
- |
|
167 |
-## Statistical Analysis |
|
168 |
- |
|
169 |
-Once the data have been pre-processed, you can start with the statistical analysis step! **POMA** offers many different statistical methods and possible combinations to compute. However, in this vignette we will comment only some of the most used. |
|
170 |
- |
|
171 |
-### Univariate Analysis |
|
172 |
- |
|
173 |
-**POMA** allows you to perform all of the most used univariate statistical methods in MS by using only one function! `PomaUnivariate` wraps 4 different univariate methods (t-test, ANOVA and ANCOVA, Wilcoxon test and Kruskal-Wallis Rank Sum Test) that you can perform by changing only the "method" argument. |
|
174 |
- |
|
175 |
-#### T-test |
|
176 |
- |
|
177 |
-```{r} |
|
178 |
-PomaUnivariate(pre_processed, method = "ttest") |
|
179 |
-``` |
|
180 |
- |
|
181 |
-You can also compute a volcano plot using the T-test results. _Note that we're using the non-normalized object to avoid negative values in our data._ |
|
182 |
- |
|
183 |
-```{r} |
|
184 |
-PomaVolcano(imputed, pval = "adjusted") |
|
185 |
-``` |
|
186 |
- |
|
187 |
-#### Wilcoxon Test |
|
188 |
- |
|
189 |
-```{r, warning = FALSE} |
|
190 |
-PomaUnivariate(pre_processed, method = "mann") |
|
191 |
-``` |
|
192 |
- |
|
193 |
-### Limma |
|
194 |
- |
|
195 |
-Another widely used statistical method in many different omics, such as epigenomics or transcriptomics, is **limma** [@limma]. **POMA** provides an easy-to-use implementation of _limma_; you only have to specify the desired contrast to compute. |
|
196 |
- |
|
197 |
-```{r} |
|
198 |
-PomaLimma(pre_processed, contrast = "Controls-DMD", adjust = "fdr") |
|
199 |
-``` |
|
200 |
- |
|
201 |
-### Multivariate Analysis |
|
202 |
- |
|
203 |
-On the other hand, multivariate analysis implemented in **POMA** is quite similar to the univariate approaches. `PomaMultivariate` allows users to compute a PCA, PLS-DA or sPLS-DA by changing only the "method" parameter. This function is based on **mixOmics** package [@mixOmics]. |
|
204 |
- |
|
205 |
-#### Principal Component Analysis |
|
206 |
- |
|
207 |
-```{r} |
|
208 |
-poma_pca <- PomaMultivariate(pre_processed, method = "pca") |
|
209 |
-``` |
|
210 |
- |
|
211 |
-```{r} |
|
212 |
-poma_pca$scoresplot + |
|
213 |
- ggplot2::ggtitle("Scores Plot") |
|
214 |
-``` |
|
215 |
- |
|
216 |
-#### PLS-DA |
|
217 |
- |
|
218 |
-```{r, warning = FALSE, message = FALSE, results = 'hide'} |
|
219 |
-poma_plsda <- PomaMultivariate(pre_processed, method = "plsda") |
|
220 |
-``` |
|
221 |
- |
|
222 |
-```{r} |
|
223 |
-poma_plsda$scoresplot + |
|
224 |
- ggplot2::ggtitle("Scores Plot") |
|
225 |
-``` |
|
226 |
- |
|
227 |
-```{r} |
|
228 |
-poma_plsda$errors_plsda_plot + |
|
229 |
- ggplot2::ggtitle("Error Plot") |
|
230 |
-``` |
|
231 |
- |
|
232 |
-### Correlation Analysis |
|
233 |
- |
|
234 |
-Often, correlation analysis is used to explore and discover relationships and patterns within our data. `PomaCorr` provides a flexible and easy way to do that, providing a table with all pairwise correlations in the data, a correlogram and a correlation graph. |
|
235 |
- |
|
236 |
-```{r} |
|
237 |
-poma_cor <- PomaCorr(pre_processed, label_size = 8, coeff = 0.6) |
|
238 |
-poma_cor$correlations |
|
239 |
-poma_cor$corrplot |
|
240 |
-poma_cor$graph |
|
241 |
-``` |
|
242 |
- |
|
243 |
-Alternatively, if you switch the "corr_type" parameter to "glasso", this function will compute a **Gaussian Graphical Model** using the **glasso** package [@glasso]. |
|
244 |
- |
|
245 |
-```{r} |
|
246 |
-PomaCorr(pre_processed, corr_type = "glasso", coeff = 0.6)$graph |
|
247 |
-``` |
|
248 |
- |
|
249 |
-### Lasso, Ridge and Elasticnet |
|
250 |
- |
|
251 |
-**POMA** also provides a function to perform a Lasso, Ridge and Elasticnet regression for binary outcomes in a very intuitive and easy way. `PomaLasso` is based on **glmnet** package [@glmnet]. This function allows you to create a test subset in your data, evaluate the prediction of your models and export the model computed (it could be useful to perform prediction models with MS data). If "ntest" parameter is set to NULL, `PomaLasso` will use all observations to create the model (useful for feature selection). |
|
252 |
- |
|
253 |
-```{r} |
|
254 |
-# alpha = 1 for Lasso |
|
255 |
-PomaLasso(pre_processed, alpha = 1, labels = TRUE)$coefficientPlot |
|
256 |
-``` |
|
257 |
- |
|
258 |
-### Random Forest |
|
259 |
- |
|
260 |
-Finally, the random forest algorithm is also implemented in **POMA**. `PomaRandForest` uses the **randomForest** package [@randomForest] to facilitate the implementation of the algorithm and creates automatically both test and train sets to compute and evaluate the resultant models. |
|
261 |
- |
|
262 |
-```{r} |
|
263 |
-poma_rf <- PomaRandForest(pre_processed, ntest = 10, nvar = 10) |
|
264 |
-poma_rf$error_tree |
|
265 |
-``` |
|
266 |
- |
|
267 |
-Resultant random forest model confusion matrix for **test** set: |
|
268 |
- |
|
269 |
-```{r} |
|
270 |
-poma_rf$confusionMatrix$table |
|
271 |
-``` |
|
272 |
- |
|
273 |
-Gini index plot for the top 10 predictors: |
|
274 |
- |
|
275 |
-```{r} |
|
276 |
-poma_rf$MeanDecreaseGini_plot |
|
277 |
-``` |
|
278 |
- |
|
279 |
-# Session Information |
|
280 |
- |
|
281 |
-```{r} |
|
282 |
-sessionInfo() |
|
283 |
-``` |
|
284 |
- |
|
285 |
-# References |
|
286 |
- |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
--- |
2 |
-title: "POMA Normalization Methods" |
|
2 |
+title: "Normalization Methods" |
|
3 | 3 |
author: |
4 | 4 |
- name: Pol Castellano-Escuder, Ph.D. |
5 | 5 |
affiliation: Duke University |
... | ... |
@@ -8,7 +8,7 @@ date: "`r BiocStyle::doc_date()`" |
8 | 8 |
output: |
9 | 9 |
BiocStyle::html_document |
10 | 10 |
vignette: > |
11 |
- %\VignetteIndexEntry{POMA Normalization Methods} |
|
11 |
+ %\VignetteIndexEntry{Normalization Methods} |
|
12 | 12 |
%\VignetteEngine{knitr::rmarkdown} |
13 | 13 |
%\usepackage[utf8]{inputenc} |
14 | 14 |
%\VignetteEncoding{UTF-8} |
... | ... |
@@ -19,14 +19,14 @@ link-citations: true |
19 | 19 |
|
20 | 20 |
**Compiled date**: `r Sys.Date()` |
21 | 21 |
|
22 |
-**Last edited**: 2023-11-20 |
|
22 |
+**Last edited**: 2023-12-06 |
|
23 | 23 |
|
24 | 24 |
**License**: `r packageDescription("POMA")[["License"]]` |
25 | 25 |
|
26 | 26 |
```{r, include = FALSE} |
27 | 27 |
knitr::opts_chunk$set( |
28 | 28 |
collapse = TRUE, |
29 |
- # fig.align = "center", |
|
29 |
+ fig.align = "center", |
|
30 | 30 |
comment = ">" |
31 | 31 |
) |
32 | 32 |
``` |
... | ... |
@@ -49,12 +49,11 @@ library(patchwork) |
49 | 49 |
|
50 | 50 |
# Load Data and Imputation |
51 | 51 |
|
52 |
-Let's create a cleaned `SummarizedExperiment` object from the sample `st000336` data to explore the normalization effects. |
|
52 |
+Let's create a cleaned `SummarizedExperiment` object from the sample data `st000336` to explore the normalization effects. |
|
53 | 53 |
|
54 | 54 |
```{r, warning = FALSE, comment = NA} |
55 |
-# imputation using the default method KNN |
|
56 | 55 |
example_data <- st000336 %>% |
57 |
- PomaImpute() |
|
56 |
+ PomaImpute() # KNN imputation |
|
58 | 57 |
|
59 | 58 |
example_data |
60 | 59 |
``` |
... | ... |
@@ -75,7 +74,7 @@ log_pareto <- PomaNorm(example_data, method = "log_pareto") |
75 | 74 |
|
76 | 75 |
## Normalization effect on data dimensions |
77 | 76 |
|
78 |
-When we check for the dimension of the data after normalization we can see that ALL methods have the same effect on data dimension. `PomaNorm` **only** change the data dimension when the data have **features that only have zeros** and when the data have **features with 0 variance**. Only in these two cases `PomaNorm` will remove features of the data, changing the data dimensions. |
|
77 |
+When we check for the dimension of the data after normalization we can see that all methods have the same effect on data dimension. `PomaNorm` **only** modifies the data dimension when the dataset contains **only-zero features** or **zero-variance features**. |
|
79 | 78 |
|
80 | 79 |
```{r, warning = FALSE} |
81 | 80 |
dim(SummarizedExperiment::assay(none)) |
... | ... |
@@ -89,52 +88,45 @@ dim(SummarizedExperiment::assay(log_pareto)) |
89 | 88 |
|
90 | 89 |
## Normalization effect on samples |
91 | 90 |
|
92 |
-Here we can evaluate the different normalization effects on samples [@normalization]. |
|
91 |
+Here we can evaluate the normalization effects on samples [@normalization]. |
|
93 | 92 |
|
94 | 93 |
```{r, message = FALSE, warning = FALSE} |
95 | 94 |
a <- PomaBoxplots(none, |
96 |
- x = "samples", |
|
97 |
- jitter = FALSE) + |
|
95 |
+ x = "samples") + |
|
98 | 96 |
ggplot2::ggtitle("Not Normalized") |
99 | 97 |
|
100 | 98 |
b <- PomaBoxplots(auto_scaling, |
101 | 99 |
x = "samples", |
102 |
- jitter = FALSE, |
|
103 | 100 |
legend_position = "none") + |
104 | 101 |
ggplot2::ggtitle("Auto Scaling") + |
105 | 102 |
ggplot2::theme(axis.text.x = ggplot2::element_blank()) |
106 | 103 |
|
107 | 104 |
c <- PomaBoxplots(level_scaling, |
108 | 105 |
x = "samples", |
109 |
- jitter = FALSE, |
|
110 | 106 |
legend_position = "none") + |
111 | 107 |
ggplot2::ggtitle("Level Scaling") + |
112 | 108 |
ggplot2::theme(axis.text.x = ggplot2::element_blank()) |
113 | 109 |
|
114 | 110 |
d <- PomaBoxplots(log_scaling, |
115 | 111 |
x = "samples", |
116 |
- jitter = FALSE, |
|
117 | 112 |
legend_position = "none") + |
118 | 113 |
ggplot2::ggtitle("Log Scaling") + |
119 | 114 |
ggplot2::theme(axis.text.x = ggplot2::element_blank()) |
120 | 115 |
|
121 | 116 |
e <- PomaBoxplots(log_transformation, |
122 | 117 |
x = "samples", |
123 |
- jitter = FALSE, |
|
124 | 118 |
legend_position = "none") + |
125 | 119 |
ggplot2::ggtitle("Log Transformation") + |
126 | 120 |
ggplot2::theme(axis.text.x = ggplot2::element_blank()) |
127 | 121 |
|
128 | 122 |
f <- PomaBoxplots(vast_scaling, |
129 | 123 |
x = "samples", |
130 |
- jitter = FALSE, |
|
131 | 124 |
legend_position = "none") + |
132 | 125 |
ggplot2::ggtitle("Vast Scaling") + |
133 | 126 |
ggplot2::theme(axis.text.x = ggplot2::element_blank()) |
134 | 127 |
|
135 | 128 |
g <- PomaBoxplots(log_pareto, |
136 | 129 |
x = "samples", |
137 |
- jitter = FALSE, |
|
138 | 130 |
legend_position = "none") + |
139 | 131 |
ggplot2::ggtitle("Log Pareto") + |
140 | 132 |
ggplot2::theme(axis.text.x = ggplot2::element_blank()) |
... | ... |
@@ -145,7 +137,7 @@ a |
145 | 137 |
|
146 | 138 |
## Normalization effect on features |
147 | 139 |
|
148 |
-Here we can evaluate the different normalization effects on features. |
|
140 |
+Here we can evaluate the normalization effects on features. |
|
149 | 141 |
|
150 | 142 |
```{r, message = FALSE, warning = FALSE} |
151 | 143 |
h <- PomaDensity(none, |
152 | 144 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,282 @@ |
1 |
+--- |
|
2 |
+title: "Get Started" |
|
3 |
+author: |
|
4 |
+- name: Pol Castellano-Escuder, Ph.D. |
|
5 |
+ affiliation: Duke University |
|
6 |
+ email: polcaes@gmail.com |
|
7 |
+date: "`r BiocStyle::doc_date()`" |
|
8 |
+output: |
|
9 |
+ BiocStyle::html_document |
|
10 |
+vignette: > |
|
11 |
+ %\VignetteIndexEntry{Get Started} |
|
12 |
+ %\VignetteEngine{knitr::rmarkdown} |
|
13 |
+ %\usepackage[utf8]{inputenc} |
|
14 |
+ %\VignetteEncoding{UTF-8} |
|
15 |
+bibliography: ["POMA.bib"] |
|
16 |
+biblio-style: apalike |
|
17 |
+link-citations: true |
|
18 |
+--- |
|
19 |
+ |
|
20 |
+**Compiled date**: `r Sys.Date()` |
|
21 |
+ |
|
22 |
+**Last edited**: 2023-12-06 |
|
23 |
+ |
|
24 |
+**License**: `r packageDescription("POMA")[["License"]]` |
|
25 |
+ |
|
26 |
+```{r, include = FALSE} |
|
27 |
+knitr::opts_chunk$set( |
|
28 |
+ collapse = TRUE, |
|
29 |
+ fig.align = "center", |
|
30 |
+ comment = ">" |
|
31 |
+) |
|
32 |
+``` |
|
33 |
+ |
|
34 |
+# Installation |
|
35 |
+ |
|
36 |
+To install the Bioconductor version of the POMA package, run the following code: |
|
37 |
+ |
|
38 |
+```{r, eval = FALSE} |
|
39 |
+# install.packages("BiocManager") |
|
40 |
+BiocManager::install("POMA") |
|
41 |
+``` |
|
42 |
+ |
|
43 |
+# Load POMA |
|
44 |
+ |
|
45 |
+```{r, warning = FALSE, message = FALSE} |
|
46 |
+library(POMA) |
|
47 |
+``` |
|
48 |
+ |
|
49 |
+Additionally, you can load other useful packages for this vignette: |
|
50 |
+ |
|
51 |
+```{r, warning = FALSE, message = FALSE} |
|
52 |
+library(ggplot2) |
|
53 |
+library(plotly) |
|
54 |
+``` |
|
55 |
+ |
|
56 |
+# The POMA Workflow |
|
57 |
+ |
|
58 |
+The `POMA` package functions are organized into three sequential, distinct blocks: Data Preparation, Pre-processing, and Statistical Analysis. |
|
59 |
+ |
|
60 |
+## Data Preparation |
|
61 |
+ |
|
62 |
+The `SummarizedExperiment` package from Bioconductor offers well-defined computational data structures for representing various types of omics experiment data [@SummarizedExperiment]. Utilizing these data structures can significantly improve data analysis. `POMA` leverages `SummarizedExperiment` objects, enhancing the reusability of existing methods for this class and contributing to more robust and reproducible workflows. |
|
63 |
+ |
|
64 |
+The workflow begins with either loading or creating a `SummarizedExperiment` object. Typically, your data might be stored in separate matrices and/or data frames. The `PomaCreateObject` function simplifies this step by quickly building a `SummarizedExperiment` object for you. |
|
65 |
+ |
|
66 |
+```{r, eval = FALSE} |
|
67 |
+# create a SummarizedExperiment object from two separate data frames |
|
68 |
+target <- readr::read_csv("your_target.csv") |
|
69 |
+features <- readr::read_csv("your_features.csv") |
|
70 |
+ |
|
71 |
+data <- PomaCreateObject(metadata = target, features = features) |
|
72 |
+``` |
|
73 |
+ |
|
74 |
+Alternatively, if your data is already in a `SummarizedExperiment` object, you can proceed directly to the pre-processing step. This vignette uses example data provided in `POMA`. |
|
75 |
+ |
|
76 |
+```{r, warning = FALSE, message = FALSE} |
|
77 |
+# load example data |
|
78 |
+data("st000336") |
|
79 |
+``` |
|
80 |
+ |
|
81 |
+```{r, warning = FALSE, message = FALSE} |
|
82 |
+st000336 |
|
83 |
+``` |
|
84 |
+ |
|
85 |
+<!-- ### Brief Description of the Example Data --> |
|
86 |
+ |
|
87 |
+<!-- This dataset comprises 57 samples, 31 metabolites, 1 covariate, and 2 experimental groups (Controls and DMD) from a targeted LC/MS study. --> |
|
88 |
+ |
|
89 |
+<!-- _Duchenne Muscular Dystrophy (DMD) is an X-linked recessive form of muscular dystrophy that affects males via a mutation in the gene for the muscle protein, dystrophin. Progression of the disease results in severe muscle loss, ultimately leading to paralysis and death. Steroid therapy has been a commonly employed method for reducing the severity of symptoms. This study aims to quantify the urine levels of amino acids and organic acids in patients with DMD both with and without steroid treatment. Track the progression of DMD in patients who have provided multiple urine samples._ --> |
|
90 |
+ |
|
91 |
+<!-- This data was obtained from [Metabolomics Workbench](https://www.metabolomicsworkbench.org/data/DRCCMetadata.php?Mode=Study&DataMode=AllData&StudyID=ST000336&StudyType=MS&ResultType=1#DataTabs). --> |
|
92 |
+ |
|
93 |
+## Pre-processing |
|
94 |
+ |
|
95 |
+<!-- This stage of the workflow is pivotal, as the decisions made here fundamentally influence the final statistical results. This phase is methodically segmented into three steps: Missing Value Imputation, Normalization, and Outlier Detection. --> |
|
96 |
+ |
|
97 |
+### Missing Value Imputation |
|
98 |
+ |
|
99 |
+<!-- In metabolomics studies, it's not uncommon for certain features to be undetectable or unquantifiable in some samples, owing to a variety of biological and technical factors [@imputation]. To address this prevalent issue, `POMA` provides a suite of seven distinct imputation methods, each designed to effectively handle missing data. The choice of method can significantly impact the subsequent analysis, so it's crucial to select the one that aligns best with the specific characteristics and requirements of your dataset. To perform imputation, simply execute the following code: --> |
|
100 |
+ |
|
101 |
+```{r} |
|
102 |
+imputed <- st000336 %>% |
|
103 |
+ PomaImpute(method = "knn", zeros_as_na = TRUE, remove_na = TRUE, cutoff = 20) |
|
104 |
+ |
|
105 |
+imputed |
|
106 |
+``` |
|
107 |
+ |
|
108 |
+### Normalization |
|
109 |
+ |
|
110 |
+<!-- Following missing value imputation, the next crucial step in the process is Data Normalization. In mass spectrometry (MS) data, various factors can introduce significant variability that profoundly impacts the final statistical outcomes. Normalization is thus an essential step, as it corrects for these variations, ensuring that the data can be compared more reliably across samples. --> |
|
111 |
+ |
|
112 |
+```{r} |
|
113 |
+normalized <- imputed %>% |
|
114 |
+ PomaNorm(method = "log_pareto") |
|
115 |
+ |
|
116 |
+normalized |
|
117 |
+``` |
|
118 |
+ |
|
119 |
+#### Normalization effect |
|
120 |
+ |
|
121 |
+<!-- `PomaBoxplots` generates boxplots for all samples or features of a `SummarizedExperiment` object. Here, we can compare objects before and after the normalization step. --> |
|
122 |
+ |
|
123 |
+```{r, message = FALSE} |
|
124 |
+PomaBoxplots(imputed, |
|
125 |
+ x = "samples", |
|
126 |
+ legend_position = "none") + |
|
127 |
+ ggplot2::ggtitle("Not Normalized") # data before normalization |
|
128 |
+``` |
|
129 |
+ |
|
130 |
+```{r, message = FALSE} |
|
131 |
+PomaBoxplots(normalized, |
|
132 |
+ x = "samples", |
|
133 |
+ legend_position = "none") + |
|
134 |
+ ggplot2::ggtitle("Normalized") # data after normalization |
|
135 |
+``` |
|
136 |
+ |
|
137 |
+<!-- On the other hand, `PomaDensity` shows the distribution of all features before and after the normalization process. --> |
|
138 |
+ |
|
139 |
+```{r, message = FALSE} |
|
140 |
+PomaDensity(imputed, |
|
141 |
+ x = "features") + |
|
142 |
+ ggplot2::ggtitle("Not Normalized") # data before normalization |
|
143 |
+``` |
|
144 |
+ |
|
145 |
+```{r, message = FALSE} |
|
146 |
+PomaDensity(normalized, |
|
147 |
+ x = "features") + |
|
148 |
+ ggplot2::ggtitle("Normalized") # data after normalization |
|
149 |
+``` |
|
150 |
+ |
|
151 |
+### Outlier Detection |
|
152 |
+ |
|
153 |
+<!-- Finally, the last step of this block is Outlier Detection. Outliers are defined as observations that are not concordant with the vast majority of the remaining data points. These values can have an enormous influence on the resulting statistical analysis, because they can violate the assumptions required by the most commonly applied parametric tests in mass spectrometry, as well as the assumptions required by many regression techniques and predictive modeling approaches. **POMA** allows the analysis of outliers as well as the possibility to remove them from the analysis using different adjustable parameters. --> |
|
154 |
+ |
|
155 |
+<!-- Analyze and remove outliers running the following two lines of code. --> |
|
156 |
+ |
|
157 |
+```{r} |
|
158 |
+PomaOutliers(normalized)$polygon_plot |
|
159 |
+pre_processed <- PomaOutliers(normalized)$data |
|
160 |
+pre_processed |
|
161 |
+``` |
|
162 |
+ |
|
163 |
+## Statistical Analysis |
|
164 |
+ |
|
165 |
+<!-- Once the data have been pre-processed, you can start with the statistical analysis step! **POMA** offers many different statistical methods and possible combinations to compute. However, in this vignette we will cover only some of the most commonly used ones. --> |
|
166 |
+ |
|
167 |
+### Univariate Analysis |
|
168 |
+ |
|
169 |
+<!-- `PomaUnivariate` computes four univariate methods (ttest, ANOVA and ANCOVA, Wilcoxon test and Kruskal-Wallis Rank Sum Test) that you can perform by changing only the "method" argument. --> |
|
170 |
+ |
|
171 |
+#### T-test |
|
172 |
+ |
|
173 |
+```{r} |
|
174 |
+PomaUnivariate(pre_processed, method = "ttest") |
|
175 |
+``` |
|
176 |
+ |
|
177 |
+<!-- You can also compute a volcano plot using the T-test results. --> |
|
178 |
+ |
|
179 |
+```{r} |
|
180 |
+PomaVolcano(imputed, pval = "adjusted", labels = TRUE) |
|
181 |
+``` |
|
182 |
+ |
|
183 |
+#### Wilcoxon Test |
|
184 |
+ |
|
185 |
+```{r, warning = FALSE} |
|
186 |
+PomaUnivariate(pre_processed, method = "mann") |
|
187 |
+``` |
|
188 |
+ |
|
189 |
+<!-- ### Limma --> |
|
190 |
+ |
|
191 |
+<!-- Another statistical method widely used in many different omics, such as epigenomics or transcriptomics, is **limma** [@limma]. **POMA** provides an easy-to-use implementation of _limma_; you only have to specify the desired contrast to compute. --> |
|
192 |
+ |
|
193 |
+```{r} |
|
194 |
+# PomaLimma(pre_processed, contrast = "Controls-DMD", adjust = "fdr") |
|
195 |
+``` |
|
196 |
+ |
|
197 |
+<!-- ### Multivariate Analysis --> |
|
198 |
+ |
|
199 |
+<!-- On the other hand, multivariate analysis implemented in **POMA** is quite similar to the univariate approaches. `PomaMultivariate` allows users to compute a PCA, PLS-DA or sPLS-DA by changing only the "method" parameter. This function is based on **mixOmics** package [@mixOmics]. --> |
|
200 |
+ |
|
201 |
+<!-- #### Principal Component Analysis --> |
|
202 |
+ |
|
203 |
+```{r} |
|
204 |
+# poma_pca <- PomaMultivariate(pre_processed, method = "pca") |
|
205 |
+``` |
|
206 |
+ |
|
207 |
+```{r} |
|
208 |
+# poma_pca$scoresplot + |
|
209 |
+# ggplot2::ggtitle("Scores Plot") |
|
210 |
+``` |
|
211 |
+ |
|
212 |
+<!-- #### PLS-DA --> |
|
213 |
+ |
|
214 |
+```{r, warning = FALSE, message = FALSE, results = 'hide'} |
|
215 |
+# poma_plsda <- PomaMultivariate(pre_processed, method = "plsda") |
|
216 |
+``` |
|
217 |
+ |
|
218 |
+```{r} |
|
219 |
+# poma_plsda$scoresplot + |
|
220 |
+# ggplot2::ggtitle("Scores Plot") |
|
221 |
+``` |
|
222 |
+ |
|
223 |
+```{r} |
|
224 |
+# poma_plsda$errors_plsda_plot + |
|
225 |
+# ggplot2::ggtitle("Error Plot") |
|
226 |
+``` |
|
227 |
+ |
|
228 |
+<!-- ### Correlation Analysis --> |
|
229 |
+ |
|
230 |
+<!-- Often, correlation analysis is used to explore and discover relationships and patterns within our data. `PomaCorr` provides a flexible and easy way to do that, providing a table with all pairwise correlations in the data, a correlogram and a correlation graph. --> |
|
231 |
+ |
|
232 |
+```{r} |
|
233 |
+# poma_cor <- PomaCorr(pre_processed, label_size = 8, coeff = 0.6) |
|
234 |
+# poma_cor$correlations |
|
235 |
+# poma_cor$corrplot |
|
236 |
+# poma_cor$graph |
|
237 |
+``` |
|
238 |
+ |
|
239 |
+<!-- Alternatively, if you switch the "corr_type" parameter to "glasso", this function will compute a **Gaussian Graphical Model** using the **glasso** package [@glasso]. --> |
|
240 |
+ |
|
241 |
+```{r} |
|
242 |
+# PomaCorr(pre_processed, corr_type = "glasso", coeff = 0.6)$graph |
|
243 |
+``` |
|
244 |
+ |
|
245 |
+<!-- ### Lasso, Ridge and Elasticnet --> |
|
246 |
+ |
|
247 |
+<!-- **POMA** also provides a function to perform a Lasso, Ridge and Elasticnet regression for binary outcomes in a very intuitive and easy way. `PomaLasso` is based on **glmnet** package [@glmnet]. This function allows you to create a test subset in your data, evaluate the prediction of your models and export the model computed (it could be useful to perform prediction models with MS data). If "ntest" parameter is set to NULL, `PomaLasso` will use all observations to create the model (useful for feature selection). --> |
|
248 |
+ |
|
249 |
+```{r} |
|
250 |
+# alpha = 1 for Lasso |
|
251 |
+# PomaLasso(pre_processed, alpha = 1, labels = TRUE)$coefficientPlot |
|
252 |
+``` |
|
253 |
+ |
|
254 |
+<!-- ### Random Forest --> |
|
255 |
+ |
|
256 |
+<!-- Finally, the random forest algorithm is also implemented in **POMA**. `PomaRandForest` uses the **randomForest** package [@randomForest] to facilitate the implementation of the algorithm and creates automatically both test and train sets to compute and evaluate the resultant models. --> |
|
257 |
+ |
|
258 |
+```{r} |
|
259 |
+# poma_rf <- PomaRandForest(pre_processed, ntest = 10, nvar = 10) |
|
260 |
+# poma_rf$error_tree |
|
261 |
+``` |
|
262 |
+ |
|
263 |
+<!-- Resultant random forest model confusion matrix for **test** set: --> |
|
264 |
+ |
|
265 |
+```{r} |
|
266 |
+# poma_rf$confusionMatrix$table |
|
267 |
+``` |
|
268 |
+ |
|
269 |
+<!-- Gini index plot for the top 10 predictors: --> |
|
270 |
+ |
|
271 |
+```{r} |
|
272 |
+# poma_rf$MeanDecreaseGini_plot |
|
273 |
+``` |
|
274 |
+ |
|
275 |
+# Session Information |
|
276 |
+ |
|
277 |
+```{r} |
|
278 |
+sessionInfo() |
|
279 |
+``` |
|
280 |
+ |
|
281 |
+# References |
|
282 |
+ |