Bioconductor Code: POMA

Browse code

lasso

pcastellanoescuder authored on 17/09/2023 23:42:04
Showing 8 changed files

DESCRIPTION index 2f21a26..566a327 100644
NEWS.md index 9cab8ac..55f1695 100644
R/PomaLasso.R index 88678d5..47669ba 100644
R/PomaUnivariate.R index e59d996..35fee3f 100644
README.Rmd index 27b504a..45d61b2 100644
README.md index 48c1ace..b738973 100644
man/PomaLasso.Rd index 3cc470e..d30335b 100644
man/PomaOddsRatio.Rd index 2f05268..4993af6 100644

History View file @ a07ba46

@@ -1,6 +1,6 @@
                      Package: POMA
                      Title: Tools for Omics Data Analysis
                     -Version: 1.8.32
                     +Version: 1.8.33
                      Authors@R:
                          c(person(given = "Pol",
                                   family = "Castellano-Escuder",

NEWS.md

History View file @ a07ba46

@@ -1,4 +1,4 @@
                     -# POMA 1.8.32
                     +# POMA 1.8.33
                      * New POMA theme and colorblind-friendly palette
                      * Available sample normalization (sum and quantile)

R/PomaLasso.R

History View file @ a07ba46

@@ -1,18 +1,18 @@
                     -#' Lasso, Ridge and Elasticnet Regularized Generalized Linear Models for Binary Outcomes
                     +#' Lasso, Ridge, and Elasticnet Regularized Generalized Linear Models for Binary Outcomes
                      #'
                     -#' @description PomaLasso() is an implementation of the lasso, ridge and elasticnet regression from `glmnet` package for binary outcomes.
                     +#' @description `PomaLasso` performs LASSO, Ridge, and Elasticnet regression for feature selection and prediction purposes for binary outcomes.
                      #'
                     -#' @param data A SummarizedExperiment object.
                     -#' @param alpha Elasticnet mixing parameter. alpha = 1 is the lasso penalty and alpha = 0 is the ridge penalty. This value must be between 0 and 1.
                     -#' @param ntest Numeric indicating the percentage of observations that will be used as test set. Default is NULL (no test set).
                     -#' @param nfolds Number of folds for CV (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.
                     -#' @param lambda A user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on `nlambda` and `lambda.min.ratio`. See `?glmnet::glmnet()`.
                     -#' @param labels Logical indicating if feature names should be plotted in coefficient plot or not. Default is FALSE.
                     +#' @param data A `SummarizedExperiment` object.
                     +#' @param alpha Numeric. Indicates the elasticnet mixing parameter. alpha = 1 is the LASSO penalty and alpha = 0 is the Ridge penalty.
                     +#' @param ntest Numeric. Indicates the percentage of observations that will be used as test set. Default is NULL (no test set).
                     +#' @param nfolds Numeric. Indicates number of folds for cross-validation (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.
                     +#' @param lambda Numeric. Indicates the user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on `nlambda` and `lambda.min.ratio`. See `?glmnet::glmnet()`.
                     +#' @param labels Logical. Indicates if feature names should be plotted in coefficient plot or not. Default is FALSE.
                      #'
                      #' @export
                      #'
                     -#' @return A list with all results including plots, tables and the resulting prediction model.
                     +#' @return A `list` with results.
                      #' @references Jerome Friedman, Trevor Hastie, Robert Tibshirani (2010). Regularization Paths for Generalized Linear Models via Coordinate Descent. Journal of Statistical Software, 33(1), 1-22. URL http://www.jstatsoft.org/v33/i01/.
                      #' @author Pol Castellano-Escuder
                      #'
@@ -25,21 +25,18 @@
                      #' st000336 %>%
                      #'   PomaImpute() %>%
                      #'   PomaNorm() %>%
                     -#'   PomaOutliers() %>%
                      #'   PomaLasso()
                      #'
                      #' # elasticnet
                      #' st000336 %>%
                      #'   PomaImpute() %>%
                      #'   PomaNorm() %>%
                     -#'   PomaOutliers() %>%
                      #'   PomaLasso(alpha = 0.5)
                      #'
                      #' # ridge
                      #' st000336 %>%
                      #'   PomaImpute() %>%
                      #'   PomaNorm() %>%
                     -#'   PomaOutliers() %>%
                      #'   PomaLasso(alpha = 0)
                      PomaLasso <- function(data,
                                            alpha = 1,
@@ -48,44 +45,37 @@ PomaLasso <- function(data,
                                            lambda = NULL,
                                            labels = FALSE){
                     -  if (missing(data)) {
                     -    stop("data argument is empty!")
                     -  }
                     -  if(!is(data, "SummarizedExperiment")){
                     -    stop("data is not a SummarizedExperiment object. \nSee POMA::PomaSummarizedExperiment or SummarizedExperiment::SummarizedExperiment")
                     +  if (!is(data, "SummarizedExperiment")){
                     +    stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment")
+                       }
                        if (alpha > 1 | alpha < 0) {
                     -    stop("alpha must be a number between 0 and 1...")
                     +    stop("alpha must be a number between 0 and 1")
+                       }
                        if(!is.null(ntest)){
                          if (ntest > 50 | ntest < 5) {
                     -      stop("ntest must be a number between 5 and 50...")
                     +      stop("ntest must be a number between 5 and 50 (%)")
+                         }
+                       }
                     -  if (length(levels(as.factor(SummarizedExperiment::colData(data)[,1]))) > 2) {
                     -    stop("Your data have more than two groups!")
                     -  }
                     -  if (length(levels(as.factor(SummarizedExperiment::colData(data)[,1]))) < 2) {
                     -    stop("Your data have less than two groups!")
                     -  }
+                    -
+                    +
                        features <- t(SummarizedExperiment::assay(data))
                     -  response <- as.factor(SummarizedExperiment::colData(data)[,1])
                     -  lasso_data <- cbind(response, features)
+                    -
                     -  n <- nrow(lasso_data)
                     +  group_factor <- as.factor(SummarizedExperiment::colData(data)[,1])
                     +  to_lasso <- cbind(group_factor, features)
+                    +
                     +  if (length(table(group_factor)[table(group_factor) != 0]) != 2) {
                     +    stop("Grouping factor must have exactly 2 levels (first column of the metadata file)")
                     +  }
                     -  if(!is.null(ntest)){
                     +  if (!is.null(ntest)){
                          repeat {
                     -      idx_test <- sample(1:n, (ntest/100)*n, replace = FALSE)
                     +      idx_test <- sample(1:nrow(to_lasso), (ntest/100) * nrow(to_lasso), replace = FALSE)
                     -      test <- lasso_data[idx_test ,]
                     +      test <- to_lasso[idx_test ,]
                            test_x <- test[,-1]
                            test_y <- test[,1]
                     -      train <- lasso_data[-idx_test ,]
                     +      train <- to_lasso[-idx_test ,]
                            train_x <- train[,-1]
                            train_y <- train[,1]
@@ -103,7 +93,7 @@ PomaLasso <- function(data,
                        } else {
                          cv_fit <- glmnet::cv.glmnet(features,
                     -                                response,
                     +                                group_factor,
                                                      family = "binomial",
                                                      nfolds = nfolds,
                                                      lambda = lambda,
@@ -120,13 +110,13 @@ PomaLasso <- function(data,
                          ggplot2::labs(x = "log10(Lambda)",
                                        y = "Estimate") +
                          ggplot2::geom_vline(xintercept = glance_cv$lambda.min, lty = 2) +
                     -    ggplot2::theme_bw()
                     +    theme_poma()
                        tmp_coeffs <- glmnet::coef.glmnet(cv_fit, s = "lambda.min")
                        final_coef <- data.frame(feature = tmp_coeffs@Dimnames[[1]][tmp_coeffs@i + 1], coefficient = tmp_coeffs@x) %>%
                          dplyr::as_tibble()
                     -  if(!is.null(ntest)){
                     +  if (!is.null(ntest)){
                          lasso_pred <- predict(cv_fit, s = cv_fit$lambda.min, newx = data.matrix(test_x), type = "class")
                          cm <- caret::confusionMatrix(as.factor(lasso_pred), as.factor(test_y))
+                       }
@@ -145,14 +135,14 @@ PomaLasso <- function(data,
                          ggplot2::geom_vline(xintercept = glance_cv$lambda.min, lty = 2) +
                          ggplot2::theme_bw() +
                          {if(labels)ggplot2::geom_label(data = tidied_cv2_names, ggplot2::aes(label = term))} +
                     -    ggplot2::theme(legend.position = "none") +
                     -    ggplot2::scale_color_viridis_d(option = "plasma", end = 0.8)
                     +    theme_poma(legend_position = "none") +
                     +    scale_color_poma_d()
                        if(!is.null(ntest)){
                          return(list(coefficients = final_coef,
                     -                coefficientPlot = coefficientplot,
                     -                cvLassoPlot = cvlasso,
                     -                confusionMatrix = cm,
                     +                coefficients_plot = coefficientplot,
                     +                cv_plot = cvlasso,
                     +                confusion_matrix = cm,
                                      train_x = train_x,
                                      train_y = train_y,
                                      test_x = test_x,
@@ -160,10 +150,9 @@ PomaLasso <- function(data,
                                      model = cv_fit))
                        } else {
                          return(list(coefficients = final_coef,
                     -                coefficientPlot = coefficientplot,
                     -                cvLassoPlot = cvlasso,
                     +                coefficients_plot = coefficientplot,
                     +                cv_plot = cvlasso,
                                      model = cv_fit))
+                       }
+                    -
+                     }

R/PomaUnivariate.R

History View file @ a07ba46

@@ -75,7 +75,7 @@ PomaUnivariate <- function(data,
                        group_factor <- SummarizedExperiment::colData(data)[,1]
                        to_univariate <- t(SummarizedExperiment::assay(data))
                     -  # group mean and sd
                     +  # group mean and SD
                        group_means <- to_univariate %>%
                          as.data.frame() %>%
                          dplyr::mutate(group = group_factor) %>%

README.Rmd

History View file @ a07ba46

@@ -16,7 +16,7 @@ output: github_document
                      | _BioC_ branch 	| Status 	| Version 	| Dependencies 	| Rank 	|
                      |-	|-	|-	|-	|-	|
                      | [Release](http://bioconductor.org/packages/release/bioc/html/POMA.html) 	| [![Bioc release status](https://bioconductor.org/shields/build/release/bioc/POMA.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/POMA/) 	| [![BioC released version](https://img.shields.io/badge/release%20version-1.6.0-blue.svg)](https://www.bioconductor.org/packages/POMA) 	| [![Dependencies](http://bioconductor.org/shields/dependencies/release/POMA.svg)](http://bioconductor.org/packages/release/bioc/html/POMA.html#since) 	| [![Rank](http://www.bioconductor.org/shields/downloads/release/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) 	|
                     -| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) 	| [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.32-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) 	| [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) 	|
                     +| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) 	| [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.33-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) 	| [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) 	|
                        <!-- badges: end -->

README.md

History View file @ a07ba46

@@ -18,7 +18,7 @@ v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/li
                      | *BioC* branch                                                           | Status                                                                                                                                                  | Version                                                                                                                                            | Dependencies                                                                                                                                         | Rank                                                                                                                         |
                      |-------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|
                      | [Release](http://bioconductor.org/packages/release/bioc/html/POMA.html) | [![Bioc release status](https://bioconductor.org/shields/build/release/bioc/POMA.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/POMA/) | [![BioC released version](https://img.shields.io/badge/release%20version-1.6.0-blue.svg)](https://www.bioconductor.org/packages/POMA)              | [![Dependencies](http://bioconductor.org/shields/dependencies/release/POMA.svg)](http://bioconductor.org/packages/release/bioc/html/POMA.html#since) | [![Rank](http://www.bioconductor.org/shields/downloads/release/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) |
                     -| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html)     | [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/)       | [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.32-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since)     | [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA)   |
                     +| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html)     | [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/)       | [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.33-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since)     | [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA)   |
                      <!-- badges: end -->

man/PomaLasso.Rd

History View file @ a07ba46

@@ -2,7 +2,7 @@
                      % Please edit documentation in R/PomaLasso.R
                      \name{PomaLasso}
                      \alias{PomaLasso}
                     -\title{Lasso, Ridge and Elasticnet Regularized Generalized Linear Models for Binary Outcomes}
                     +\title{Lasso, Ridge, and Elasticnet Regularized Generalized Linear Models for Binary Outcomes}
                      \usage{
                      PomaLasso(
                        data,
@@ -14,23 +14,23 @@ PomaLasso(
+                     )
+                     }
                      \arguments{
                     -\item{data}{A SummarizedExperiment object.}
                     +\item{data}{A \code{SummarizedExperiment} object.}
                     -\item{alpha}{Elasticnet mixing parameter. alpha = 1 is the lasso penalty and alpha = 0 is the ridge penalty. This value must be between 0 and 1.}
                     +\item{alpha}{Numeric. Indicates the elasticnet mixing parameter. alpha = 1 is the LASSO penalty and alpha = 0 is the Ridge penalty.}
                     -\item{ntest}{Numeric indicating the percentage of observations that will be used as test set. Default is NULL (no test set).}
                     +\item{ntest}{Numeric. Indicates the percentage of observations that will be used as test set. Default is NULL (no test set).}
                     -\item{nfolds}{Number of folds for CV (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.}
                     +\item{nfolds}{Numeric. Indicates number of folds for cross-validation (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.}
                     -\item{lambda}{A user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on \code{nlambda} and \code{lambda.min.ratio}. See \code{?glmnet::glmnet()}.}
                     +\item{lambda}{Numeric. Indicates the user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on \code{nlambda} and \code{lambda.min.ratio}. See \code{?glmnet::glmnet()}.}
                     -\item{labels}{Logical indicating if feature names should be plotted in coefficient plot or not. Default is FALSE.}
                     +\item{labels}{Logical. Indicates if feature names should be plotted in coefficient plot or not. Default is FALSE.}
+                     }
                      \value{
                     -A list with all results including plots, tables and the resulting prediction model.
                     +A \code{list} with results.
+                     }
                      \description{
                     -PomaLasso() is an implementation of the lasso, ridge and elasticnet regression from \code{glmnet} package for binary outcomes.
                     +\code{PomaLasso} performs LASSO, Ridge, and Elasticnet regression for feature selection and prediction purposes for binary outcomes.
+                     }
                      \examples{
                      data("st000336")
@@ -39,21 +39,18 @@ data("st000336")
                      st000336 \%>\%
                        PomaImpute() \%>\%
                        PomaNorm() \%>\%
                     -  PomaOutliers() \%>\%
                        PomaLasso()
                      # elasticnet
                      st000336 \%>\%
                        PomaImpute() \%>\%
                        PomaNorm() \%>\%
                     -  PomaOutliers() \%>\%
                        PomaLasso(alpha = 0.5)
                      # ridge
                      st000336 \%>\%
                        PomaImpute() \%>\%
                        PomaNorm() \%>\%
                     -  PomaOutliers() \%>\%
                        PomaLasso(alpha = 0)
+                     }
                      \references{

man/PomaOddsRatio.Rd

History View file @ a07ba46

@@ -13,7 +13,7 @@ PomaOddsRatio(
 )
 }
 \arguments{
-\item{data}{A SummarizedExperiment object.}
+\item{data}{A \code{SummarizedExperiment} object.}
 
 \item{feature_name}{A vector with the name/s of feature/s that will be used to fit the model. If it's NULL (default), all variables will be included in the model.}
 

...	...	@@ -13,7 +13,7 @@ PomaOddsRatio(
13	13	)
14	14	}
15	15	\arguments{
16		-\item{data}{A SummarizedExperiment object.}
	16	+\item{data}{A \code{SummarizedExperiment} object.}
17	17
18	18	\item{feature_name}{A vector with the name/s of feature/s that will be used to fit the model. If it's NULL (default), all variables will be included in the model.}
19	19