pcastellanoescuder authored on 17/09/2023 23:42:04
Showing 8 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: POMA
2 2
 Title: Tools for Omics Data Analysis   
3
-Version: 1.8.32
3
+Version: 1.8.33
4 4
 Authors@R: 
5 5
     c(person(given = "Pol",
6 6
              family = "Castellano-Escuder",
... ...
@@ -1,4 +1,4 @@
1
-# POMA 1.8.32
1
+# POMA 1.8.33
2 2
 
3 3
 * New POMA theme and colorblind-friendly palette
4 4
 * Available sample normalization (sum and quantile)
... ...
@@ -1,18 +1,18 @@
1 1
 
2
-#' Lasso, Ridge and Elasticnet Regularized Generalized Linear Models for Binary Outcomes
2
+#' Lasso, Ridge, and Elasticnet Regularized Generalized Linear Models for Binary Outcomes
3 3
 #'
4
-#' @description PomaLasso() is an implementation of the lasso, ridge and elasticnet regression from `glmnet` package for binary outcomes.
4
+#' @description `PomaLasso` performs LASSO, Ridge, and Elasticnet regression for feature selection and prediction purposes for binary outcomes.
5 5
 #'
6
-#' @param data A SummarizedExperiment object.
7
-#' @param alpha Elasticnet mixing parameter. alpha = 1 is the lasso penalty and alpha = 0 is the ridge penalty. This value must be between 0 and 1.
8
-#' @param ntest Numeric indicating the percentage of observations that will be used as test set. Default is NULL (no test set).
9
-#' @param nfolds Number of folds for CV (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.
10
-#' @param lambda A user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on `nlambda` and `lambda.min.ratio`. See `?glmnet::glmnet()`.
11
-#' @param labels Logical indicating if feature names should be plotted in coefficient plot or not. Default is FALSE.
6
+#' @param data A `SummarizedExperiment` object.
7
+#' @param alpha Numeric. Indicates the elasticnet mixing parameter. alpha = 1 is the LASSO penalty and alpha = 0 is the Ridge penalty. This value must be between 0 and 1. Default is 1.
8
+#' @param ntest Numeric. Indicates the percentage of observations that will be used as the test set. If provided, this value must be between 5 and 50. Default is NULL (no test set).
9
+#' @param nfolds Numeric. Indicates the number of folds for cross-validation (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. The smallest allowable value is nfolds = 3.
10
+#' @param lambda Numeric. Indicates a user-supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on `nlambda` and `lambda.min.ratio`. See `?glmnet::glmnet()`.
11
+#' @param labels Logical. Indicates whether feature names should be plotted in the coefficient plot. Default is FALSE.
12 12
 #' 
13 13
 #' @export
14 14
 #'
15
-#' @return A list with all results including plots, tables and the resulting prediction model.
15
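+#' @return A `list` with the selected coefficients (`coefficients`), the coefficient plot (`coefficients_plot`), the cross-validation plot (`cv_plot`), and the fitted `glmnet::cv.glmnet` model (`model`). If `ntest` is provided, the list also includes the confusion matrix (`confusion_matrix`) and the train and test sets.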
16 16
 #' @references Jerome Friedman, Trevor Hastie, Robert Tibshirani (2010). Regularization Paths for Generalized Linear Models via Coordinate Descent. Journal of Statistical Software, 33(1), 1-22. URL http://www.jstatsoft.org/v33/i01/.
17 17
 #' @author Pol Castellano-Escuder
18 18
 #'
... ...
@@ -25,21 +25,18 @@
25 25
 #' st000336 %>%
26 26
 #'   PomaImpute() %>%
27 27
 #'   PomaNorm() %>%
28
-#'   PomaOutliers() %>%
29 28
 #'   PomaLasso()
30 29
 #' 
31 30
 #' # elasticnet
32 31
 #' st000336 %>%
33 32
 #'   PomaImpute() %>%
34 33
 #'   PomaNorm() %>%
35
-#'   PomaOutliers() %>%
36 34
 #'   PomaLasso(alpha = 0.5)
37 35
 #' 
38 36
 #' # ridge
39 37
 #' st000336 %>%
40 38
 #'   PomaImpute() %>%
41 39
 #'   PomaNorm() %>%
42
-#'   PomaOutliers() %>%
43 40
 #'   PomaLasso(alpha = 0)
44 41
 PomaLasso <- function(data,
45 42
                       alpha = 1,
... ...
@@ -48,44 +45,37 @@ PomaLasso <- function(data,
48 45
                       lambda = NULL,
49 46
                       labels = FALSE){
50 47
 
51
-  if (missing(data)) {
52
-    stop("data argument is empty!")
53
-  }
54
-  if(!is(data, "SummarizedExperiment")){
55
-    stop("data is not a SummarizedExperiment object. \nSee POMA::PomaSummarizedExperiment or SummarizedExperiment::SummarizedExperiment")
48
+  if (!is(data, "SummarizedExperiment")){
49
+    stop("data is not a SummarizedExperiment object. \nSee POMA::PomaCreateObject or SummarizedExperiment::SummarizedExperiment")
56 50
   }
57 51
   if (alpha > 1 | alpha < 0) {
58
-    stop("alpha must be a number between 0 and 1...")
52
+    stop("alpha must be a number between 0 and 1")
59 53
   }
60 54
   if(!is.null(ntest)){
61 55
     if (ntest > 50 | ntest < 5) {
62
-      stop("ntest must be a number between 5 and 50...")
56
+      stop("ntest must be a number between 5 and 50 (%)")
63 57
     }
64 58
   }
65
-  if (length(levels(as.factor(SummarizedExperiment::colData(data)[,1]))) > 2) {
66
-    stop("Your data have more than two groups!")
67
-  }
68
-  if (length(levels(as.factor(SummarizedExperiment::colData(data)[,1]))) < 2) {
69
-    stop("Your data have less than two groups!")
70
-  }
71
-
59
+  
72 60
   features <- t(SummarizedExperiment::assay(data))
73
-  response <- as.factor(SummarizedExperiment::colData(data)[,1])
74
-  lasso_data <- cbind(response, features)
75
-
76
-  n <- nrow(lasso_data)
61
+  group_factor <- as.factor(SummarizedExperiment::colData(data)[,1])
62
+  to_lasso <- cbind(group_factor, features)
63
+  
64
+  if (length(table(group_factor)[table(group_factor) != 0]) != 2) {
65
+    stop("Grouping factor must have exactly 2 levels (first column of the metadata file)")
66
+  }
77 67
 
78
-  if(!is.null(ntest)){
68
+  if (!is.null(ntest)){
79 69
     
80 70
     repeat {
81 71
 
82
-      idx_test <- sample(1:n, (ntest/100)*n, replace = FALSE)
72
+      idx_test <- sample(1:nrow(to_lasso), (ntest/100) * nrow(to_lasso), replace = FALSE)
83 73
       
84
-      test <- lasso_data[idx_test ,]
74
+      test <- to_lasso[idx_test ,]
85 75
       test_x <- test[,-1]
86 76
       test_y <- test[,1]
87 77
 
88
-      train <- lasso_data[-idx_test ,]
78
+      train <- to_lasso[-idx_test ,]
89 79
       train_x <- train[,-1]
90 80
       train_y <- train[,1]
91 81
       
... ...
@@ -103,7 +93,7 @@ PomaLasso <- function(data,
103 93
     
104 94
   } else {
105 95
     cv_fit <- glmnet::cv.glmnet(features,
106
-                                response, 
96
+                                group_factor, 
107 97
                                 family = "binomial", 
108 98
                                 nfolds = nfolds, 
109 99
                                 lambda = lambda, 
... ...
@@ -120,13 +110,13 @@ PomaLasso <- function(data,
120 110
     ggplot2::labs(x = "log10(Lambda)",
121 111
                   y = "Estimate") +
122 112
     ggplot2::geom_vline(xintercept = glance_cv$lambda.min, lty = 2) +
123
-    ggplot2::theme_bw()
113
+    theme_poma()
124 114
 
125 115
   tmp_coeffs <- glmnet::coef.glmnet(cv_fit, s = "lambda.min")
126 116
   final_coef <- data.frame(feature = tmp_coeffs@Dimnames[[1]][tmp_coeffs@i + 1], coefficient = tmp_coeffs@x) %>% 
127 117
     dplyr::as_tibble()
128 118
 
129
-  if(!is.null(ntest)){
119
+  if (!is.null(ntest)){
130 120
     lasso_pred <- predict(cv_fit, s = cv_fit$lambda.min, newx = data.matrix(test_x), type = "class")
131 121
     cm <- caret::confusionMatrix(as.factor(lasso_pred), as.factor(test_y))
132 122
   }
... ...
@@ -145,14 +135,14 @@ PomaLasso <- function(data,
145 135
     ggplot2::geom_vline(xintercept = glance_cv$lambda.min, lty = 2) +
146 136
     ggplot2::theme_bw() +
147 137
     {if(labels)ggplot2::geom_label(data = tidied_cv2_names, ggplot2::aes(label = term))} +
148
-    ggplot2::theme(legend.position = "none") +
149
-    ggplot2::scale_color_viridis_d(option = "plasma", end = 0.8)
138
+    theme_poma(legend_position = "none") +
139
+    scale_color_poma_d()
150 140
 
151 141
   if(!is.null(ntest)){
152 142
     return(list(coefficients = final_coef, 
153
-                coefficientPlot = coefficientplot, 
154
-                cvLassoPlot = cvlasso,
155
-                confusionMatrix = cm,
143
+                coefficients_plot = coefficientplot, 
144
+                cv_plot = cvlasso,
145
+                confusion_matrix = cm,
156 146
                 train_x = train_x,
157 147
                 train_y = train_y,
158 148
                 test_x = test_x,
... ...
@@ -160,10 +150,9 @@ PomaLasso <- function(data,
160 150
                 model = cv_fit))
161 151
   } else {
162 152
     return(list(coefficients = final_coef, 
163
-                coefficientPlot = coefficientplot, 
164
-                cvLassoPlot = cvlasso,
153
+                coefficients_plot = coefficientplot, 
154
+                cv_plot = cvlasso,
165 155
                 model = cv_fit))
166 156
   }
167
-
168 157
 }
169 158
 
... ...
@@ -75,7 +75,7 @@ PomaUnivariate <- function(data,
75 75
   group_factor <- SummarizedExperiment::colData(data)[,1]
76 76
   to_univariate <- t(SummarizedExperiment::assay(data))
77 77
 
78
-  # group mean and sd
78
+  # group mean and SD
79 79
   group_means <- to_univariate %>%
80 80
     as.data.frame() %>% 
81 81
     dplyr::mutate(group = group_factor) %>%
... ...
@@ -16,7 +16,7 @@ output: github_document
16 16
 | _BioC_ branch 	| Status 	| Version 	| Dependencies 	| Rank 	|
17 17
 |-	|-	|-	|-	|-	|
18 18
 | [Release](http://bioconductor.org/packages/release/bioc/html/POMA.html) 	| [![Bioc release status](https://bioconductor.org/shields/build/release/bioc/POMA.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/POMA/) 	| [![BioC released version](https://img.shields.io/badge/release%20version-1.6.0-blue.svg)](https://www.bioconductor.org/packages/POMA) 	| [![Dependencies](http://bioconductor.org/shields/dependencies/release/POMA.svg)](http://bioconductor.org/packages/release/bioc/html/POMA.html#since) 	| [![Rank](http://www.bioconductor.org/shields/downloads/release/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) 	|
19
-| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) 	| [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.32-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) 	| [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) 	|
19
+| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/) 	| [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.33-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) 	| [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since) 	| [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) 	|
20 20
 
21 21
   <!-- badges: end -->
22 22
 
... ...
@@ -18,7 +18,7 @@ v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/li
18 18
 | *BioC* branch                                                           | Status                                                                                                                                                  | Version                                                                                                                                            | Dependencies                                                                                                                                         | Rank                                                                                                                         |
19 19
 |-------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|
20 20
 | [Release](http://bioconductor.org/packages/release/bioc/html/POMA.html) | [![Bioc release status](https://bioconductor.org/shields/build/release/bioc/POMA.svg)](https://bioconductor.org/checkResults/release/bioc-LATEST/POMA/) | [![BioC released version](https://img.shields.io/badge/release%20version-1.6.0-blue.svg)](https://www.bioconductor.org/packages/POMA)              | [![Dependencies](http://bioconductor.org/shields/dependencies/release/POMA.svg)](http://bioconductor.org/packages/release/bioc/html/POMA.html#since) | [![Rank](http://www.bioconductor.org/shields/downloads/release/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA) |
21
-| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html)     | [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/)       | [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.32-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since)     | [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA)   |
21
+| [Devel](http://bioconductor.org/packages/devel/bioc/html/POMA.html)     | [![Bioc devel status](https://bioconductor.org/shields/build/devel/bioc/POMA.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/POMA/)       | [![BioC devel version](https://img.shields.io/badge/devel%20version-1.8.33-blue.svg)](https://bioconductor.org/packages/devel/bioc/html/POMA.html) | [![Dependencies](http://bioconductor.org/shields/dependencies/devel/POMA.svg)](http://bioconductor.org/packages/devel/bioc/html/POMA.html#since)     | [![Rank](http://www.bioconductor.org/shields/downloads/devel/POMA.svg)](https://bioconductor.org/packages/stats/bioc/POMA)   |
22 22
 
23 23
 <!-- badges: end -->
24 24
 
... ...
@@ -2,7 +2,7 @@
2 2
 % Please edit documentation in R/PomaLasso.R
3 3
 \name{PomaLasso}
4 4
 \alias{PomaLasso}
5
-\title{Lasso, Ridge and Elasticnet Regularized Generalized Linear Models for Binary Outcomes}
5
+\title{Lasso, Ridge, and Elasticnet Regularized Generalized Linear Models for Binary Outcomes}
6 6
 \usage{
7 7
 PomaLasso(
8 8
   data,
... ...
@@ -14,23 +14,23 @@ PomaLasso(
14 14
 )
15 15
 }
16 16
 \arguments{
17
-\item{data}{A SummarizedExperiment object.}
17
+\item{data}{A \code{SummarizedExperiment} object.}
18 18
 
19
-\item{alpha}{Elasticnet mixing parameter. alpha = 1 is the lasso penalty and alpha = 0 is the ridge penalty. This value must be between 0 and 1.}
19
+\item{alpha}{Numeric. Indicates the elasticnet mixing parameter. alpha = 1 is the LASSO penalty and alpha = 0 is the Ridge penalty.}
20 20
 
21
-\item{ntest}{Numeric indicating the percentage of observations that will be used as test set. Default is NULL (no test set).}
21
+\item{ntest}{Numeric. Indicates the percentage of observations that will be used as test set. Default is NULL (no test set).}
22 22
 
23
-\item{nfolds}{Number of folds for CV (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.}
23
+\item{nfolds}{Numeric. Indicates number of folds for cross-validation (default is 10). Although nfolds can be as large as the sample size (leave-one-out CV), it is not recommended for large datasets. Smallest value allowable is nfolds = 3.}
24 24
 
25
-\item{lambda}{A user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on \code{nlambda} and \code{lambda.min.ratio}. See \code{?glmnet::glmnet()}.}
25
+\item{lambda}{Numeric. Indicates the user supplied lambda sequence. Typical usage is to have the program compute its own lambda sequence based on \code{nlambda} and \code{lambda.min.ratio}. See \code{?glmnet::glmnet()}.}
26 26
 
27
-\item{labels}{Logical indicating if feature names should be plotted in coefficient plot or not. Default is FALSE.}
27
+\item{labels}{Logical. Indicates if feature names should be plotted in coefficient plot or not. Default is FALSE.}
28 28
 }
29 29
 \value{
30
-A list with all results including plots, tables and the resulting prediction model.
30
+A \code{list} with results.
31 31
 }
32 32
 \description{
33
-PomaLasso() is an implementation of the lasso, ridge and elasticnet regression from \code{glmnet} package for binary outcomes.
33
+\code{PomaLasso} performs LASSO, Ridge, and Elasticnet regression for feature selection and prediction purposes for binary outcomes.
34 34
 }
35 35
 \examples{
36 36
 data("st000336")
... ...
@@ -39,21 +39,18 @@ data("st000336")
39 39
 st000336 \%>\%
40 40
   PomaImpute() \%>\%
41 41
   PomaNorm() \%>\%
42
-  PomaOutliers() \%>\%
43 42
   PomaLasso()
44 43
 
45 44
 # elasticnet
46 45
 st000336 \%>\%
47 46
   PomaImpute() \%>\%
48 47
   PomaNorm() \%>\%
49
-  PomaOutliers() \%>\%
50 48
   PomaLasso(alpha = 0.5)
51 49
 
52 50
 # ridge
53 51
 st000336 \%>\%
54 52
   PomaImpute() \%>\%
55 53
   PomaNorm() \%>\%
56
-  PomaOutliers() \%>\%
57 54
   PomaLasso(alpha = 0)
58 55
 }
59 56
 \references{
... ...
@@ -13,7 +13,7 @@ PomaOddsRatio(
13 13
 )
14 14
 }
15 15
 \arguments{
16
-\item{data}{A SummarizedExperiment object.}
16
+\item{data}{A \code{SummarizedExperiment} object.}
17 17
 
18 18
 \item{feature_name}{A vector with the name/s of feature/s that will be used to fit the model. If it's NULL (default), all variables will be included in the model.}
19 19