#' Database knowledge for wppi #' #' Retrieves the database knowledge necessary for WPPI directly from the #' databases. The databases used here are the Human Phenotype Ontology (HPO, #' \url{https://hpo.jax.org/app/}), Gene Ontology (GO, #' \url{http://geneontology.org/}) and OmniPath #' (\url{https://omnipathdb.org/}). The downloads carried out by #' the OmnipathR package and data required by wppi are extracted from each #' table. #' #' @param GO_slim Character: use a GO subset (slim). If \code{NULL}, the #' full GO is used. The most often used slim is called "generic". For #' a list of available slims see \code{OmnipathR::go_annot_slim}. #' @param GO_aspects Character vector with the single letter codes of the #' gene ontology aspects to use. By default all three aspects are used. #' The aspects are "C": cellular component, "F": molecular function and #' "P" biological process. #' @param GO_organism Character: name of the organism for GO annotations. #' @param ... Passed to #' \code{OmnipathR::import_post_translational_interactions}. With these #' options you can customize the network retrieved from OmniPath. #' #' @return A list of data frames (tibbles) with database knowledge from HPO, #' GO and OmniPath. #' #' @details #' If you use a GO subset (slim), building it at the first time might take #' around 20 minutes. The result is saved into the cache so next time loading #' the data from there is really quick. #' Gene Ontology annotations are available for a few other organisms apart #' from human. The currently supported organisms are "chicken", "cow", "dog", #' "human", "pig" and "uniprot_all". If you disable \code{HPO_annot} you can #' use \code{wppi} to score PPI networks other than human. #' #' @examples #' # Download all data #' data_wppi <- wppi_data() #' # OmniPath #' omnipath_data <- data_wppi$omnipath #' # HPO #' HPO_data <- data_wppi$hpo #' # GO #' GO_data <- data_wppi$go #' #' @importFrom logger log_info #' @export #' @seealso \itemize{ #' \item{\code{\link{wppi_go_data}}} #' \item{\code{\link{wppi_hpo_data}}} #' \item{\code{\link{wppi_omnipath_data}}} #' } wppi_data <- function( GO_slim = NULL, GO_aspects = c('C', 'F', 'P'), GO_organism = 'human', ... ){ log_info('Collecting database knowledge.') ### Collect database data hpo <- wppi_hpo_data() go <- wppi_go_data(GO_slim, GO_aspects, GO_organism) omnipath <- wppi_omnipath_data(...) log_info('Finished collecting database knowledge.') list( hpo = hpo, go = go, omnipath = omnipath ) } #' Retrieves data from Human Phenotype Ontology (HPO) #' #' Human Phenotype Ontology (\url{https://hpo.jax.org/app/}), HPO) annotates #' proteins with phenotypes and diseases. #' #' @return A data frame (tibble) with HPO data. #' #' @examples #' hpo <- wppi_hpo_data() #' #' @importFrom magrittr %>% #' @importFrom dplyr select distinct #' @importFrom OmnipathR hpo_download #' @export #' @seealso \code{\link{wppi_data}} wppi_hpo_data <- function(){ # NSE vs. R CMD check workaround entrez_gene_id <- entrez_gene_symbol <- hpo_term_id <- hpo_term_name <- NULL OmnipathR::hpo_download() %>% select( Gene_ID = entrez_gene_id, Gene_Symbol = entrez_gene_symbol, ID = hpo_term_id, Name = hpo_term_name ) %>% distinct() } #' Retrieves data from Gene Ontology (GO) #' #' Gene Ontology (\url{http://geneontology.org/}), GO) annotates genes #' by their function, localization and biological processes. #' #' @param slim Character: use a GO subset (slim). If \code{NULL}, the #' full GO is used. The most often used slim is called "generic". For #' a list of available slims see \code{OmnipathR::go_annot_slim}. #' @param aspects Character vector with the single letter codes of the #' gene ontology aspects to use. By default all three aspects are used. #' The aspects are "C": cellular component, "F": molecular function and #' "P" biological process. #' @param organism Character: name of the organism for GO annotations. #' #' @return A data frame (tibble) with GO annotation data. #' #' @details #' If you use a GO subset (slim), building it at the first time might take #' around 20 minutes. The result is saved into the cache so next time loading #' the data from there is really quick. #' Gene Ontology annotations are available for a few other organisms apart #' from human. The currently supported organisms are "chicken", "cow", "dog", #' "human", "pig" and "uniprot_all". If you disable \code{HPO_annot} you can #' use \code{wppi} to score PPI networks other than human. #' #' @examples #' go <- wppi_go_data() #' #' @importFrom OmnipathR go_annot_download #' @importFrom magrittr %>% #' @importFrom dplyr select #' @export #' @seealso \code{\link{wppi_data}} wppi_go_data <- function( slim = NULL, aspects = c('C', 'F', 'P'), organism = 'human' ){ # NSE vs. R CMD check workaround db_object_symbol <- go_id <- aspect <- NULL OmnipathR::go_annot_download( slim = slim, aspects = aspects, organism = organism ) %>% select( Gene_Symbol = db_object_symbol, ID = go_id, Aspect = aspect ) } #' Protein-protein interaction data from OmniPath #' #' OmniPath (\url{https://omnipathdb.org/}) integrates protein-protein #' interactions (PPI) from more than 30 resources. The network created is #' highly customizable by passing parameters #' to \code{OmnipathR::import_post_translational_interactions}. #' #' @param ... Passed to #' \code{OmnipathR::import_post_translational_interactions}. #' #' @return A data frame (tibble) with protein-protein interaction data from #' OmniPath. #' #' @examples #' omnipath <- wppi_omnipath_data() #' #' @importFrom OmnipathR import_post_translational_interactions #' @importFrom RCurl merge.list #' @importFrom magrittr %>% #' @importFrom rlang exec !!! #' @importFrom dplyr select #' @export #' @seealso \code{\link{wppi_data}} wppi_omnipath_data <- function(...){ # OmniPath omnipath_param <- list(...) %>% merge.list(list(entity_type = 'protein')) OmnipathR::import_post_translational_interactions %>% exec(!!!omnipath_param) %>% select(seq(10)) }