#' Pick power to fit network to a scale-free topology
#'
#' @param exp A gene expression data frame with genes in row names and
#' samples in column names or a `SummarizedExperiment` object.
#' @param net_type Network type. One of 'signed', 'signed hybrid' or
#' 'unsigned'. Default is signed.
#' @param rsquared R squared cutoff. Default is 0.8.
#' @param cor_method Correlation method. One of "pearson", "biweight"
#' or "spearman". Default is "spearman".
#'
#' @return A list containing: \itemize{
#'   \item{power}{Optimal power based on scale-free topology fit}
#'   \item{plot}{A ggplot object displaying main statistics of the SFT fit test}
#' }
#'
#' @author Fabricio Almeida-Silva
#' @seealso
#'  \code{\link[WGCNA]{pickSoftThreshold}}
#' @rdname SFT_fit
#' @export
#' @importFrom WGCNA pickSoftThreshold
#' @importFrom ggpubr ggarrange ggscatter
#' @importFrom ggplot2 theme element_text geom_hline
#' @examples
#' data(filt.se)
#' sft <- SFT_fit(filt.se, cor_method="pearson")
SFT_fit <- function(exp, net_type="signed", rsquared=0.8, cor_method="spearman") {
    exp <- handleSE(exp)
    texp <- t(exp)

    if(cor_method == "pearson") {
        sft <- WGCNA::pickSoftThreshold(texp, networkType = net_type, powerVector=3:20, RsquaredCut = rsquared)
    } else if(cor_method == "biweight") {
        sft <- WGCNA::pickSoftThreshold(texp, networkType = net_type, powerVector=3:20,
                                        RsquaredCut = rsquared, corFnc = bicor, corOptions = list(use = 'p', maxPOutliers = 0.05))
    } else if (cor_method == "spearman"){
        sft <- WGCNA::pickSoftThreshold(texp, networkType = net_type, powerVector=3:20,
                                        RsquaredCut = rsquared, corOptions = list(use = 'p', method = "spearman"))
    } else {
        stop("Please, specify a correlation method (one of 'spearman', 'pearson' or 'biweight').")
    }
    wgcna_power <- sft$powerEstimate
    if(is.na(wgcna_power)){
        message("No power reached R-squared cut-off, now choosing max R-squared based power")
        wgcna_power <- sft$fitIndices$Power[which(sft$fitIndices$SFT.R.sq == max(sft$fitIndices$SFT.R.sq))]
    }

    # Create data frame with indices to plot
    sft_df <- data.frame(power = sft$fitIndices$Power,
                         fit = -sign(sft$fitIndices$slope) * sft$fitIndices$SFT.R.sq,
                         meank = sft$fitIndices$mean.k.)
    # Plot 1
    p1 <- ggpubr::ggscatter(sft_df, x="power", y="fit",
                            xlab="Soft threshold (power)",
                            ylab=expression(paste("Scale-free topology fit - ", R^{2})),
                            title = "Scale independence",
                            ylim=c(0, 1), label="power", size=1,
                            font.label=10,
                            color="gray10",
                            font.tickslab=10, ytickslab.rt=90) +
        geom_hline(yintercept = rsquared, color="brown3") +
        theme(plot.title = element_text(hjust = 0.5))

    # Plot 2
    p2 <- ggscatter(sft_df, x="power", y="meank",
                    xlab="Soft threshold (power)", ylab="Mean connectivity (k)",
                    title="Mean connectivity",
                    label="power", size=1, font.label=10, color="gray10",
                    font.tickslab=10, ytickslab.rt=90) +
        theme(plot.title = element_text(hjust=0.5))

    # Combined plot
    sft_plot <- ggpubr::ggarrange(p1, p2)
    result <- list(power=as.numeric(wgcna_power), plot=sft_plot)
    return(result)
}


#' Reconstruct gene coexpression network from gene expression
#'
#' @param exp A gene expression data frame with genes in row names and
#' samples in column names or a `SummarizedExperiment` object.
#' @param net_type Network type. One of 'signed', 'signed hybrid' or
#' 'unsigned'. Default: 'signed'.
#' @param module_merging_threshold Correlation threshold to merge similar
#' modules into a single one. Default: 0.8.
#' @param SFTpower SFT power generated by the function \code{SFT_fit}.
#' @param cor_method Correlation method. One of "pearson", "biweight" or
#' "spearman". Default is "spearman".
#' @param verbose Logical indicating whether to display progress
#' messages or not. Default: FALSE.
#'
#' @return List containing: \itemize{
#'   \item Adjacency matrix
#'   \item Data frame of module eigengenes
#'   \item Data frame of genes and their corresponding modules
#'   \item Data frame of intramodular connectivity
#'   \item Vector of module assignment
#'   \item Correlation matrix
#'   \item Parameters used for network reconstruction
#'   \item Objects to plot the dendrogram in \code{plot_dendro_and_colors}.
#' }
#' @author Fabricio Almeida-Silva
#' @seealso
#'  \code{\link[WGCNA]{adjacency.fromSimilarity}},\code{\link[WGCNA]{TOMsimilarity}},\code{\link[WGCNA]{standardColors}},\code{\link[WGCNA]{labels2colors}},\code{\link[WGCNA]{moduleEigengenes}},\code{\link[WGCNA]{plotEigengeneNetworks}},\code{\link[WGCNA]{mergeCloseModules}},\code{\link[WGCNA]{plotDendroAndColors}},\code{\link[WGCNA]{intramodularConnectivity}}
#'  \code{\link[dynamicTreeCut]{cutreeDynamicTree}}
#' @rdname exp2gcn
#' @export
#' @importFrom WGCNA TOMsimilarity standardColors labels2colors moduleEigengenes plotEigengeneNetworks mergeCloseModules plotDendroAndColors intramodularConnectivity
#' @importFrom dynamicTreeCut cutreeDynamicTree
#' @importFrom stats as.dist median cor fisher.test hclust na.omit prcomp qnorm qqplot quantile sd var
#' @importFrom grDevices colorRampPalette
#' @examples
#' data(filt.se)
#' # The SFT fit was previously calculated and the optimal power was 16
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
exp2gcn <- function(exp, net_type="signed",
                    module_merging_threshold = 0.8,
                    SFTpower = NULL, cor_method = "spearman",
                    verbose = FALSE) {

    params <- list(net_type=net_type,
                   module_merging_threshold=module_merging_threshold,
                   SFTpower=SFTpower,
                   cor_method=cor_method)
    norm.exp <- handleSE(exp)

    if(is.null(SFTpower)) { stop("Please, specify the SFT power.") }

    if(verbose) { message("Calculating adjacency matrix...") }
    cor_matrix <- calculate_cor_adj(cor_method, norm.exp, SFTpower, net_type)$cor
    adj_matrix <- calculate_cor_adj(cor_method, norm.exp, SFTpower, net_type)$adj

    #Convert to matrix
    gene_ids <- rownames(adj_matrix)
    adj_matrix <- matrix(adj_matrix, nrow=nrow(adj_matrix))
    rownames(adj_matrix) <- gene_ids
    colnames(adj_matrix) <- gene_ids

    #Calculate TOM from adjacency matrix
    if(verbose) { message("Calculating topological overlap matrix (TOM)...") }
    tomtype <- get_TOMtype(net_type)
    TOM <- WGCNA::TOMsimilarity(adj_matrix, TOMType = tomtype)

    #Hierarchically cluster genes
    dissTOM <- 1-TOM #hclust takes a distance structure
    geneTree <- hclust(as.dist(dissTOM), method="average")

    #Detecting coexpression modules
    if(verbose) { message("Detecting coexpression modules...") }
    old.module_labels <- dynamicTreeCut::cutreeDynamicTree(dendro=geneTree,
                                                           minModuleSize=30,
                                                           deepSplit=TRUE)

    nmod <- length(unique(old.module_labels))
    palette <- rev(WGCNA::standardColors(nmod))
    old.module_colors <- WGCNA::labels2colors(old.module_labels, colorSeq = palette)

    #Calculate eigengenes and merge the ones who are highly correlated
    if(verbose) { message("Calculating module eigengenes (MEs)...") }
    old.MElist <- WGCNA::moduleEigengenes(t(norm.exp),
                                          colors = old.module_colors,
                                          softPower = SFTpower)
    old.MEs <- old.MElist$eigengenes

    #Calculate dissimilarity of module eigengenes
    MEDiss1 <- 1-cor(old.MEs)

    #Hierarchically cluster module eigengenes to see how they're related
    old.METree <- hclust(as.dist(MEDiss1), method="average")

    #Then, choose a height cut
    MEDissThreshold <- 1-module_merging_threshold

    #Merge the modules.
    if(verbose) { message("Merging similar modules...") }
    if(cor_method == "pearson") {
        merge1 <- WGCNA::mergeCloseModules(t(norm.exp), old.module_colors,
                                           cutHeight = MEDissThreshold,
                                           verbose = 0, colorSeq=palette)
    } else if(cor_method == "spearman") {
        merge1 <- WGCNA::mergeCloseModules(t(norm.exp), old.module_colors,
                                           cutHeight = MEDissThreshold,
                                           verbose = 0,
                                           corOptions = list(use = "p", method = "spearman"),
                                           colorSeq=palette)
    } else if(cor_method == "biweight") {
        merge1 <- WGCNA::mergeCloseModules(t(norm.exp), old.module_colors,
                                           cutHeight = MEDissThreshold,
                                           verbose = 0,
                                           corFnc = bicor, colorSeq=palette)
    } else {
        stop("Please, specify a correlation method. One of 'spearman', 'pearson' or 'biweight'.")
    }
    new.module_colors <- merge1$colors
    new.MEs <- merge1$newMEs

    #Plot dendrogram of merged modules eigengenes
    new.METree <- hclust(as.dist(1-cor(new.MEs)), method="average")

    #Get data frame with genes and modules they belong to
    genes_and_modules <- as.data.frame(cbind(gene_ids, new.module_colors),
                                       stringsAsFactors = FALSE)
    colnames(genes_and_modules) <- c("Genes", "Modules")

    if(verbose) { message("Calculating intramodular connectivity...") }
    kwithin <- WGCNA::intramodularConnectivity(adj_matrix, new.module_colors)

    result.list <- list(adjacency_matrix = adj_matrix,
                        MEs = new.MEs,
                        genes_and_modules = genes_and_modules,
                        kIN = kwithin,
                        moduleColors = new.module_colors,
                        correlation_matrix = cor_matrix,
                        params=params,
                        dendro_plot_objects = list(tree = geneTree,
                                                   unmerged = old.module_colors)
                        )
    return(result.list)
}

#' Plot eigengene network
#'
#' @param gcn List object returned by \code{exp2gcn}.
#'
#' @return A base plot with the eigengene network
#' @importFrom WGCNA plotEigengeneNetworks
#' @importFrom graphics par layout
#' @export
#' @rdname plot_eigengene_network
#' @examples
#' data(filt.se)
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' plot_eigengene_network(gcn)
plot_eigengene_network <- function(gcn) {
    on.exit(graphics::layout(1))
    opar <- par(no.readonly=TRUE)
    on.exit(par(opar), add=TRUE, after=FALSE)
    p <- WGCNA::plotEigengeneNetworks(gcn$MEs, "",
                                      marDendro = c(3,5,2,6),
                                      marHeatmap = c(3,4,2,2))
    return(p)
}

#' Plot dendrogram of genes and modules
#'
#' @param gcn List object returned by \code{exp2gcn}.
#'
#' @return A base plot with the gene dendrogram and modules.
#' @importFrom WGCNA plotDendroAndColors
#' @importFrom graphics layout par
#' @export
#' @rdname plot_dendro_and_colors
#' @examples
#' data(filt.se)
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' plot_dendro_and_colors(gcn)
plot_dendro_and_colors <- function(gcn) {
    on.exit(graphics::layout(1))
    opar <- par(no.readonly=TRUE)
    on.exit(par(opar), add=TRUE, after=FALSE)
    p <- WGCNA::plotDendroAndColors(gcn$dendro_plot_objects$tree,
                               cbind(
                                   gcn$dendro_plot_objects$unmerged,
                                   gcn$moduleColors
                               ),
                               c("Unmerged", "Merged"),
                               dendroLabels = FALSE, hang = 0.03,
                               addGuide = TRUE, guideHang = 0.05)
    return(NULL)
}

#' Perform module stability analysis
#'
#' @param exp A gene expression data frame with genes in row names and
#' samples in column names or a `SummarizedExperiment` object.
#' @param net List object returned by \code{exp2gcn}.
#' @param nRuns Number of times to resample. Default is 20.
#'
#' @return A base plot with the module stability results.
#' @seealso
#'  \code{\link[WGCNA]{sampledBlockwiseModules}},\code{\link[WGCNA]{matchLabels}},\code{\link[WGCNA]{plotDendroAndColors}}
#' @rdname module_stability
#' @export
#' @importFrom WGCNA sampledBlockwiseModules matchLabels plotDendroAndColors
#' @importFrom graphics par layout
#' @examples
#' data(filt.se)
#' # The SFT fit was previously calculated and the optimal power was 16
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' # For simplicity, only 2 runs
#' module_stability(exp = filt.se, net = gcn, nRuns = 2)
module_stability <- function(exp, net, nRuns = 20) {
    norm.exp <- handleSE(exp)
    expr <- as.matrix(t(norm.exp))
    net_type <- net$params$net_type
    SFTpower <- net$params$SFTpower
    cor_method <- net$params$cor_method
    module_merging_threshold <- 1 - net$params$module_merging_threshold
    TOMType <- get_TOMtype(net_type)

    mods0 <- WGCNA::sampledBlockwiseModules(
        nRuns = nRuns, replace = FALSE, datExpr = expr,
        maxBlockSize = 5000, checkSoftPower = FALSE, corType = cor_method,
        networkType = net_type, TOMType = TOMType, TOMDenom = "mean",
        mergeCutHeight = module_merging_threshold,
        reassignThreshold = 0, numericLabels = FALSE,
        checkMissingData = FALSE, quickCor = 0, verbose = 2
        )
    nGenes <- ncol(expr)

    # Define a matrix of labels for the original and all resampling runs
    labels <- matrix(0, nGenes, nRuns + 1)
    labels[, 1] <- mods0[[1]]$mods$colors

    # Relabel modules in each of the resampling runs
    labs <- Reduce(cbind, lapply(2:(nRuns+1), function(x) {
        labels[, x] <- WGCNA::matchLabels(mods0[[x-1]]$mods$colors, labels[, 1])
    }))
    labels <- cbind(labels[,1], labs)

    on.exit(graphics::layout(1))
    opar <- par(no.readonly=TRUE)
    on.exit(par(opar), add=TRUE, after=FALSE)
    p <- WGCNA::plotDendroAndColors(mods0[[1]]$mods$dendrograms[[1]],
                               labels,
                               c("Full data set", paste("Resampling",
                                                        seq_len(nRuns))),
                               main = "Dendrogram and modules: resampled data",
                               autoColorHeight = FALSE, colorHeight = 0.65,
                               dendroLabels = FALSE, hang = 0.03,
                               guideHang = 0.05,
                               addGuide = TRUE, guideAll = FALSE,
                               cex.main = 1, cex.lab = 0.8,
                               cex.colorLabels = 0.7,
                               marAll = c(0, 5, 3, 0))
    return(NULL)
}

#' Correlate module eigengenes to trait
#'
#' @param exp A gene expression data frame with genes in row names and
#' samples in column names or a `SummarizedExperiment` object.
#' @param metadata A data frame containing sample names in row names and
#' sample annotation in the first column. Ignored if `exp` is
#' a `SummarizedExperiment` object, since the function will extract colData.
#' @param MEs Module eigengenes. It is the 2nd element of the result list
#' generated by the function \code{exp2gcn}.
#' @param cor_method Method to calculate correlation. One of 'pearson',
#' 'spearman' or 'kendall'. Default is 'spearman'.
#' @param transpose Logical indicating whether to transpose the heatmap of not.
#' Default is FALSE.
#' @param palette RColorBrewer's color palette to use. Default is "RdYlBu",
#' a palette ranging from blue to red.
#' @param continuous_trait Logical indicating if trait is a continuous variable.
#' Default is FALSE.
#' @param cex.lab.x Font size for x axis labels. Default: 0.6.
#' @param cex.lab.y Font size for y axis labels. Default: 0.6.
#' @param cex.text Font size for numbers inside matrix. Default: 0.6.
#'
#' @return A data frame with correlation and correlation p-values for each pair
#' of ME and trait along with a heatmap.
#' @details Significance levels:
#' 1 asterisk: significant at alpha = 0.05.
#' 2 asterisks: significant at alpha = 0.01.
#' 3 asterisks: significant at alpha = 0.001.
#' no asterisk: not significant.
#'
#' @author Fabricio Almeida-Silva
#' @seealso
#'  \code{\link[WGCNA]{corPvalueStudent}},\code{\link[WGCNA]{labeledHeatmap}},\code{\link[WGCNA]{blueWhiteRed}}
#' @rdname module_trait_cor
#' @export
#' @importFrom WGCNA corPvalueStudent labeledHeatmap
#' @importFrom reshape2 melt
#' @importFrom RColorBrewer brewer.pal
#' @importFrom SummarizedExperiment colData
#' @importFrom graphics par
#' @examples
#' data(filt.se)
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' module_trait_cor(filt.se, MEs=gcn$MEs)
module_trait_cor <- function(exp, metadata, MEs, cor_method="spearman",
                             transpose=FALSE, palette="RdYlBu",
                             continuous_trait=FALSE,
                             cex.lab.x=0.6, cex.lab.y=0.6,
                             cex.text=0.6) {
    if(is(exp, "SummarizedExperiment")) {
        metadata <- as.data.frame(SummarizedExperiment::colData(exp))
    }
    exp <- handleSE(exp)
    metadata <- metadata[colnames(exp), , drop=FALSE]
    trait <- handle_trait_type(metadata, continuous_trait)

    modtraitcor <- cor(as.matrix(MEs), trait, use = "p", method=cor_method)
    nSamples <- ncol(exp)
    modtraitpvalue <- WGCNA::corPvalueStudent(modtraitcor, nSamples)
    modtraitsymbol <- pval2symbol(modtraitpvalue)

    modtrait_long <- reshape2::melt(modtraitcor)
    pvals_long <- reshape2::melt(modtraitpvalue)
    combined_long <- merge(modtrait_long, pvals_long, by=c("Var1", "Var2"))
    colnames(combined_long) <- c("ME", "trait", "cor", "pvalue")
    combined_long$ME <- as.character(combined_long$ME)
    combined_long$trait <- as.character(combined_long$trait)

    textMatrix <- paste(signif(modtraitcor, 2), modtraitsymbol, sep = "")
    dim(textMatrix) <- dim(modtraitcor)

    if(transpose) {
        modtraitcor <- t(modtraitcor)
        textMatrix <- t(textMatrix)
        yLabels <- colnames(trait)
        xLabels <- names(MEs)
        xSymbols <- names(MEs)
        ySymbols <- NULL
        xColorLabels <- TRUE
        par(mar = c(5, 5, 1, 1))
    } else {
        par(mar = c(6, 8.5, 3, 3))
        yLabels <- names(MEs)
        xLabels <- colnames(trait)
        xColorLabels <- FALSE
        xSymbols <- NULL
        ySymbols <- names(MEs)
    }
    cols <- colorRampPalette(rev(RColorBrewer::brewer.pal(10, palette)))(100)
    hm <- WGCNA::labeledHeatmap(Matrix = modtraitcor,
                                yLabels = yLabels, xLabels = xLabels,
                                ySymbols = ySymbols, xSymbols = xSymbols,
                                colorLabels=FALSE,
                                colors = cols,
                                textMatrix = textMatrix, setStdMargins = FALSE,
                                cex.text = cex.text,
                                cex.lab.x = cex.lab.x, cex.lab.y = cex.lab.y,
                                zlim = c(-1,1), cex.main=1,
                                main = paste("Module-trait relationships"))
    return(combined_long)
}

#' Calculate gene significance for a given group of genes
#'
#' @param exp A gene expression data frame with genes in row names and
#' samples in column names or a `SummarizedExperiment` object.
#' @param metadata A data frame containing sample names in row names and
#' sample annotation in the first column. Ignored if `exp` is
#' a `SummarizedExperiment` object, since the function will extract colData.
#' @param genes Character vector of genes to be correlated with traits.
#' If not given, all genes in `exp` will be considered.
#' @param alpha Significance level. Default is 0.05.
#' @param min_cor Minimum correlation coefficient. Default is 0.2.
#' @param use_abs Logical indicating whether to filter by correlation using
#' absolute value or not. If TRUE, a \code{min_cor} of say 0.2 would keep all
#' correlations above 0.2 and below -0.2. Default is TRUE.
#' @param palette RColorBrewer's color palette to use. Default is "RdYlBu",
#' a palette ranging from blue to red.
#' @param show_rownames Logical indicating whether to show row names or not.
#' Default is FALSE.
#' @param continuous_trait Logical indicating if trait is a continuous variable.
#' Default is FALSE.
#'
#' @return A heatmap of gene significance (GS) and a list containing: \itemize{
#'   \item{filtered_corandp}{Filtered matrix of correlation and p-values}
#'   \item{raw_GS}{Raw matrix of gene significances}
#' }
#' @seealso
#'  \code{\link[WGCNA]{corPvalueStudent}}
#'  \code{\link[RColorBrewer]{RColorBrewer}}
#' @rdname gene_significance
#' @export
#' @importFrom reshape2 melt
#' @importFrom WGCNA corPvalueStudent
#' @importFrom ComplexHeatmap pheatmap
#' @importFrom RColorBrewer brewer.pal
#' @importFrom SummarizedExperiment colData
#' @examples
#' data(filt.se)
#' gs <- gene_significance(filt.se)
gene_significance <- function(exp, metadata, genes=NULL,
                              alpha = 0.05, min_cor = 0.2,
                              use_abs = TRUE,
                              palette="RdYlBu", show_rownames=FALSE,
                              continuous_trait=FALSE) {
    if(is(exp, "SummarizedExperiment")) {
        metadata <- as.data.frame(SummarizedExperiment::colData(exp))
    }
    exp <- handleSE(exp)
    final_exp <- exp[, rownames(metadata)]
    if(!is.null(genes)) {
        final_exp <- final_exp[genes, , drop=FALSE]
    }
    trait <- handle_trait_type(metadata, continuous_trait)
    GS <- cor(as.matrix(t(final_exp)), trait, use = "p")

    # Filter by correlation coefficient and p-value
    melt.cor <- reshape2::melt(GS)

    nSamples <- ncol(final_exp)
    GS.pvalue <- WGCNA::corPvalueStudent(GS, nSamples)
    melt.pvalue <- reshape2::melt(GS.pvalue)

    corandp <- merge(melt.cor, melt.pvalue, by = c("Var1", "Var2"))
    corandp$Var1 <- as.character(corandp$Var1)
    corandp$Var2 <- as.character(corandp$Var2)

    colnames(corandp) <- c("gene", "trait", "cor", "pvalue")

    if(use_abs) {
        corandp <- corandp[corandp$pvalue < alpha & abs(corandp$cor) > min_cor, ]
    } else {
        corandp <- corandp[corandp$pvalue < alpha & corandp$cor > min_cor, ]
    }
    cols <- colorRampPalette(rev(RColorBrewer::brewer.pal(10, palette)))(100)
    p <- ComplexHeatmap::pheatmap(as.matrix(GS), border_color = NA,
                             color=cols,
                             show_rownames=show_rownames,
                             main="Gene-trait correlations")

    resultlist <- list(filtered_corandp = corandp, raw_GS = GS)
    return(resultlist)
}


#' Get GCN hubs
#'
#' @param exp A gene expression data frame with genes in row names and
#' samples in column names or a `SummarizedExperiment` object.
#' @param net List object returned by \code{exp2gcn}.
#'
#' @return Data frame containing gene IDs, modules and
#' intramodular connectivity of all hubs.
#' @author Fabricio Almeida-Silva
#' @seealso
#'  \code{\link[WGCNA]{signedKME}}
#' @rdname get_hubs_gcn
#' @export
#' @importFrom WGCNA signedKME
#' @examples
#' data(filt.se)
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' hubs <- get_hubs_gcn(filt.se, gcn)
get_hubs_gcn <- function(exp, net) {
    exp <- handleSE(exp)
    cor_method <- net$params$cor_method
    genes_modules <- net$genes_and_modules
    MEs <- net$MEs
    kIN <- net$kIN[, 2, drop=FALSE]

    # Add kIN info
    genes_modulesk <- merge(genes_modules, kIN, by.x="Genes", by.y="row.names")

    # Calculate kME
    if(cor_method == "spearman") {
        MM <- WGCNA::signedKME(t(exp), MEs, outputColumnName="",
                               corOptions = "use = 'p', method = 'spearman'")
    } else if(cor_method == "pearson") {
        MM <- WGCNA::signedKME(t(exp), MEs, outputColumnName="")
    } else if(cor_method == "biweight") {
        MM <- WGCNA::signedKME(t(exp), MEs, corFcn="bicor", outputColumnName="",
                               corOptions = "maxPOutliers = 0.1")
    } else {
        stop("Invalid correlation method. Pick one of 'spearman' or 'pearson'.")
    }

    # Add kME info
    kme_info <- merge(genes_modulesk, MM, by.x="Genes", by.y="row.names")

    # Keep top 10% genes with the highest degree for each module
    genes_mod_list <- split(kme_info, kme_info[,2])
    genes_mod_list$grey <- NULL
    top_10 <- lapply(genes_mod_list, function(x) {
        indices <- round(nrow(x) * 0.1)
        return(x[order(x$kWithin, decreasing=TRUE), ][seq_len(indices), ])
    })

    # Pick genes from the top 10% degree with kME above 0.8
    hubs <- Reduce(rbind, lapply(top_10, function(x) {
        cols <- c("Genes", "Modules", "kWithin", unique(x$Modules))
        h <- x[, cols]
        h <- h[h[,4] > 0.8, c(1,2,3)]
    }))
    rownames(hubs) <- seq_len(nrow(hubs))
    colnames(hubs) <- c("Gene", "Module", "kWithin")
    return(hubs)
}


#' Helper function to perform Fisher's Exact Test with parallel computing
#'
#' @param genes Character vector containing genes on which enrichment will be tested.
#' @param reference Character vector containing genes to be used as background.
#' @param genesets List of functional annotation categories.
#' (e.g., GO, pathway, etc.) with their associated genes.
#' @param adj Multiple testing correction method.
#' @return Results of Fisher's Exact Test in a data frame with TermID,
#' number of associated genes, number of genes in reference set,
#' P-value and adjusted P-value.
#' @importFrom stats p.adjust fisher.test
#' @importFrom BiocParallel bplapply
#' @noRd
par_enrich <- function(genes, reference, genesets, adj = "BH") {
    reference <- reference[!reference %in% genes]

    tab <- BiocParallel::bplapply(seq_along(genesets), function(i) {

        RinSet <- sum(reference %in% genesets[[i]])
        RninSet <- length(reference) - RinSet
        GinSet <- sum(genes %in% genesets[[i]])
        GninSet <- length(genes) - GinSet
        fmat <- matrix(c(GinSet, RinSet, GninSet, RninSet), nrow = 2,
                       ncol = 2, byrow = FALSE)
        colnames(fmat) <- c("inSet", "ninSet")
        rownames(fmat) <- c("genes", "reference")
        fish <- stats::fisher.test(fmat, alternative = "greater")
        pval <- fish$p.value
        inSet <- RinSet + GinSet
        res <- c(GinSet, inSet, pval)
        return(res)
    })
    rtab <- do.call(rbind, tab)
    rtab <- data.frame(as.vector(names(genesets)), rtab)
    rtab <- rtab[order(rtab[, 4]), ]
    colnames(rtab) <- c("TermID", "genes", "all", "pval")
    padj <- stats::p.adjust(rtab$pval, method = adj)
    tab.out <- data.frame(rtab, padj)

    return(tab.out)
}


#' Perform enrichment analysis for a set of genes
#'
#' @param genes Character vector containing genes for overrepresentation analysis.
#' @param background_genes Character vector of genes to be used as background
#' for the Fisher's Exact Test.
#' @param annotation Annotation data frame with genes in the first column and
#' functional annotation in the other columns. This data frame can be exported
#' from Biomart or similar databases.
#' @param column Column or columns of \code{annotation} to be used for enrichment.
#' Both character or numeric values with column indices can be used.
#' If users want to supply more than one column, input a character or
#' numeric vector. Default: all columns from \code{annotation}.
#' @param correction Multiple testing correction method. One of "holm",
#' "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr" or "none".
#' Default is "BH".
#' @param p P-value threshold. P-values below this threshold will be
#' considered significant. Default is 0.05.
#' @param bp_param BiocParallel back-end to be used.
#' Default: BiocParallel::SerialParam()
#' @return Data frame containing significant terms, p-values and associated genes.
#' @author Fabricio Almeida-Silva
#' @rdname enrichment_analysis
#' @export
#' @importFrom BiocParallel bplapply
#' @examples
#' \donttest{
#' data(filt.se)
#' data(zma.interpro)
#' genes <- rownames(filt.se)[1:50]
#' background_genes <- rownames(filt.se)
#' annotation <- zma.interpro
#' # Using p = 1 to show all results
#' enrich <- enrichment_analysis(genes, background_genes, annotation, p = 1)
#' }
enrichment_analysis <- function(genes, background_genes, annotation,
                                column = NULL,
                                correction = "BH", p = 0.05,
                                bp_param = BiocParallel::SerialParam()) {

    ## Get a dataframe of expressed genes and their annotations
    gene_column <- colnames(annotation)[1]

    # Handle missing values
    annotation[is.na(annotation)] <- "Unannotated"

    if(is.null(column)) { #all columns?
        gene_annotation <- annotation
    } else {
        if(is.numeric(column)) { # Input is column indices
            gene_annotation <- annotation[, c(1, column)]
        } else { #Input is column names
            gene_annotation <- annotation[, c(paste(gene_column), column)]
        }
    }

    background <- gene_annotation[gene_annotation[,1] %in% background_genes, ]

    if(ncol(background) == 2) { #only one annotation category
        # Named list of expressed genes and their annotations
        annotation_list <- split(background[,1], background[,2])

        # Perform the enrichment analysis
        enrich.table <- par_enrich(genes, background_genes, annotation_list, adj = correction)
        signif_enrich <- as.data.frame(enrich.table[enrich.table$padj <= p, ],
                                       stringsAsFactors = FALSE)

        if(nrow(signif_enrich) > 0) {
            signif_terms_genes <- annotation_list[as.character(signif_enrich[,1])]
            signif_terms_genes <- lapply(signif_terms_genes, function(x) unique(x[x %in% genes]))

            # Save final result with a column containing gene IDs of associated genes
            signif_enrich$GeneID <- unlist(lapply(signif_terms_genes,
                                                  paste, collapse = ","))
            df_signif_enrich <- signif_enrich
        } else {
            df_signif_enrich <- NULL
        }

    } else { # more than 1 annotation category
        annotation_list <- lapply(seq_along(background)[-1], function(x) {
            return(split(background[,1], background[,x]))
        })

        # Remove unannotated genes
        annotation_list_final <- lapply(annotation_list, function(x) {
            return(x[names(x) != "Unannotated"])
        })

        # Perform the enrichment analysis
        signif_enrich <- BiocParallel::bplapply(annotation_list_final, function(x) {
            enrich.table <- par_enrich(genes, background_genes, x, adj=correction)
            sig_enrich <- as.data.frame(enrich.table[enrich.table$padj < p, ],
                                        stringsAsFactors=FALSE)
            return(sig_enrich)
        }, BPPARAM = bp_param)

        # Remove empty data frames from list
        signif_enrich <- signif_enrich[vapply(signif_enrich, nrow, numeric(1)) > 0]

        if(length(signif_enrich) > 0) {

            # Create a data frame containing annotations and the annotation class
            annot_correspondence <- Reduce(rbind, lapply(seq_along(background)[-1], function(x) {
                annot_correspondence <- data.frame(TermID = background[,x],
                                                   Category = names(background)[x],
                                                   stringsAsFactors = FALSE)
                annot_correspondence <- annot_correspondence[!duplicated(annot_correspondence[,c(1,2)]),]
                return(annot_correspondence)
            }))

            # Add column containing the annotation class
            list_signif_enrich <- lapply(signif_enrich, function(x) {
                merge(x, annot_correspondence)
            })

            # Reduce list of data frames to a single data frame
            df_signif_enrich <- Reduce(rbind, list_signif_enrich)

            # Get genes associated to each term
            annotation_list_concat <- unlist(annotation_list, recursive = FALSE) # create list from list of lists
            signif_terms_genes <- annotation_list_concat[as.character(df_signif_enrich[,1])]
            signif_terms_genes <- lapply(signif_terms_genes, function(x) unique(x[x %in% genes]))

            # Get final table with enriched terms and a column containing the associated genes
            df_signif_enrich$GeneID <- unlist(lapply(signif_terms_genes,
                                                     paste, collapse = ","))

        } else {
            df_signif_enrich <- NULL
        }
    }
    return(df_signif_enrich)
}


#' Perform enrichment analysis for coexpression network modules
#'
#' @param net List object returned by \code{exp2gcn}.
#' @param background_genes Character vector of genes to be used as background
#' for the Fisher's Exact Test.
#' @param annotation Annotation data frame with genes in the first column
#' and functional annotation in the other columns. This data frame can
#' be exported from Biomart or similar databases.
#' @param column Column or columns of \code{annotation} to be used for enrichment.
#' Both character or numeric values with column indices can be used.
#' If users want to supply more than one column, input a character or
#' numeric vector. Default: all columns from \code{annotation}.
#' @param correction Multiple testing correction method. One of "holm",
#' "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr" or "none".
#' Default is "BH".
#' @param p P-value threshold. P-values below this threshold will be considered
#' significant. Default is 0.05.
#' @param bp_param BiocParallel back-end to be used.
#' Default: BiocParallel::SerialParam()
#' @return A data frame containing enriched terms, p-values, gene IDs
#' and module names.
#' @author Fabricio Almeida-Silva
#' @rdname module_enrichment
#' @importFrom BiocParallel bplapply SerialParam
#' @export
#' @examples
#' \donttest{
#' data(filt.se)
#' data(zma.interpro)
#' background <- rownames(filt.se)
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' mod_enrich <- module_enrichment(gcn, background, zma.interpro, p=1)
#' }
module_enrichment <- function(net=NULL, background_genes, annotation, column = NULL,
                              correction = "BH", p = 0.05,
                              bp_param = BiocParallel::SerialParam()) {

    # Divide modules in different data frames of a list
    genes.modules <- net$genes_and_modules
    list.gmodules <- split(genes.modules, genes.modules$Modules)
    list.gmodules <- list.gmodules[names(list.gmodules) != "grey"]

    enrichment_allmodules <- BiocParallel::bplapply(seq_along(list.gmodules), function(x) {
        message("Enrichment analysis for module ",
                    names(list.gmodules)[x], "...")
        l <- enrichment_analysis(genes = as.character(list.gmodules[[x]][,1]),
                                 background_genes = background_genes,
                                 annotation = annotation,
                                 column = column,
                                 correction = correction, p = p)
        return(l)
    }, BPPARAM = bp_param)
    names(enrichment_allmodules) <- names(list.gmodules)

    # Remove NULL elements from list
    enrichment_filtered <- enrichment_allmodules[!vapply(enrichment_allmodules,
                                                         is.null, logical(1))]

    if(length(enrichment_filtered) == 0) {
        message("None of the modules had significant enrichment.")
        enrichment_final <- NULL
    } else {
        # Add module name to each data frame
        enrichment_modnames <- lapply(seq_along(enrichment_filtered), function(x) {
            return(cbind(enrichment_filtered[[x]],
                         Module=names(enrichment_filtered)[x]))
        })

        # Reduce list of data frames to a single data frame
        enrichment_final <- Reduce(rbind, enrichment_modnames)
    }

    return(enrichment_final)
}


#' Get 1st-order neighbors of a given gene or group of genes
#'
#' @param genes Character vector containing genes from which
#' direct neighbors will be extracted.
#' @param net List object returned by \code{exp2gcn}.
#' @param cor_threshold Correlation threshold to filter connections.
#' As a weighted network is a fully connected graph, a cutoff must be selected.
#' Default is 0.7.
#'
#' @return List containing 1st-order neighbors for each input gene.
#' @seealso \code{exp2gcn} \code{SFT_fit}
#' @author Fabricio Almeida-Silva
#' @export
#' @rdname get_neighbors
#' @examples
#' data(filt.se)
#' genes <- rownames(filt.se)[1:10]
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' neighbors <- get_neighbors(genes, gcn)
get_neighbors <- function(genes, net, cor_threshold = 0.7) {

    net_type <- net$params$net_type
    power <- net$params$SFTpower
    edges <- net$correlation_matrix

    edges <- cormat_to_edgelist(edges)
    colnames(edges) <- c("Gene1", "Gene2", "Weight")

    filt_edges <- edges[edges$Gene1 %in% genes | edges$Gene2 %in% genes, ]
    if(net_type == "signed") {
        filt_edges <- filt_edges[abs(filt_edges$Weight) >= cor_threshold, ]
    } else {
        filt_edges <- filt_edges[filt_edges$Weight >= cor_threshold, ]
    }

    result_list <- lapply(genes, function(x) {
        y <- filt_edges[rowSums(filt_edges == x) > 0, ]
        y <- c(as.character(y$Gene1), as.character(y$Gene2))
        y <- unique(y[y != x])
        return(y)
    })
    names(result_list) <- genes
    return(result_list)
}


#' Get edge list from an adjacency matrix for a group of genes
#'
#' @param net List object returned by \code{exp2gcn}.
#' @param genes Character vector containing a subset of genes from which
#' edges will be extracted. It can be ignored if the user wants to extract
#' an edge list for a given module instead of individual genes.
#' @param module Character with module name from which edges will be extracted.
#' To include 2 or more modules, input the names in a character vector.
#' @param filter Logical indicating whether to filter the edge list or not.
#' @param method Method to filter spurious correlations. One of "Zscore",
#' "optimalSFT", "pvalue" or "min_cor". See details for more information
#' on the methods. Default: 'optimalSFT'
#' @param r_optimal_test Numeric vector with the correlation thresholds
#' to be tested for optimal scale-free topology fit. Only valid
#' if \code{method} equals "optimalSFT". Default: seq(0.4, 0.9, by = 0.1)
#' @param Zcutoff Minimum Z-score threshold. Only valid if
#'  \code{method} equals "Zscore". Default: 1.96
#' @param pvalue_cutoff Maximum P-value threshold. Only valid
#' if \code{method} equals "pvalue". Default: 0.05
#' @param rcutoff Minimum correlation threshold. Only valid
#' if \code{method} equals "min_cor". Default: 0.7
#' @param nSamples Number of samples in the data set from which
#' the correlation matrix was calculated. Only required
#' if \code{method} equals "pvalue".
#' @param check_SFT Logical indicating whether to test if the resulting network
#' is close to a scale-free topology or not. Default: FALSE.
#' @param bp_param BiocParallel back-end to be used.
#' Default: BiocParallel::SerialParam()
#'
#' @return Data frame with edge list for the input genes.
#' @details The default method ("optimalSFT") will create several different
#' edge lists by filtering the original correlation matrix by the thresholds
#' specified in \code{r_optimal_test}. Then, it will calculate a scale-free
#' topology fit index for each of the possible networks and return the network
#' that best fits the scale-free topology.
#' The method "Zscore" will apply a Fisher Z-transformation for the correlation
#' coefficients and remove the Z-scores below the threshold specified
#' in \code{Zcutoff}.
#' The method "pvalue" will calculate Student asymptotic p-value for the
#' correlations and remove correlations whose p-values are above the threshold
#' specified in \code{pvalue_cutoff}.
#' The method "min_cor" will remove correlations below the minimum correlation
#' threshold specified in \code{rcutoff}.
#' @seealso
#'  \code{\link[WGCNA]{scaleFreeFitIndex}},\code{\link[WGCNA]{corPvalueStudent}}
#'  \code{\link[igraph]{fit_power_law}}
#' @seealso \code{SFT_fit}
#' @seealso \code{exp2gcn}.
#' @author Fabricio Almeida-Silva
#' @rdname get_edge_list
#' @export
#' @importFrom WGCNA scaleFreeFitIndex corPvalueStudent
#' @importFrom ggpubr ggline
#' @importFrom ggplot2 theme element_text
#' @importFrom BiocParallel bplapply SerialParam
#' @examples
#' data(filt.se)
#' gcn <- exp2gcn(filt.se, SFTpower = 18, cor_method = "pearson")
#' genes <- rownames(filt.se)[1:50]
#' edges <- get_edge_list(gcn, genes=genes, filter = FALSE)
get_edge_list <- function(net, genes = NULL, module = NULL,
                          filter = FALSE, method = "optimalSFT",
                          r_optimal_test = seq(0.4, 0.9, by=0.1),
                          Zcutoff = 1.96, pvalue_cutoff = 0.05, rcutoff = 0.7,
                          nSamples = NULL,
                          check_SFT = FALSE,
                          bp_param = BiocParallel::SerialParam()) {

    # Define objects containing correlation matrix and data frame of genes and modules
    cor_matrix <- net$correlation_matrix

    # Should we extract genes in a module?
    if(!is.null(module)) {
        genes_modules <- net$genes_and_modules
        keep <- genes_modules[genes_modules$Modules %in% module, 1]
        cor_matrix <- cor_matrix[keep, keep]
    }

    # Should we extract a user-defined gene set?
    if(!is.null(genes)) {
        cor_matrix <- cor_matrix[genes, genes]
    }

    # Should we filter the matrix?
    if(filter) {
        # Create edge list from correlation matrix
        edges <- cormat_to_edgelist(cor_matrix)
        colnames(edges) <- c("Gene1", "Gene2", "Weight")

        if(method == "Zscore") {
            # Apply Fisher-Z transformation to correlation values
            edgesZ <- edges
            edgesZ$Weight <- 0.5 * log((1+edges$Weight) / (1-edges$Weight))

            edgelist <- edgesZ[edgesZ$Weight >= Zcutoff, ]
        } else if(method == "optimalSFT") {
            cutoff <- r_optimal_test

            # Create list of 2 elements: edge lists, each with a different correlation threshold, and degree
            list_mat <- replicate(length(cutoff), cor_matrix, simplify = FALSE)

            list_cormat_filtered <- BiocParallel::bplapply(seq_along(list_mat), function(x) {
                # Convert r values below threshold to NA and set diagonals to 0
                matrix <- list_mat[[x]]
                matrix[matrix < cutoff[x] ] <- NA
                diag(matrix) <- 0

                # Calculate degree
                degree <- rowSums(matrix, na.rm=TRUE)

                # Convert lower triangle and diagonals to NA
                matrix[lower.tri(matrix, diag=TRUE)] <- NA

                # Convert symmetrical matrix to edge list (Gene1, Gene2, Weight)
                matrix <- na.omit(data.frame(as.table(matrix), stringsAsFactors=FALSE))
                result <- list(matrix=matrix, degree=degree)
                return(result)
            }, BPPARAM = bp_param)

            degree_list <- lapply(list_cormat_filtered, function(x) return(x[[2]]))

            # Calculate scale-free topology
            sft.rsquared <- unlist(lapply(degree_list, function(x) WGCNA::scaleFreeFitIndex(x)$Rsquared.SFT))
            max.index <- which.max(sft.rsquared)

            # Plot scale-free topology fit for r values
            plot.data <- data.frame(x=cutoff, y=sft.rsquared, stringsAsFactors = FALSE)
            plot <- ggpubr::ggline(plot.data, x = "x", y = "y", size=2,
                                   color="firebrick",
                                   xlab = "Correlation (r) values",
                                   ylab = expression(paste("Scale-free topology fit - ", R^{2})),
                                   title = "Scale-free topology fit for given r values", font.title = c(13, "bold")) +
                ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
            print(plot)
            optimalr <- cutoff[max.index]
            message("The correlation threshold that best fits the scale-free topology is ", optimalr)
            edgelist <- list_cormat_filtered[[max.index]][[1]]
        } else if(method == "pvalue") {
            if(is.null(nSamples)) {
                stop("Please, specify the number of samples used to calculate the correlation matrix")
            }

            # Calculate Student asymptotic p-value for given correlations
            cor.pvalue <- WGCNA::corPvalueStudent(edges$Weight, nSamples)

            # Create a data frame of correlations and p-values
            corandp <- edges
            corandp$pvalue <- cor.pvalue

            # Create a final edge list containing only significant correlations
            edgelist <- corandp[corandp$pvalue < pvalue_cutoff, c(1,2,3)]

        } else if(method == "min_cor") {
            edgelist <- edges[edges$Weight >= rcutoff, ]

        } else{
            stop("Please, specify a valid filtering method.")
        }

    } else {
        # Create edge list from correlation matrix without filtering
        edgelist <- cormat_to_edgelist(cor_matrix)
        colnames(edgelist) <- c("Gene1", "Gene2", "Weight")
    }

    # Check scale-free topology fit
    if(check_SFT) {
        test <- check_SFT(edgelist, net_type="gcn")
    }

    return(edgelist)
}