#' Merge SE objects into single SE object.
#' 
#' @param x a \code{\link{SummarizedExperiment}} object or a list of 
#' \code{\link{SummarizedExperiment}} objects.
#' 
#' @param y a \code{\link{SummarizedExperiment}} object when \code{x} is a
#' \code{\link{SummarizedExperiment}} object. Disabled when \code{x} is a list.
#' 
#' @param assay_name A single character value for selecting the
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{assay}}
#' to be merged. (By default: \code{assay_name = "counts"})
#' 
#' @param join A single character value for selecting the joining method.
#' Must be 'full', 'inner', 'left', or 'right'. 'left' and 'right' are disabled
#' when more than two objects are being merged.  (By default: \code{join = "full"})
#' 
#' @param missing_values NA, 0, or a single character value specifying the notation
#' of missing values. (By default: \code{missing_values = NA})
#' 
#' @param collapse_samples A boolean value for selecting whether to collapse identically
#' named samples to one. (By default: \code{collapse_samples = FALSE})
#' 
#' @param verbose A single boolean value to choose whether to show messages. 
#' (By default: \code{verbose = TRUE})
#'
#' @param ... optional arguments (not used).
#'
#' @return A single \code{SummarizedExperiment} object.
#'
#' @details
#' This function merges multiple \code{SummarizedExperiment} objects. It combines
#' \code{rowData}, \code{assays}, and \code{colData} so that the output includes
#' each unique row and column once. The merging is done based on \code{rownames} and
#' \code{colnames}. \code{rowTree} and \code{colTree} are preserved if linkage
#' between rows/cols and the tree is found.
#' 
#' Equally named rows are interpreted as equal. Further
#' matching based on \code{rowData} is not done. For samples, collapsing 
#' is disabled by default meaning that equally named samples that are stored 
#' in different objects are interpreted as unique. Collapsing can be enabled 
#' with \code{collapse_samples = TRUE} when equally named samples describe the same
#' sample. 
#' 
#' If, for example, not all rows are shared between the
#' individual objects, there are missing values in \code{assays}. The notation of missing
#' values can be specified with the \code{missing_values} argument. If input consists of
#' \code{TreeSummarizedExperiment} objects, also \code{rowTree}, \code{colTree}, and
#' \code{referenceSeq} are preserved if possible. The data is preserved if 
#' all the rows or columns can be found from it.
#' 
#' Compared to \code{cbind} and \code{rbind} \code{mergeSEs} 
#' allows more freely merging since \code{cbind} and \code{rbind} expect 
#' that rows and columns are matching, respectively.
#' 
#' You can choose joining methods from \code{'full'}, \code{'inner'},
#'  \code{'left'}, and  \code{'right'}. In all the methods, all the samples are 
#'  included in the result object. However, with different methods, it is possible 
#'  to choose which rows are included.
#' 
#' \itemize{
#'   \item{\code{full} -- all unique features}
#'   \item{\code{inner} -- all shared features}
#'   \item{\code{left} -- all the features of the first object}
#'   \item{\code{right} -- all the features of the second object}
#' }
#' 
#' You can also do, e.g., a full join by using the function \code{full_join}, which is 
#' an alias for \code{mergeSEs}. The other joining methods also have dplyr-like aliases.
#' 
#' The output depends on the input. If the input contains \code{SummarizedExperiment}
#' object, then the output will be \code{SummarizedExperiment}. When all the input
#' objects belong to \code{TreeSummarizedExperiment}, the output will be 
#' \code{TreeSummarizedExperiment}.
#'
#' @seealso
#' \itemize{
#'   \item{\code{TreeSummarizedExperiment::cbind}}
#'   \item{\code{TreeSummarizedExperiment::rbind}}
#'   \item{\code{\link[dplyr:full_join]{full_join}}}
#'   \item{\code{\link[dplyr:inner_join]{inner_join}}}
#'   \item{\code{\link[dplyr:left_join]{left_join}}}
#'   \item{\code{\link[dplyr:right_join]{right_join}}}
#' }
#'
#' @name mergeSEs
#' @export
#'
#' @author Leo Lahti and Tuomas Borman. Contact: \url{microbiome.github.io}
#' 
#' @examples
#' data(GlobalPatterns)
#' data(esophagus)
#' data(enterotype)
#' 
#' # Take only subsets so that it won't take so long
#' tse1 <- GlobalPatterns[1:100, ]
#' tse2 <- esophagus
#' tse3 <- enterotype[1:100, ]
#' 
#' # Merge two TreeSEs
#' tse <- mergeSEs(tse1, tse2)
#' 
#' # Merge a list of TreeSEs
#' list <- SimpleList(tse1, tse2, tse3)
#' tse <- mergeSEs(list, assay_name = "counts", missing_values = 0)
#' tse
#' 
#' # With 'join', it is possible to specify the merging method. Subsets are used
#' # here just to show the functionality
#' tse_temp <- mergeSEs(tse[1:10, 1:10], tse[5:100, 11:20], join = "left")
#' tse_temp
#' 
#' # You can also do a left_join by using alias "left_join"
#' tse_temp <- left_join(tse[1:10, 1:10], tse[5:100, 11:20])
#' 
#' # If your objects contain samples that describe one and same sample,
#' # you can collapse equally named samples to one by specifying 'collapse_samples'
#' tse_temp <- inner_join(list(tse[1:10, 1], tse[1:20, 1], tse[1:5, 1]), 
#'                        collapse_samples = TRUE)
#' tse_temp
#' 
NULL

################################### Generic ####################################

#' @rdname mergeSEs
#' @export
# S4 generic for merging SE objects; dispatch happens on 'x' only, all other
# arguments are passed on to the methods through '...'.
setGeneric("mergeSEs", signature = "x",
        function(x, ... ) standardGeneric("mergeSEs"))

###################### Function for SimpleList of TreeSEs ######################

#' @rdname mergeSEs
#' @export
# Method for a SimpleList of SE objects. Validates every argument and then
# delegates the actual merging to .merge_SEs().
#
# x: SimpleList of (Tree)SummarizedExperiment / SingleCellExperiment objects
# assay_name: name of the assay that is merged; must exist in every object
# join: "full", "inner", "left" or "right" ("left"/"right" only for <= 2 objects)
# missing_values: a single NA, 0, or string used to fill missing assay values
# collapse_samples: TRUE/FALSE, whether equally named samples are collapsed
# verbose: TRUE/FALSE, whether progress messages are shown
# Returns the merged object produced by .merge_SEs(); stops with an error
# when any of the inputs is invalid.
setMethod("mergeSEs", signature = c(x = "SimpleList"),
        function(x, assay_name = "counts", join = "full", 
                 missing_values = NA, collapse_samples = FALSE, verbose = TRUE, 
                 ... ){
            ################## Input check ##################
            # Check the objects and get the class of the output
            se_class <- .check_objects_and_give_class(x)
            # Can the assay_name be found from all the objects?
            assay_name_bool <- .assays_cannot_be_found(assay_name = assay_name, x)
            if( any(assay_name_bool) ){
                stop("'assay_name' must specify an assay from assays. 'assay_name' ",
                     "cannot be found at least in one SE object.",
                     call. = FALSE)
            }
            # Check join
            if( !(.is_a_string(join) &&
                join %in% c("full", "inner", "left", "right") ) ){
                stop("'join' must be 'full', 'inner', 'left', or 'right'.",
                     call. = FALSE)
            }
            # Check if join is not available
            if( length(x) > 2 &&
                join %in% c("left", "right") ){
                stop("Joining method 'left' and 'right' are not available ",
                     "when more than two objects are being merged.",
                     call. = FALSE)
            }
            # Is missing_values one of the allowed ones: a single NA, 0, or
            # string. The length check must guard all the alternatives;
            # otherwise NULL or a longer vector would reach '||' with a
            # non-scalar value instead of producing the error message below.
            missing_values_bool <- length(missing_values) == 1L && (
                .is_a_string(missing_values) ||
                is.na(missing_values) ||
                (is.numeric(missing_values) && missing_values == 0) )
            # If not then give error
            if( !missing_values_bool ){
                stop("'missing_values' must be 0, NA, or a single character value.",
                     call. = FALSE)
            }
            # Check collapse_samples
            if( !.is_a_bool(collapse_samples) ){
                stop("'collapse_samples' must be TRUE or FALSE.",
                     call. = FALSE)
            }
            # Check verbose
            if( !.is_a_bool(verbose) ){
                stop("'verbose' must be TRUE or FALSE.",
                     call. = FALSE)
            }
            ################ Input check end ################
            # Give message if TRUE. The counter is continued by .merge_SEs,
            # which overwrites the same line for each merged object.
            if( verbose ){
                message("Merging with ", join, " join...")
                message("1/", length(x), appendLF = FALSE)
            }
            # Merge objects
            tse <- .merge_SEs(x, se_class, join, assay_name, 
                             missing_values, collapse_samples, verbose)
            return(tse)
        }
)

########################### Function for two TreeSEs ###########################

#' @rdname mergeSEs
#' @export
# Method for merging exactly two objects given as 'x' and 'y'. Both are wrapped
# into a SimpleList and handed over to the SimpleList method together with the
# remaining arguments.
setMethod("mergeSEs", signature = c(x = "SummarizedExperiment"),
        function(x, y = NULL, ...){
            # 'y' is mandatory here even though it defaults to NULL
            y_is_se <- is(y, "SummarizedExperiment")
            if( !y_is_se ){
                stop("'y' must be a 'SummarizedExperiment' object.",
                     call. = FALSE)
            }
            # Dispatch to the SimpleList method
            mergeSEs(SimpleList(x, y), ...)
        }
)

########################### Function for list TreeSEs ##########################

#' @rdname mergeSEs
#' @export
# Method for a base list of objects: the list is converted into a SimpleList
# and the SimpleList method does the rest.
setMethod("mergeSEs", signature = c(x = "list"),
          function(x, ...){
              mergeSEs(SimpleList(x), ...)
          }
)

################################# full_join ####################################

#' @rdname mergeSEs
#' @export
# dplyr-like alias; dispatches on 'x' only
setGeneric("full_join", signature = "x",
    function(x, ...) standardGeneric("full_join"))

#' @rdname mergeSEs
#' @export
setMethod("full_join", signature = c(x = "ANY"),
    # Forward everything to mergeSEs with the joining method fixed to "full"
    function(x, ...) mergeSEs(x, join = "full", ...)
)

################################# inner_join ###################################

#' @rdname mergeSEs
#' @export
# dplyr-like alias; dispatches on 'x' only
setGeneric("inner_join", signature = "x",
    function(x, ...) standardGeneric("inner_join"))

#' @rdname mergeSEs
#' @export
setMethod("inner_join", signature = c(x = "ANY"),
    # Forward everything to mergeSEs with the joining method fixed to "inner"
    function(x, ...) mergeSEs(x, join = "inner", ...)
)

################################# left_join ####################################

#' @rdname mergeSEs
#' @export
# dplyr-like alias; dispatches on 'x' only
setGeneric("left_join", signature = "x",
    function(x, ...) standardGeneric("left_join"))

#' @rdname mergeSEs
#' @export
setMethod("left_join", signature = c(x = "ANY"),
    # Forward everything to mergeSEs with the joining method fixed to "left"
    function(x, ...) mergeSEs(x, join = "left", ...)
)

################################# right_join ###################################

#' @rdname mergeSEs
#' @export
# dplyr-like alias; dispatches on 'x' only
setGeneric("right_join", signature = "x",
    function(x, ...) standardGeneric("right_join"))

#' @rdname mergeSEs
#' @export
setMethod("right_join", signature = c(x = "ANY"),
    # Forward everything to mergeSEs with the joining method fixed to "right"
    function(x, ...) mergeSEs(x, join = "right", ...)
)

################################ HELP FUNCTIONS ################################

################################## .merge_SEs ##################################
# This function merges SE objects into one SE

# Input: A list of SEs
# Output: SE

#' @importFrom SingleCellExperiment SingleCellExperiment
.merge_SEs <- function(x, class, join, assay_name, 
                      missing_values, collapse_samples, verbose){
    # The first object seeds the result. It is dropped from the list so that
    # 'x' holds only the objects that still have to be merged into it.
    first <- x[[1]]
    x[[1]] <- NULL
    
    # TreeSE-specific slots (trees with their links, reference sequences) are
    # not merged pairwise. They are collected from every object and attached
    # to the final result at the end.
    is_tree_se <- class == "TreeSummarizedExperiment"
    tse_args <- list(rowTrees = NULL, colTrees = NULL, refSeqs = NULL)
    if( is_tree_se ){
        tse_args <- .get_TreeSE_args(first, tse_args)
    }
    
    # Assay, rowData, colData and metadata of the first object
    args <- .get_SummarizedExperiment_data(tse = first, assay_name = assay_name)
    
    # Constructor that corresponds to the output class
    FUN_constructor <- switch(class,
                              TreeSummarizedExperiment = TreeSummarizedExperiment,
                              SingleCellExperiment = SingleCellExperiment,
                              SummarizedExperiment = SummarizedExperiment
    )
    # Initial result object
    tse <- do.call(FUN_constructor, args = args)
    
    # Merge the remaining objects one by one into the result
    n_total <- length(x) + 1
    for( i in seq_len(length(x)) ){
        # Progress counter; "\r" rewrites the line started by the caller
        if( verbose ){
            message("\r", i+1, "/", n_total, appendLF = FALSE)
        }
        
        other <- x[[i]]
        
        # Unless samples are collapsed, sample names that already exist in
        # the result are made unique with the running number of the object
        if( !collapse_samples ){
            other <- .get_unique_sample_names(tse, other, i+1)
        }
        # Merge the data of the result and the ith object...
        args <- .merge_SummarizedExperiments(
            tse1 = tse,
            tse2 = other,
            join = join,
            assay_name = assay_name,
            missing_values = missing_values
            )
        # ...and rebuild the result object from it
        tse <- do.call(FUN_constructor, args = args)
        
        # Collect trees, links and reference sequences of the ith object
        if( is_tree_se ){
            tse_args <- .get_TreeSE_args(other, tse_args)
        }
    }
    # End the progress line so that following messages or warnings start
    # from a new line
    if( verbose ){
        message("")
    }
    
    # Attach the collected TreeSE-specific data, if there is any
    if( !is.null(tse_args$rowTrees) ){
        tse <- .check_and_add_trees(tse, tse_args$rowTrees, "row", verbose)
    }
    if( !is.null(tse_args$colTrees) ){
        tse <- .check_and_add_trees(tse, tse_args$colTrees, "col", verbose)
    }
    if( !is.null(tse_args$refSeqs) ){
        tse <- .check_and_add_refSeqs(tse, tse_args$refSeqs, verbose)
    }
    tse
}

############################ .check_and_add_refSeqs ############################
# This function checks whether reference sequences can be added, and adds them
# if it is possible

# Input: reference sequences and TreeSE
# Output: TreeSE
.check_and_add_refSeqs <- function(tse, refSeqs, verbose){
    if( verbose ){
        message("Adding referenceSeqs...")
    }
    
    # Names of the sequences in the first DNA set of each collected element
    rows_that_have_seqs <- unlist(lapply(refSeqs, FUN = function(seq_sets){
        names(seq_sets[[1]])
    }))
    # Every row of the merged object must have a sequence; otherwise the
    # sequences are dropped with a warning and the object is returned as is
    row_names <- rownames(tse)
    if( is.null(row_names) || !all(row_names %in% rows_that_have_seqs) ){
        warning("referenceSeqs do not match with the data so they are discarded.",
                call. = FALSE)
        return(tse)
    }
    # The largest number of DNA sets that a single element has
    max_numrow <- max(lengths(refSeqs))
    
    # Build the ith combined DNA set for each i up to that maximum
    result_list <- list()
    for( i in seq_len(max_numrow) ){
        # Pick the ith set of each element; elements with fewer than i sets
        # contribute their last set
        temp_seqs <- lapply(refSeqs, FUN = function(seq_sets){
            seq_sets[[ min(i, length(seq_sets)) ]]
        })
        # Combine the picked sets into one
        temp_seqs <- do.call(c, temp_seqs)
        # Keep the sequences of the rows of the object, in the row order
        temp_seqs <- temp_seqs[ match(rownames(tse), names(temp_seqs)), ]
        # Store the combined set
        result_list <- c(result_list, temp_seqs)
    }
    # Several sets are stored as a DNAStringSetList, a single set as it is
    result <- if( length(result_list) > 1 ){
        do.call(DNAStringSetList, result_list)
    } else{
        result_list[[1]]
    }
    referenceSeq(tse) <- result
    tse
}

############################# .check_and_add_trees #############################
# This function check if tree can be added, and adds it if it can

# Input: tree data and TreeSE
# Output: TreeSE
# tse: the merged object that receives the tree(s)
# trees_and_links: list with elements 'trees' (the collected trees) and 'links'
#   (link table with columns names, nodeLab, nodeNum, nodeLab_alias, isLeaf and
#   whichTree, as read below)
# MARGIN: "row" for rowTree/rowLinks, anything else is handled as columns
# verbose: whether to show a message
# Returns 'tse' with the tree and link slots of the margin filled, or 'tse'
# unchanged (with a warning) if some row/col name has no link.
.check_and_add_trees <- function(tse, trees_and_links, MARGIN, verbose){
    # Give a message if verbose is specified
    if( verbose ){
        message("Adding ", MARGIN, "Tree(s)...")
    }
    # Get trees
    trees <- trees_and_links$trees
    # Get links
    links <- trees_and_links$links
    # Based on margin, get rownames or colnames of the TreeSE object; to check
    # if the data matches with trees
    if(MARGIN == "row"){
        names <- rownames(tse)
    } else{
        names <- colnames(tse)
    }
    # All rownames/colnames should be included in trees/links; if not (or if
    # there are no names at all), the trees are discarded with a warning
    if( !all(names %in% links[["names"]]) || is.null(names) ){
        warning(MARGIN, "Tree(s) does not match with the data so it is discarded.",
                call. = FALSE)
        return(tse)
    }
    
    # If there are multiple trees, select non-duplicated trees, best fitting 
    # combination of trees. Get minimum number of trees that represent the data
    # based on link data.
    # NOTE(review): links are de-duplicated and reordered to match 'names' only
    # inside this branch. With a single tree they are used as collected, which
    # presumably assumes they already match the rows/cols of 'tse' in number
    # and order -- confirm.
    if( length(trees) > 1 ){
        # From the links, for each tree, get the node labels linked with the tree
        tree_labs <- split(links[["nodeLab"]], f = links$whichTree)
        
        # Loop through tree labs; for each tree, mark for every link (row of
        # 'links') whether its node label is found from that tree
        result <- lapply(tree_labs, FUN = function(x){
            c( links[["nodeLab"]] %in% x )
        })
        # Create a data.frame: one column per tree, one row per link
        result <- as.data.frame(result)
        
        # Loop from 1 to number of trees, i.e., try the smallest sets first
        for( i in seq_len(ncol(result)) ){
            # Create all possible combinations from trees, each combination has i trees. 
            combinations <- combn(result, i, simplify = FALSE)
            # Does this combination have all the node labels (rows or columns) 
            res <- lapply(combinations, FUN = function(x){
                all( rowSums(x) > 0 )
            })
            # Unlist the list of boolean values
            res <- unlist(res)
            # If combination that includes all the rows/cols was found
            if( any(res) ){
                # Take the first combination that have all the rows/cols
                combinations <- combinations[[which(res)[[1]]]]
                # Take the names of trees
                tree_names <- colnames(combinations)
                # Break so that for loop is not continued anymore
                break
            }
        }
        # Get the trees that are included in the final combination
        trees <- trees[tree_names]
        # Subset result by taking only those trees that are included in final object
        result <- result[ , tree_names, drop = FALSE]
        # In which tree this node label is found (each row represent each node
        # label); the first selected tree that contains the label is used
        whichTree <- apply(result, 1, FUN = function(x){
            names(result)[x == TRUE][[1]]
            }
        )
        whichTree <- unlist(whichTree)
        # Update links
        links[["whichTree"]] <- whichTree
        # Remove duplicates: keep the first link of each row/col name
        links <- links[ !duplicated(links[["names"]]), ]
        # Ensure that links are in correct order
        links <- links[ match(names, links[["names"]]), ]
    }
    
    # Create a LinkDataFrame based on the link data
    links <- LinkDataFrame(
        nodeLab = links[["nodeLab"]],
        nodeNum = links[["nodeNum"]],
        nodeLab_alias = links[["nodeLab_alias"]],
        isLeaf = links[["isLeaf"]],
        whichTree = links[["whichTree"]]
    )
    # Add the data in correct slot based on MARGIN. The slots are assigned
    # directly with '@'.
    if(MARGIN == "row" ){
        tse@rowTree <- trees
        tse@rowLinks <- links
    } else{
        tse@colTree <- trees
        tse@colLinks <- links
    }
    return(tse)
}

############################### .get_TreeSE_args ###############################
# This function fetches TreeSummarizedExperiment specific data: rowTree, colTree,
# and referenceSeq

# Input: TreeSE and argument list
# Output: An argument list
# tse: a TreeSE whose rowTree, colTree and referenceSeq are collected
# tse_args: list with elements rowTrees, colTrees and refSeqs (each NULL or the
#   data collected so far)
# Returns 'tse_args' with the data of 'tse' appended:
#   rowTrees / colTrees: list(trees = <list of trees>, links = <DataFrame of
#     links with an additional 'names' column holding the row/col names>)
#   refSeqs: list with one DNAStringSetList per object that had sequences
.get_TreeSE_args <- function(tse, tse_args){
    # If rowTree slot is not NULL, collect row trees and their links
    if( !is.null(tse@rowTree) ){
        # Get rowLinks, convert them to basic DataFrame, 
        # so that additional column can be added
        links <- DataFrame(rowLinks(tse))
        # Add rownames as one of the columns
        links$names <- rownames(tse)
        tse_args$rowTrees <- .add_trees_and_links(
            tse_args$rowTrees, tse@rowTree, links)
    }
    # If colTree slot is not NULL, collect column trees and their links.
    # The trees must come from the colTree slot and the result must be stored
    # to colTrees; otherwise the row tree data gets mixed with column links.
    if( !is.null(tse@colTree) ){
        # Get colLinks, convert them to basic DataFrame, 
        # so that additional column can be added
        links <- DataFrame(colLinks(tse))
        # Add colnames as one of the columns
        links$names <- colnames(tse)
        tse_args$colTrees <- .add_trees_and_links(
            tse_args$colTrees, tse@colTree, links)
    }
    # If reference sequences exist
    if( !is.null(referenceSeq(tse)) ){
        # Get the data
        refSeq <- referenceSeq(tse)
        # Convert an individual set to a list, so that all refseqs are in
        # same format
        if( is(refSeq, "DNAStringSet") ){
            refSeq <- DNAStringSetList(refSeq)
        }
        # Append as one element; c() with NULL covers the case where there
        # is no data yet
        tse_args$refSeqs <- c( tse_args$refSeqs, list(refSeq) )
    }
    return(tse_args)
}

# Appends trees and their links to the already collected tree data of one
# margin.
# current: NULL or list(trees, links) collected so far
# trees_add: named list of trees to be added
# links: DataFrame of links that refer to 'trees_add' via column 'whichTree'
# Returns list(trees, links) that includes also the new data
.add_trees_and_links <- function(current, trees_add, links){
    # If there is no data yet, the new data is the result as it is
    if( is.null(current) ){
        return( list(trees = trees_add, links = links) )
    }
    # How many trees there already are
    tree_num_before <- length(current$trees)
    # Get names that are unique over the current and the new trees.
    # make.unique() leaves the first occurrence of each name untouched, so the
    # names of the current trees (and the links pointing to them) stay valid
    # and only the new trees are renamed.
    unique_names <- make.unique( 
        names( c(current$trees, trees_add) )
    )
    # Get unique names of trees that will be added
    unique_names_add <- unique_names[ 
        tree_num_before + seq_len(length(trees_add)) ]
    # Update tree names in links so that they point to the renamed trees
    links[ , "whichTree" ] <- 
        unique_names_add[ match( links[ , "whichTree" ], names(trees_add) ) ]
    # Update tree names
    names(trees_add) <- unique_names_add
    # Combine the current and the new data
    list( 
        trees = c(current$trees, trees_add),
        links = rbind(current$links, links)
    )
}

######################## .get_SummarizedExperiment_data ########################
# This function gets the desired data from one SE object and creates a list of 
# arguments containing the data
# Arguments of SCE and TreeSE are also fetched with this function. TreeSE-specific
# slots are collected with different function so that they are merged at the end.

# Input: SE
# Output: A list of arguments
.get_SummarizedExperiment_data <- function(tse, assay_name){
    # Only rowData, colData, metadata and the selected assay are kept
    features <- rowData(tse)
    samples <- colData(tse)
    # Wrap the selected assay into a list that is named after the assay
    assays <- SimpleList(name = assay(tse, assay_name))
    names(assays) <- assay_name
    # Arguments in the form that the constructors accept
    list(assays = assays,
         rowData = features,
         colData = samples,
         metadata = metadata(tse)
    )
}

######################## .check_objects_and_give_class #########################
# This function checks that the object are in correct format

# Input: a list of objects
# Output: A shared class of objects
.check_objects_and_give_class <- function(x){
    # Allowed classes in the order of the hierarchy TreeSE --> SCE --> SE
    allowed_classes <- c("TreeSummarizedExperiment", "SingleCellExperiment", "SummarizedExperiment")
    # Helper: does the condition hold for at least one of the objects
    any_object <- function(FUN){
        any( unlist( lapply(x, FUN = FUN) ) )
    }
    
    # The output class is the first class in the hierarchy that all the
    # objects belong to
    class <- NULL
    for( candidate in allowed_classes ){
        if( all( unlist( lapply(x, is, class2 = candidate) ) ) ){
            class <- candidate
            break
        }
    }
    # If there is an object that does not belong to these classes give an error
    if( is.null(class) ){
        stop("Input includes an object that is not 'SummarizedExperiment'.",
             call. = FALSE)
    }
    # If the objects are not all of the same class, give a warning
    classes_found <- unique( unlist( lapply(x, function(y){ class(y) }) ) )
    if( length(classes_found) > 1 ){
        warning("The Input consist of multiple classes. ",
                "The output is '", class, "'.",
                call. = FALSE)
    }
    # Objects without rows or columns are not allowed
    if( any_object(function(y){ nrow(y) == 0 || ncol(y) == 0 }) ){
        stop("Input includes an object that has either no columns or/and no rows.",
             call. = FALSE)
    }
    # Every object must have colnames...
    if( any_object(function(y){ is.null(colnames(y)) }) ){
        stop("Input includes object(s) whose colnames is NULL. Please add ",
             "colnames.",
             call. = FALSE)
    }
    # ...and rownames
    if( any_object(function(y){ is.null(rownames(y)) }) ){
        stop("Input includes object(s) whose rownames is NULL. Please add ",
             "rownames.",
             call. = FALSE)
    }
    class
}

########################### .assays_cannot_be_found ############################
# This function checks that the assay can be found from TreeSE objects of a list.

# Input: the name of the assay and a list of TreeSE objects
# Output: A list of boolean values
.assays_cannot_be_found <- function(assay_name, x){
    # One value per object: TRUE if the assay is missing from it, FALSE if
    # the assay can be found. The per-object results are flattened with unlist.
    unlist( lapply(x, .assay_cannot_be_found, assay_name = assay_name) )
}

############################ .assay_cannot_be_found #############################
# This function checks that the assay can be found from TreeSE. If it cannot be found
# --> TRUE, if it can be found --> FALSE

# Input: the name of the assay and TreeSE object
# Output: TRUE or FALSE
.assay_cannot_be_found <- function(assay_name, tse){
    # .check_assay_present() signals an error when the assay is missing. Any
    # error is turned into TRUE; if the check passes, the result is FALSE.
    tryCatch(
        {
            .check_assay_present(assay_name, tse)
            FALSE
        },
        error = function(cond) TRUE
    )
}

########################### .get_unique_sample_names ###########################
# This function makes colnames unique

# Input: TreeSEs
# Output: One TreeSE with unique sample names compared to other TreeSE
.get_unique_sample_names <- function(tse1, tse2, iteration){
    # Which sample names of tse2 are already in use in tse1
    shared <- colnames(tse2) %in% colnames(tse1)
    if( any(shared) ){
        # Append the number of the object to those names, e.g. "A" -> "A_2"
        colnames(tse2)[shared] <- paste0(colnames(tse2)[shared], "_", iteration)
    }
    tse2
}

######################## .merge_SummarizedExperiments ##########################
# This function merges the data of two SE objects into one set of arguments that
# can be feed to create a single object.
# TreeSE and SCE are all merged with this function since SCE or TreeSE-specific
# slots are not merged at this point. TreeSE-specific slots are collected and
# merged at the end.

# Input: Two SEs
# Output: A list of arguments
.merge_SummarizedExperiments <- function(tse1, tse2, join,  
                                         assay_name, missing_values){
    # rowData and colData are merged first, since the merged assay is ordered
    # based on them
    rd <- .merge_rowdata(tse1, tse2, join)
    cd <- .merge_coldata(tse1, tse2, join)
    # Merged assay, wrapped into a list that is named after the assay
    assays <- SimpleList(
        name = .merge_assay(tse1, tse2, assay_name, join, missing_values, rd, cd)
    )
    names(assays) <- assay_name
    
    # Arguments for the constructor; metadata of the objects is concatenated
    list(assays = assays,
         rowData = rd,
         colData = cd,
         metadata = c( metadata(tse1), metadata(tse2) ))
}

################################ .merge_assay ##################################
# This function merges assays.

# Input: Two TreeSEs, the name of the assay, joining method, value to denote
# missing values, merged rowData, and merged colData
# Output: Merged assay
.merge_assay <- function(tse1, tse2, assay_name, join,
                         missing_values, rd, cd){
    # Join the selected assays of the two objects into one table
    merged <- .join_two_tables(
        assay(tse1, assay_name), assay(tse2, assay_name), join)
    
    # Every NA of the joined table is replaced with the notation of missing
    # values before the table is converted into a matrix
    merged[ is.na(merged) ] <- missing_values
    merged <- as.matrix(merged)
    
    # Rows follow the order of the merged rowData, columns follow the order of
    # the merged colData
    row_order <- match(rownames(rd), rownames(merged))
    col_order <- match(rownames(cd), colnames(merged))
    merged[ row_order, col_order, drop = FALSE ]
}

############################### .merge_rowdata #################################
# This function merges rowDatas,

# Input: Two TreeSEs and joining method
# Output: Merged rowData
.merge_rowdata <- function(tse1, tse2, join){
    rd1 <- rowData(tse1)
    rd2 <- rowData(tse2)
    
    # Column names are compared in lower case
    if( length(colnames(rd1)) > 0 ){
        colnames(rd1) <- tolower(colnames(rd1))
    }
    if( length(colnames(rd2)) > 0 ){
        colnames(rd2) <- tolower(colnames(rd2))
    }
    
    # Merge rowdata
    rd <- .join_two_tables(rd1, rd2, join)
    
    # Indices of the columns that match with taxonomy ranks, in the order of
    # TAXONOMY_RANKS; ranks that are not found are dropped
    ranks_ind <- match( TAXONOMY_RANKS, colnames(rd) )
    ranks_ind <- ranks_ind[ !is.na(ranks_ind) ]
    # Without rank columns there is nothing to reorder or rename
    if( length(ranks_ind) == 0 ){
        return(rd)
    }
    # Split the table into rank columns and all the other columns
    rd_rank <- rd[ , ranks_ind, drop = FALSE]
    rd_other <- rd[ , -ranks_ind, drop = FALSE]
    # Rank names get a capital first letter
    rank_names <- colnames(rd_rank)
    colnames(rd_rank) <- paste0(
        toupper(substr(rank_names, 1, 1)), 
        substr(rank_names, 2, nchar(rank_names)))
    
    # Rank columns first, then the rest
    cbind(rd_rank, rd_other)
}

############################### .merge_coldata #################################
# This function merges colDatas.

# Input: Two TreeSEs and joining method
# Output: Merged colData
.merge_coldata <- function(tse1, tse2, join){
    # Joins the colData of both objects and returns the result as a DataFrame.
    # The `join` argument is not used here: the tables are always combined
    # with a full join.
    # NOTE(review): presumably so that the samples of both objects are always
    # kept regardless of how the rows are joined -- confirm.
    merged <- .join_two_tables(colData(tse1), colData(tse2), join = "full")
    DataFrame(merged)
}

############################## .join_two_tables ################################
# This general function is used to merge rowDatas, colDatas, and assays.

# Input: Two tables and joining method
# Output: One merged table

#' @importFrom dplyr coalesce
.join_two_tables <- function(df1, df2, join){
    # Joins two tables by their rownames.
    #
    # df1, df2: tables that can be converted with as.data.frame().
    # join:     "full", "inner", "left" or "right"; any other value is an
    #           error.
    #
    # Returns a data.frame whose rownames are the joined rownames. Columns
    # that exist in only one table are kept as they are. Columns that exist
    # in both tables are combined with coalesce(), so the value of df1 is
    # used and the value of df2 fills in where df1 has NA; the combined
    # columns are placed after the other columns.

    # Fail early with a clear message instead of an obscure error from merge()
    if( !( is.character(join) && length(join) == 1 &&
           join %in% c("full", "inner", "left", "right") ) ){
        stop("'join' must be 'full', 'inner', 'left', or 'right'.",
             call. = FALSE)
    }
    # Translate the joining method into the arguments of merge()
    all.x <- join %in% c("full", "left")
    all.y <- join %in% c("full", "right")

    # Ensure that the data is in correct format
    df1 <- as.data.frame(df1)
    df2 <- as.data.frame(df2)

    # Variables that are found in both tables. The temporary names of both
    # tables are derived from this single vector, so that the "_X" and "_Y"
    # columns at the same position always refer to the same variable, even
    # when the tables store their columns in a different order.
    # NOTE(review): a column that already ends with "_X" / "_Y", or that is
    # called "rownames_merge_ID", could clash with the temporary names.
    shared <- intersect(colnames(df2), colnames(df1))
    shared_x <- paste0(shared, "_X")
    shared_y <- paste0(shared, "_Y")
    if( length(shared) > 0 ){
        colnames(df1)[ match(shared, colnames(df1)) ] <- shared_x
        colnames(df2)[ match(shared, colnames(df2)) ] <- shared_y
    }

    # Add rownames to one of the columns so that they can be used as a key
    df1$rownames_merge_ID <- rownames(df1)
    df2$rownames_merge_ID <- rownames(df2)
    # Merge data frames into one data frame
    df <- merge(df1, df2, by = "rownames_merge_ID", all.x = all.x, all.y = all.y)
    # Add rownames and remove additional column
    rownames(df) <- df$rownames_merge_ID
    df$rownames_merge_ID <- NULL

    # Combine the two copies of every shared variable into one column
    for( i in seq_along(shared) ){
        x <- shared_x[i]
        y <- shared_y[i]
        # Combine information from columns
        x_and_y_combined <- coalesce( df[ , x], df[ , y] )
        # Remove additional columns
        df[ , x ] <- NULL
        df[ , y ] <- NULL
        # Add column that has combined information under the original name
        df[ , shared[i] ] <- x_and_y_combined
    }
    return(df)
}