##
## function: gsva
## purpose: main function of the package which estimates activity
##          scores for each given gene-set

setGeneric("gsva", function(expr, gset.idx.list, ...) standardGeneric("gsva"))

setMethod("gsva", signature(expr="HDF5Array", gset.idx.list="list"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L,
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)
  
  warning("Using 'HDF5Array' objects as input is still in an experimental stage.")

  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)
  
  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)
  
  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }
  
  rval <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM)
  
  rval
})

setMethod("gsva", signature(expr="SingleCellExperiment", gset.idx.list="list"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)
  
  warning("Using 'SingleCellExperiment' objects as input is still in an experimental stage.")

  if (length(assays(expr)) == 0L)
    stop("The input SummarizedExperiment object has no assay data.")
  
  se <- expr
  
  if (missing(annotation))
    annotation <- names(assays(se))[1]
  else {
    if (!is.character(annotation))
      stop("The 'annotation' argument must contain a character string.")
    annotation <- annotation[1]
    
    if (!annotation %in% names(assays(se)))
      stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation))
  }
  expr <- assays(se)[[annotation]]
  
  ## filter genes according to various criteria,
  ## e.g., constant expression
  
  expr <- .filterFeatures(expr, method)
  
  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)
  
  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }
  
  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 
  
  rval <- SingleCellExperiment(assays=SimpleList(es=eSco),
                               colData=colData(se),
                               metadata=metadata(se))
  metadata(rval)$annotation <- NULL
  
  rval
          })
          
setMethod("gsva", signature(expr="dgCMatrix", gset.idx.list="list"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L,
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)
  
  warning("Using 'dgCMatrix' objects as input is still in an experimental stage.")

  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)
  
  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)
  
  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }
  
  rval <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM)
  
  rval
})

setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="GeneSetCollection"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)

  if (length(assays(expr)) == 0L)
    stop("The input SummarizedExperiment object has no assay data.")

  se <- expr
  if (missing(annotation))
    annotation <- names(assays(se))[1]
  else {
    if (!is.character(annotation))
      stop("The 'annotation' argument must contain a character string.")
    annotation <- annotation[1]

    if (!annotation %in% names(assays(se)))
      stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation))
  }
  expr <- as.matrix(assays(se)[[annotation]])

  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)

  annotpkg <- metadata(se)$annotation
  if (!is.null(annotpkg) && length(annotpkg) > 0 && is.character(annotpkg) && annotpkg != "") {
    if (all(!c(annotpkg, paste0(annotpkg, ".db")) %in% installed.packages()))
      stop(sprintf("Please install the annotation package %s. If %s does not seem to exist as a package, please try to append the suffix .db to its name.", annotpkg, annotpkg))

    if (verbose)
      cat("Mapping identifiers between gene sets and feature names\n")

    ## map gene identifiers of the gene sets to the features in the chip
    ## Biobase::annotation() is necessary to disambiguate from the
    ## 'annotation' argument
    mapped.gset.idx.list <- mapIdentifiers(gset.idx.list,
                                           AnnoOrEntrezIdentifier(annotpkg))
  } else {
    mapped.gset.idx.list <- gset.idx.list
    if (verbose) {
      cat("No annotation package name available in the input 'SummarizedExperiment' object 'expr'.",
          "Attempting to directly match identifiers in 'expr' to gene sets.", sep="\n")
    }
  }
  mapped.gset.idx.list <- geneIds(mapped.gset.idx.list) 

  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(mapped.gset.idx.list, rownames(expr))

  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)

  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }

  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 

  rval <- SummarizedExperiment(assays=SimpleList(es=eSco),
                               colData=colData(se),
                               metadata=metadata(se))
  metadata(rval)$annotation <- NULL

  rval
})

setMethod("gsva", signature(expr="SummarizedExperiment", gset.idx.list="list"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)

  if (length(assays(expr)) == 0L)
    stop("The input SummarizedExperiment object has no assay data.")

  se <- expr
  if (missing(annotation))
    annotation <- names(assays(se))[1]
  else {
    if (!is.character(annotation))
      stop("The 'annotation' argument must contain a character string.")
    annotation <- annotation[1]

    if (!annotation %in% names(assays(se)))
      stop(sprintf("Assay %s not found in the input SummarizedExperiment object.", annotation))
  }
  expr <- as.matrix(assays(se)[[annotation]])

  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)

  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)

  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }

  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 

  rval <- SummarizedExperiment(assays=SimpleList(es=eSco),
                               colData=colData(se),
                               metadata=metadata(se))
  metadata(rval)$annotation <- NULL

  rval
})

setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="list"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)

  eset <- expr
  expr <- exprs(eset)
  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)

  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)

  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }

  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM) 

  rval <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(eset),
              experimentData=experimentData(eset), annotation="")

  rval
})

setMethod("gsva", signature(expr="ExpressionSet", gset.idx.list="GeneSetCollection"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)

  eset <- expr
  expr <- exprs(eset)
  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)

  annotpkg <- Biobase::annotation(eset)
  if (length(annotpkg) > 0 && annotpkg != "") {
    if (all(!c(annotpkg, paste0(annotpkg, ".db")) %in% installed.packages()))
      stop(sprintf("Please install the annotation package %s. If %s does not seem to exist as a package, please try to append the suffix .db to its name.", annotpkg, annotpkg))

    if (verbose)
      cat("Mapping identifiers between gene sets and feature names\n")

    ## map gene identifiers of the gene sets to the features in the chip
    ## Biobase::annotation() is necessary to disambiguate from the
    ## 'annotation' argument
    mapped.gset.idx.list <- mapIdentifiers(gset.idx.list,
                                           AnnoOrEntrezIdentifier(annotpkg))
  } else {
    mapped.gset.idx.list <- gset.idx.list
    if (verbose) {
      cat("No annotation package name available in the input 'ExpressionSet' object 'expr'.",
        "Attempting to directly match identifiers in 'expr' to gene sets.", sep="\n")
    }
  }
  mapped.gset.idx.list <- geneIds(mapped.gset.idx.list) 

  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(mapped.gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)

  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }

  eSco <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM)

  rval <- new("ExpressionSet", exprs=eSco, phenoData=phenoData(eset),
              experimentData=experimentData(eset), annotation="")

  rval
})

setMethod("gsva", signature(expr="matrix", gset.idx.list="GeneSetCollection"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)

  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)

  ## map gene identifiers of the gene sets to the features in the matrix
  mapped.gset.idx.list <- gset.idx.list
  if (!missing(annotation)) {
    if (verbose)
      cat("Mapping identifiers between gene sets and feature names\n")

    mapped.gset.idx.list <- mapIdentifiers(gset.idx.list,
                                           AnnoOrEntrezIdentifier(annotation))
  }
  mapped.gset.idx.list <- geneIds(mapped.gset.idx.list)
  
  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(mapped.gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)

  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }

  rval <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm,
                verbose, BPPARAM)

  rval
})

setMethod("gsva", signature(expr="matrix", gset.idx.list="list"),
          function(expr, gset.idx.list, annotation,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  abs.ranking=FALSE,
  min.sz=1,
  max.sz=Inf,
  parallel.sz=1L, 
  mx.diff=TRUE,
  tau=switch(method, gsva=1, ssgsea=0.25, NA),
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose))
{
  method <- match.arg(method)
  kcdf <- match.arg(kcdf)

  ## filter genes according to verious criteria,
  ## e.g., constant expression
  expr <- .filterFeatures(expr, method)

  ## map to the actual features for which expression data is available
  mapped.gset.idx.list <- .mapGeneSetsToFeatures(gset.idx.list, rownames(expr))
  
  ## remove gene sets from the analysis for which no features are available
  ## and meet the minimum and maximum gene-set size specified by the user
  mapped.gset.idx.list <- filterGeneSets(mapped.gset.idx.list,
                                         min.sz=max(1, min.sz),
                                         max.sz=max.sz)

  if (!missing(kcdf)) {
    if (kcdf == "Gaussian") {
      rnaseq <- FALSE
      kernel <- TRUE
    } else if (kcdf == "Poisson") {
      rnaseq <- TRUE
      kernel <- TRUE
    } else
      kernel <- FALSE
  }

  rval <- .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM)

  rval
})

.gsva <- function(expr, gset.idx.list,
  method=c("gsva", "ssgsea", "zscore", "plage"),
  kcdf=c("Gaussian", "Poisson", "none"),
  rnaseq=FALSE,
  abs.ranking=FALSE,
  parallel.sz=1L,
  mx.diff=TRUE,
  tau=1,
  kernel=TRUE,
  ssgsea.norm=TRUE,
  verbose=TRUE,
  BPPARAM=SerialParam(progressbar=verbose)) {
  
  if(is(expr, "DelayedArray")){
    warning("Using 'DelayedArray' objects as input is still in an experimental stage.")

    return(.gsvaDelayedArray(expr, gset.idx.list, method, kcdf, rnaseq, abs.ranking,
                             parallel.sz, mx.diff, tau, kernel, ssgsea.norm, verbose, BPPARAM))
  }

	if (length(gset.idx.list) == 0)
		stop("The gene set list is empty! Filter may be too stringent.")

  if (any(lengths(gset.idx.list) == 1))
    warning("Some gene sets have size one. Consider setting 'min.sz > 1'.")

  parallel.sz <- as.integer(parallel.sz)
  if (parallel.sz < 1L)
    parallel.sz <- 1L
	
  ## because we keep the argument 'parallel.sz' for backwards compatibility
  ## we need to harmonize it with the contents of BPPARAM
  if (parallel.sz > 1L && class(BPPARAM) == "SerialParam") {
    BPPARAM=MulticoreParam(progressbar=verbose, workers=parallel.sz, tasks=100)
  } else if (parallel.sz == 1L && class(BPPARAM) != "SerialParam") {
    parallel.sz <- bpnworkers(BPPARAM)
  } else if (parallel.sz > 1L && class(BPPARAM) != "SerialParam") {
    bpworkers(BPPARAM) <- parallel.sz
  }

  if (class(BPPARAM) != "SerialParam" && verbose)
    cat(sprintf("Setting parallel calculations through a %s back-end\nwith workers=%d and tasks=100.\n",
                    class(BPPARAM), parallel.sz))

  if (method == "ssgsea") {
	  if(verbose)
		  cat("Estimating ssGSEA scores for", length(gset.idx.list),"gene sets.\n")

    return(ssgsea(expr, gset.idx.list, alpha=tau, parallel.sz=parallel.sz,
                  normalization=ssgsea.norm, verbose=verbose, BPPARAM=BPPARAM))
  }

  if (method == "zscore") {
    if (rnaseq)
      stop("rnaseq=TRUE does not work with method='zscore'.")

	  if(verbose)
		  cat("Estimating combined z-scores for", length(gset.idx.list), "gene sets.\n")

    return(zscore(expr, gset.idx.list, parallel.sz, verbose, BPPARAM=BPPARAM))
  }

  if (method == "plage") {
    if (rnaseq)
      stop("rnaseq=TRUE does not work with method='plage'.")

	  if(verbose)
		  cat("Estimating PLAGE scores for", length(gset.idx.list),"gene sets.\n")

    return(plage(expr, gset.idx.list, parallel.sz, verbose, BPPARAM=BPPARAM))
  }

	if(verbose)
		cat("Estimating GSVA scores for", length(gset.idx.list),"gene sets.\n")
	
	n.samples <- ncol(expr)
	n.genes <- nrow(expr)
	n.gset <- length(gset.idx.list)
	
	es.obs <- matrix(NaN, n.gset, n.samples, dimnames=list(names(gset.idx.list),colnames(expr)))
	colnames(es.obs) <- colnames(expr)
	rownames(es.obs) <- names(gset.idx.list)
	
	es.obs <- compute.geneset.es(expr, gset.idx.list, 1:n.samples,
                               rnaseq=rnaseq, abs.ranking=abs.ranking,
                               parallel.sz=parallel.sz,
                               mx.diff=mx.diff, tau=tau, kernel=kernel,
                               verbose=verbose, BPPARAM=BPPARAM)
	
	colnames(es.obs) <- colnames(expr)
	rownames(es.obs) <- names(gset.idx.list)

	es.obs
}


compute.gene.density <- function(expr, sample.idxs, rnaseq=FALSE, kernel=TRUE){
	n.test.samples <- ncol(expr)
	n.genes <- nrow(expr)
	n.density.samples <- length(sample.idxs)
	
  gene.density <- NA
  if (kernel) {
	  A = .C("matrix_density_R",
			as.double(t(expr[ ,sample.idxs, drop=FALSE])),
			as.double(t(expr)),
			R = double(n.test.samples * n.genes),
			n.density.samples,
			n.test.samples,
			n.genes,
      as.integer(rnaseq))$R
	
	  gene.density <- t(matrix(A, n.test.samples, n.genes))
  } else {
    gene.density <- t(apply(expr, 1, function(x, sample.idxs) {
                                     f <- ecdf(x[sample.idxs])
                                     f(x)
                                   }, sample.idxs))
    gene.density <- log(gene.density / (1-gene.density))
  }

	return(gene.density)	
}

compute.geneset.es <- function(expr, gset.idx.list, sample.idxs, rnaseq=FALSE,
                               abs.ranking, parallel.sz=1L, 
                               mx.diff=TRUE, tau=1, kernel=TRUE,
                               verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose)) {
	num_genes <- nrow(expr)
	if (verbose) {
    if (kernel) {
      if (rnaseq)
        cat("Estimating ECDFs with Poisson kernels\n")
      else
        cat("Estimating ECDFs with Gaussian kernels\n")
    } else
      cat("Estimating ECDFs directly\n")
  }

  ## open parallelism only if ECDFs have to be estimated for
  ## more than 100 genes on more than 100 samples
  if (parallel.sz > 1 && length(sample.idxs > 100) && nrow(expr) > 100) {
    if (verbose)
      cat(sprintf("Estimating ECDFs in parallel on %d cores\n", as.integer(parallel.sz)))
    iter <- function(Y, n_chunks=BiocParallel::multicoreWorkers()) {
      idx <- splitIndices(nrow(Y), min(nrow(Y), n_chunks))
      i <- 0L
      function() {
        if (i == length(idx))
          return(NULL)
        i <<- i + 1L
        Y[idx[[i]], , drop=FALSE]
      }
    }
    gene.density <- bpiterate(iter(expr, 100),
                              compute.gene.density,
                              sample.idxs=sample.idxs,
                              rnaseq=rnaseq, kernel=kernel,
                              REDUCE=rbind, reduce.in.order=TRUE,
                              BPPARAM=BPPARAM)
  } else 
	  gene.density <- compute.gene.density(expr, sample.idxs, rnaseq, kernel)
	
	compute_rank_score <- function(sort_idx_vec){
		tmp <- rep(0, num_genes)
		tmp[sort_idx_vec] <- abs(seq(from=num_genes,to=1) - num_genes/2)
		return (tmp)
	}
	
	rank.scores <- rep(0, num_genes)
  sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE) # n.genes * n.samples
	
	rank.scores <- apply(sort.sgn.idxs, 2, compute_rank_score)

  m <- bplapply(gset.idx.list, ks_test_m,
                gene.density=rank.scores,
                sort.idxs=sort.sgn.idxs,
                mx.diff=mx.diff, abs.ranking=abs.ranking,
                tau=tau, verbose=verbose,
                BPPARAM=BPPARAM)
  m <- do.call("rbind", m)
  colnames(m) <- colnames(expr)

	return (m)
}


ks_test_m <- function(gset_idxs, gene.density, sort.idxs, mx.diff=TRUE,
                      abs.ranking=FALSE, tau=1, verbose=TRUE){
	
	n.genes <- nrow(gene.density)
	n.samples <- ncol(gene.density)
	n.geneset <- length(gset_idxs)

	geneset.sample.es = .C("ks_matrix_R",
			as.double(gene.density),
			R = double(n.samples),
			as.integer(sort.idxs),
			n.genes,
			as.integer(gset_idxs),
			n.geneset,
			as.double(tau),
			n.samples,
			as.integer(mx.diff),
      as.integer(abs.ranking))$R

	return(geneset.sample.es)
}


## ks-test in R code - testing only
ks_test_Rcode <- function(gene.density, gset_idxs, tau=1, make.plot=FALSE){
	
	n.genes = length(gene.density)
	n.gset = length(gset_idxs)
	
	sum.gset <- sum(abs(gene.density[gset_idxs])^tau)
	
	dec = 1 / (n.genes - n.gset)
	
	sort.idxs <- order(gene.density,decreasing=T)
	offsets <- sort(match(gset_idxs, sort.idxs))
	
	last.idx = 0
	values <- rep(NaN, length(gset_idxs))
	current = 0
	for(i in seq_along(offsets)){
		current = current + abs(gene.density[sort.idxs[offsets[i]]])^tau / sum.gset - dec * (offsets[i]-last.idx-1)
		
		values[i] = current
		last.idx = offsets[i]
	}
	check_zero = current - dec * (n.genes-last.idx)
	#if(check_zero > 10^-15){ 
	#	stop(paste=c("Expected zero sum for ks:", check_zero))
	#}
	if(make.plot){ plot(offsets, values,type="l") } 
	
	max.idx = order(abs(values),decreasing=T)[1]
	mx.value <- values[max.idx]
	
	return (mx.value)
}


.rndWalk <- function(gSetIdx, geneRanking, j, R, alpha) {
  indicatorFunInsideGeneSet <- match(geneRanking, gSetIdx)
  indicatorFunInsideGeneSet[!is.na(indicatorFunInsideGeneSet)] <- 1
  indicatorFunInsideGeneSet[is.na(indicatorFunInsideGeneSet)] <- 0
  stepCDFinGeneSet <- cumsum((abs(R[geneRanking, j])^alpha * 
                      indicatorFunInsideGeneSet)) /
                      sum((abs(R[geneRanking, j])^alpha *
                      indicatorFunInsideGeneSet))
  stepCDFoutGeneSet <- cumsum(!indicatorFunInsideGeneSet) /
                       sum(!indicatorFunInsideGeneSet)
  walkStat <- stepCDFinGeneSet - stepCDFoutGeneSet

  sum(walkStat) 
}

## optimized version of the function .rndWalk by Alexey Sergushichev
## https://github.com/rcastelo/GSVA/pull/15
## based on his paper https://doi.org/10.1101/060012
.fastRndWalk <- function(gSetIdx, geneRanking, j, Ra) {
    n <- length(geneRanking)
    k <- length(gSetIdx)
    idxs <- sort.int(match(gSetIdx, geneRanking))
    
    stepCDFinGeneSet2 <- 
        sum(Ra[geneRanking[idxs], j] * (n - idxs + 1)) /
        sum((Ra[geneRanking[idxs], j]))    
    
    
    stepCDFoutGeneSet2 <- (n * (n + 1) / 2 - sum(n - idxs + 1)) / (n - k)
    
    walkStat <- stepCDFinGeneSet2 - stepCDFoutGeneSet2

    walkStat
}


ssgsea <- function(X, geneSets, alpha=0.25, parallel.sz,
                   normalization=TRUE, verbose=TRUE,
                   BPPARAM=SerialParam(progressbar=verbose)) {

  n <- ncol(X)
  
  print("Calculating ranks...")
  
  R <- t(sparseMatrixStats::colRanks(X, ties.method = "average"))
  mode(R) <- "integer"

  print("Calculating absolute values from ranks...")
  
  Ra <- abs(R)^alpha
  
  es <- bplapply(as.list(1:n), function(j) {
    geneRanking <- order(R[, j], decreasing=TRUE)
    es_sample <- lapply(geneSets, .fastRndWalk, geneRanking, j, Ra)
    
    unlist(es_sample)
  }, BPPARAM=BPPARAM)
  
  es <- do.call("cbind", es)
  
  
  if (normalization) {
    print("Normalizing...")
    ## normalize enrichment scores by using the entire data set, as indicated
    ## by Barbie et al., 2009, online methods, pg. 2
    es <- es[, 1:n, drop=FALSE] / (range(es)[2] - range(es)[1])
  }
  
  if (length(geneSets) == 1)
    es <- matrix(es, nrow=1)
  
  rownames(es) <- names(geneSets)
  colnames(es) <- colnames(X)
  
  if(is(X, "dgCMatrix")){
    es <- as(as(as(es, "dMatrix"), "generalMatrix"), "CsparseMatrix")
  }
  
  es
}

combinez <- function(gSetIdx, j, Z) sum(Z[gSetIdx, j]) / sqrt(length(gSetIdx))

zscore <- function(X, geneSets, parallel.sz, verbose=TRUE,
                   BPPARAM=SerialParam(progressbar=verbose)) {

  n <- ncol(X)

  if(is(X, "dgCMatrix")){
    message("Please bear in mind that this method first scales the values of the gene
    expression data. In order to take advantage of the sparse Matrix type, the scaling
    will only be applied to the non-zero values of the data. This is a provisional 
    solution in order to give support to the dgCMatrix format.")
    
    Z <- t(X)
    Z <- .dgCapply(Z, scale, 2)
    Z <- t(Z)
    
  } else {
    Z <- t(scale(t(X)))
  }

  es <- bplapply(as.list(1:n), function(j, Z, geneSets) {
    es_sample <- lapply(geneSets, combinez, j, Z)
    unlist(es_sample)
  }, Z, geneSets, BPPARAM=BPPARAM)

  es <- do.call("cbind", es)
  
  if(is(X, "dgCMatrix")){
    es <- as(as(as(es, "dMatrix"), "generalMatrix"), "CsparseMatrix")
  }

  rownames(es) <- names(geneSets)
  colnames(es) <- colnames(X)

  es
}

rightsingularsvdvectorgset <- function(gSetIdx, Z) {
  if(is(Z, "dgCMatrix")){
    s <- BiocSingular::runExactSVD(Z[gSetIdx, ])
  } else {
    s <- svd(Z[gSetIdx, ])
  }
  s$v[, 1]
}

plage <- function(X, geneSets, parallel.sz, verbose=TRUE,
                  BPPARAM=SerialParam(progressbar=verbose)) {
  if(is(X, "dgCMatrix")){
    message("Please bear in mind that this method first scales the values of the gene
    expression data. In order to take advantage of the sparse Matrix type, the scaling
    will only be applied to the non-zero values of the data. This is a provisional 
    solution in order to give support to the dgCMatrix format.")
    
    Z <- Matrix::t(X)
    Z <- .dgCapply(Z, scale, 2)
    Z <- Matrix::t(Z)
    
    es <- bplapply(geneSets, rightsingularsvdvectorgset, Z,
                   BPPARAM=BPPARAM)
    
    es <- do.call(rbind, es)
    
    es <- as(as(as(es, "dMatrix"), "generalMatrix"), "CsparseMatrix")
    
  } else {
    
    Z <- t(scale(t(X)))
    
    es <- bplapply(geneSets, rightsingularsvdvectorgset, Z,
                   BPPARAM=BPPARAM)
    
    es <- do.call(rbind, es)
    
    if (length(geneSets) == 1)
      es <- matrix(es, nrow=1)
    
    rownames(es) <- names(geneSets)
    colnames(es) <- colnames(X)
  }
  
  es
}

setGeneric("filterGeneSets", function(gSets, ...) standardGeneric("filterGeneSets"))

setMethod("filterGeneSets", signature(gSets="list"),
          function(gSets, min.sz=1, max.sz=Inf) {
	gSetsLen <- lengths(gSets)
	return (gSets[gSetsLen >= min.sz & gSetsLen <= max.sz])	
})

setMethod("filterGeneSets", signature(gSets="GeneSetCollection"),
          function(gSets, min.sz=1, max.sz=Inf) {
  filterGeneSets(geneIds(gSets), min.sz, max.sz)
})



setGeneric("computeGeneSetsOverlap", function(gSets, uniqGenes=unique(unlist(gSets, use.names=FALSE)), ...) standardGeneric("computeGeneSetsOverlap"))

setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="character"),
          function(gSets, uniqGenes, min.sz=1, max.sz=Inf) {
  totalGenes <- length(uniqGenes)

  ## map to the actual features for which expression data is available
  gSets <- .mapGeneSetsToFeatures(gSets, uniqGenes)

  lenGsets <- lengths(gSets)
  totalGsets <- length(gSets)

  gSetsMembershipMatrix <- matrix(0, nrow=totalGenes, ncol=totalGsets,
                                  dimnames=list(uniqGenes, names(gSets)))
  members <- cbind(unlist(gSets, use.names=FALSE), rep(1:totalGsets, times=lenGsets))
  gSetsMembershipMatrix[members] <- 1

  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
})

setMethod("computeGeneSetsOverlap", signature(gSets="list", uniqGenes="ExpressionSet"),
          function(gSets, uniqGenes, min.sz=1, max.sz=Inf) {
  uniqGenes <- featureNames(uniqGenes)
  totalGenes <- length(uniqGenes)

  ## map to the actual features for which expression data is available
  gSets <- .mapGeneSetsToFeatures(gSets, uniqGenes)

  lenGsets <- lengths(gSets)
  totalGsets <- length(gSets)

  gSetsMembershipMatrix <- matrix(0, nrow=totalGenes, ncol=totalGsets,
                                  dimnames=list(uniqGenes, names(gSets)))
  members <- cbind(unlist(gSets, use.names=FALSE), rep(1:totalGsets, times=lenGsets))
  gSetsMembershipMatrix[members] <- 1

  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
})

setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="character"),
          function(gSets, uniqGenes, min.sz=1, max.sz=Inf) {

  gSetsMembershipMatrix <- incidence(gSets)
  gSetsMembershipMatrix <- t(gSetsMembershipMatrix[, colnames(gSetsMembershipMatrix) %in% uniqGenes])

  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
})

setMethod("computeGeneSetsOverlap", signature(gSets="GeneSetCollection", uniqGenes="ExpressionSet"),
          function(gSets, uniqGenes, min.sz=1, max.sz=Inf) {
  ## map gene identifiers of the gene sets to the features in the chip
  ## Biobase::annotation() is necessary to disambiguate from the
  ## 'annotation' argument
  gSets <- mapIdentifiers(gSets, AnnoOrEntrezIdentifier(Biobase::annotation(uniqGenes)))
  
  uniqGenes <- featureNames(uniqGenes)

  gSetsMembershipMatrix <- incidence(gSets)
  gSetsMembershipMatrix <- t(gSetsMembershipMatrix[, colnames(gSetsMembershipMatrix) %in% uniqGenes])

  .computeGeneSetsOverlap(gSetsMembershipMatrix, min.sz, max.sz)
})

.computeGeneSetsOverlap <- function(gSetsMembershipMatrix, min.sz=1, max.sz=Inf) {
  ## gSetsMembershipMatrix should be a (genes x gene-sets) incidence matrix

  lenGsets <- colSums(gSetsMembershipMatrix)

  szFilterMask <- lenGsets >= max(1, min.sz) & lenGsets <= max.sz
  if (!any(szFilterMask))
    stop("No gene set meets the minimum and maximum size filter\n")

  gSetsMembershipMatrix <- gSetsMembershipMatrix[, szFilterMask]
  lenGsets <- lenGsets[szFilterMask]

  totalGsets <- ncol(gSetsMembershipMatrix)

  M <- t(gSetsMembershipMatrix) %*% gSetsMembershipMatrix

  M1 <- matrix(lenGsets, nrow=totalGsets, ncol=totalGsets,
               dimnames=list(colnames(gSetsMembershipMatrix), colnames(gSetsMembershipMatrix)))
  M2 <- t(M1)
  M.min <- matrix(0, nrow=totalGsets, ncol=totalGsets)
  M.min[M1 < M2] <- M1[M1 < M2]
  M.min[M2 <= M1] <- M2[M2 <= M1]
  overlapMatrix <- M / M.min

  return (overlapMatrix)
}

## from https://stat.ethz.ch/pipermail/r-help/2005-September/078974.html
## function: isPackageLoaded
## purpose: to check whether the package specified by the name given in
##          the input argument is loaded. this function is borrowed from
##          the discussion on the R-help list found in this url:
##          https://stat.ethz.ch/pipermail/r-help/2005-September/078974.html
## parameters: name - package name
## return: TRUE if the package is loaded, FALSE otherwise

.isPackageLoaded <- function(name) {
  ## Purpose: is package 'name' loaded?
  ## --------------------------------------------------
  (paste("package:", name, sep="") %in% search()) ||
  (name %in% loadedNamespaces())
}

##
## ARE THESE FUNCTIONS STILL NECESSARY ?????
##

##a <- replicate(1000, compute.null.enrichment(10000,50,make.plot=F))

compute.null.enrichment <- function(n.genes, n.geneset, make.plot=FALSE){
	ranks <- (n.genes/2) - rev(1:n.genes)
	#null.gset.idxs <- seq(1, n.genes, by=round(n.genes / n.geneset))
	null.gset.idxs <- sample(n.genes, n.geneset)
	null.es <- ks_test_Rcode(ranks, null.gset.idxs,make.plot=make.plot)
	return (null.es)
}


load.gmt.data <- function(gmt.file.path){
	tmp <- readLines(gmt.file.path)
	gsets <- list()
	for(i in 1:length(tmp)){
		t <- strsplit(tmp[i],'\t')[[1]]
		gsets[[t[1]]] <- t[3:length(t)]
	}
	return (gsets)
}

compute.gset.overlap.score <- function(gset.idxs){
	n <- length(gset.idxs)
	mx.idx <- max(unlist(gset.idxs, use.names=F))
	l <- c(sapply(gset.idxs, length))
	
	gset.M <- matrix(0, nrow=mx.idx, ncol=n)
	for(i in 1:n){
		gset.M[gset.idxs[[i]],i] = 1
	}
	M <- t(gset.M) %*% gset.M
	
	M1 <- matrix(l, nrow=n, ncol=n)
	M2 <- t(M1)
	M.min <- matrix(0, nrow=n, ncol=n)
	M.min[M1 < M2] <- M1[M1 < M2]
	M.min[M2 <= M1] <- M2[M2 <= M1]
	M.score <- M / M.min
	return (M.score)
}