#' Fit H0 model and evaluate fit statistics
#' 
#' @param df tidy data_frame retrieved after import of a 2D-TPP 
#' dataset, potential filtering and addition of a column "nObs"
#' containing the number of observations per protein
#' @param maxit maximal number of iterations the optimization
#' should be given, default is set to 500
#' @param optim_fun optimization function that should be used
#' for fitting the H0 model
#' @param gr_fun optional gradient function for optim_fun,
#' default is NULL
#' 
#' @return data frame with H0 model characteristics for each
#' protein
#' 
#' @export
#'
#' @examples
#' 
#' data("simulated_cell_extract_df")
#' temp_df <- simulated_cell_extract_df %>% 
#'   filter(clustername %in% paste0("protein", 1:5)) %>% 
#'   group_by(representative) %>% 
#'   mutate(nObs = n()) %>% 
#'   ungroup 
#'   
#' fitH0Model(temp_df)
#' 
#' @import dplyr
#' @importFrom stats optim
fitH0Model <- function(df, 
                       maxit = 500,
                       optim_fun = .min_RSS_h0,
                       gr_fun = NULL){
  
  representative <- clustername <- nObs <- 
    temperature <- . <- NULL
  
  h0_df <- df %>%
    group_by(representative, clustername, nObs) %>%
    mutate(temp_i = dense_rank(temperature)) %>%
    do({
      unique_temp <- unique(.$temperature)
      len_temp <- length(unique_temp)
      start_par = vapply(unique_temp, function(x)
        mean(filter(., temperature == x)$log2_value), 1)
      h0_model = try(optim(par = start_par,
                       fn = optim_fun,
                       len_temp = len_temp,
                       data = .,
                       method = "L-BFGS-B",
                       gr = gr_fun,
                       control = list(maxit = maxit)))
      .eval_optim_result(h0_model, hypothesis = "H0",
                        data = .)
    }) %>%
    group_by(representative, clustername) %>%
    ungroup()
  
  return(h0_df)
}

.fitEvalH1 <- function(df_fil, unique_temp, len_temp,
                      optim_fun = .min_RSS_h1_slopeEC50, 
                      optim_fun_2 = NULL,
                      gr_fun = NULL,
                      gr_fun_2 = NULL,
                      ec50_lower_limit = NULL,
                      ec50_upper_limit= NULL,
                      slopEC50 = TRUE, maxit = 500){
  .min_RSS_h1_slopeEC50 <- NULL
  if(is.null(ec50_lower_limit)){
    ec50_lower_limit <- min(unique(df_fil$log_conc)[
      which(is.finite(unique(df_fil$log_conc)))])
  }
  if(is.null(ec50_upper_limit)){
    ec50_upper_limit <- max(unique(df_fil$log_conc)[
      which(is.finite(unique(df_fil$log_conc)))])
  }
  start_par = .getStartParameters(
    df = df_fil, 
    unique_temp = unique_temp, 
    len_temp = len_temp, 
    slopEC50 = slopEC50)
  opt_limits <- .getOptLimits(
    ec50Limits = c(ec50_lower_limit, ec50_upper_limit),
    len_temp = len_temp, 
    slopEC50 = slopEC50)
  lower = opt_limits$lower 
  upper = opt_limits$upper
  h1_model = try(optim(par = start_par,
                       fn = optim_fun,
                       gr = gr_fun,
                       len_temp = len_temp,
                       data = df_fil,
                       method = "L-BFGS-B",
                       upper = upper,
                       lower = lower,
                       control = list(maxit = maxit)))
  if(!is.null(optim_fun_2) & 
     !is(h1_model, "try-error")){
    h1_model = try(optim(par = h1_model$par,
                         fn = optim_fun_2,
                         gr = gr_fun_2,
                         len_temp = len_temp,
                         data = df_fil,
                         method = "L-BFGS-B",
                         upper = upper,
                         lower = lower,
                         control = list(maxit = maxit)))
  }
  return(h1_model)
}

#' Fit H1 model and evaluate fit statistics
#' 
#' @param df tidy data_frame retrieved after import of a 2D-TPP 
#' dataset, potential filtering and addition of a column "nObs"
#' containing the number of observations per protein
#' @param maxit maximal number of iterations the optimization
#' should be given, default is set to 500
#' @param optim_fun optimization function that should be used
#' for fitting the H0 model
#' @param optim_fun_2 optional secound optimization function for
#' fitting the H1 model that should be used based on the fitted 
#' parameters of the optimizationfor based on optim_fun
#' @param gr_fun optional gradient function for optim_fun,
#' default is NULL
#' @param gr_fun_2 optional gradient function for optim_fun_2,
#' default is NULL
#' @param ec50_lower_limit lower limit of ec50 parameter
#' @param ec50_upper_limit lower limit of ec50 parameter
#' @param slopEC50 logical flag indicating whether the h1 model is
#' fitted with a linear model describing the shift od the pEC50 over 
#' temperatures
#' 
#' @return data frame with H1 model characteristics for each
#' protein
#' 
#' @export
#'
#' @examples
#' 
#' data("simulated_cell_extract_df")
#' temp_df <- simulated_cell_extract_df %>% 
#'   filter(clustername %in% paste0("protein", 1:5)) %>% 
#'   group_by(representative) %>% 
#'   mutate(nObs = n()) %>% 
#'   ungroup
#'   
#' fitH1Model(temp_df)
#' 
#' @import dplyr
#' @importFrom stats optim
#' @importFrom methods is
fitH1Model <- function(df, 
                       maxit = 500,
                       optim_fun = .min_RSS_h1_slope_pEC50,
                       optim_fun_2 = NULL,
                       gr_fun = NULL,
                       gr_fun_2 = NULL,
                       ec50_lower_limit = NULL,
                       ec50_upper_limit = NULL,
                       slopEC50 = TRUE){
  
  representative <- clustername <- nObs <- 
    temperature <- . <- NULL
  
  if(is.null(ec50_lower_limit)){
    ec50_lower_limit <- min(unique(df$log_conc)[
      which(is.finite(unique(df$log_conc)))])
  }
  if(is.null(ec50_upper_limit)){
    ec50_upper_limit <- max(unique(df$log_conc)[
      which(is.finite(unique(df$log_conc)))])
  }
  
  h1_df <- df %>%
    group_by(representative, clustername, nObs) %>%
    mutate(temp_i = dense_rank(temperature)) %>%
    do({
      unique_temp <- unique(.$temperature)
      len_temp <- length(unique_temp)
      h1_model <- .fitEvalH1(df_fil = .,
                            unique_temp = unique_temp, 
                            len_temp = len_temp,
                            optim_fun = optim_fun, 
                            optim_fun_2 = optim_fun_2,
                            gr_fun = gr_fun,
                            gr_fun_2 = gr_fun_2,
                            slopEC50 = slopEC50, 
                            maxit = maxit)
      .eval_optim_result(h1_model, hypothesis = "H1",
                        data = ., len_temp = len_temp,
                        slopEC50 = slopEC50)
    }) %>%
    group_by(representative, clustername) %>%
    ungroup()
  
  return(h1_df)
}

#' @importFrom methods is
#' @importFrom stats fft
.eval_optim_result <- function(optim_result, hypothesis = "H1",
                              data, len_temp = NULL,
                              slopEC50 = TRUE){
  # evaluate optimization results for H0 or H1 models 
  if(!is(optim_result, "try-error")){
    if(hypothesis == "H1"){
      pEC50 = -optim_result$par[1]
      if(!slopEC50){
        slope = optim_result$par[2]
      }else{
        pEC50_slope = optim_result$par[2]
        slope = optim_result$par[3]
      }
      rss = optim_result$value
      nCoeffs = length(optim_result$par)
      fitStats <- 
        data.frame(rss = rss, nCoeffs = nCoeffs,
                   pEC50 = pEC50, slope = slope)
      if(slopEC50){
        fitStats$pEC50_slope <- pEC50_slope
      }
      if(!is.null(len_temp)){
        if(!slopEC50){
            alpha <- optim_result$par[(4 + len_temp):(3 + len_temp*2)]
        }else{
            alpha <- optim_result$par[(5 + len_temp):(4 + len_temp*2)]
        }
        alpha_fft_df <- data.frame(freq = seq_len(length(alpha)),
                                   strength = Mod(fft(alpha)))
        alpha_fft_fit <- lm(strength ~ poly(freq, 2), 
                            data = alpha_fft_df[-1,])
        if(coef(alpha_fft_fit)[3] < 0){
            fitStats$detected_effect <- "carry-over"
        }else if(alpha[1] > (max(alpha[-1])/3)){
            fitStats$detected_effect <- "expression/solubility"
        }else{
            fitStats$detected_effect <- "stability"
        }
      }
      names(fitStats) <- paste0(names(fitStats), hypothesis)
      return(fitStats)
    }else{
      rss = optim_result$value
      nCoeffs = length(optim_result$par)
      fitStats <- data.frame(rss = rss,  nCoeffs = nCoeffs)
      names(fitStats) <- paste0(names(fitStats), hypothesis)
      return(fitStats)
    }
  }else{
    fitStats <- 
      data.frame(rss = NA,nCoeffs = NA,
                 pEC50 = NA, slope = NA)
    names(fitStats) <- paste0(names(fitStats), hypothesis)
    return(fitStats)
  }
}

#' Compute F statistic from H1 and H0 model characteristics
#'
#' @param h0_df data frame with H0 model characteristics for each
#' protein
#' @param h1_df data frame with H1 model characteristics for each
#' protein
#' 
#' @return data frame with H0 and H1 model characteristics for each
#' protein and respectively computed F statistics
#'
#'
#' @examples
#' data("simulated_cell_extract_df")
#' temp_df <- simulated_cell_extract_df %>% 
#'   filter(clustername %in% paste0("protein", 1:20)) %>% 
#'   group_by(representative) %>% 
#'   mutate(nObs = n()) %>% 
#'   ungroup 
#'   
#' h0_df <- fitH0Model(temp_df)
#' h1_df <- fitH1Model(temp_df)
#'   
#' computeFstat(h0_df, h1_df)
#' 
#' @export
#' 
#' @import dplyr
computeFstat <- function(h0_df, h1_df){
  
  nCoeffsH1 <- nCoeffsH0 <- nObs <- rssH0 <- 
    rssH1 <- df2 <- df1 <- NULL
  
  sum_df <- 
    left_join(h0_df, h1_df, by = c("representative",
                                   "clustername", "nObs")) %>%
    ungroup() %>%
    mutate(df1 = nCoeffsH1 - nCoeffsH0, df2 = nObs - nCoeffsH1) %>%
    mutate(F_statistic = ((rssH0 - rssH1) / rssH1) * (df2/df1))
  
  return(sum_df)
}

#' Fit H0 and H1 model to 2D thermal profiles of proteins
#' and compute F statistic
#'
#' @param df tidy data_frame retrieved after import of a 2D-TPP 
#' dataset, potential filtering and addition of a column "nObs"
#' containing the number of observations per protein
#' @param maxit maximal number of iterations the optimization
#' should be given, default is set to 500
#' @param optim_fun_h0 optimization function that should be used
#' for fitting the H0 model
#' @param optim_fun_h1 optimization function that should be used
#' for fitting the H1 model
#' @param optim_fun_h1_2 optional additional optimization function 
#' that will be run with paramters retrieved from optim_fun_h1 and 
#' should be used for fitting the H1 model with the trimmed sum
#' model, default is NULL
#' @param gr_fun_h0 optional gradient function for optim_fun_h0,
#' default is NULL
#' @param gr_fun_h1 optional gradient function for optim_fun_h1,
#' default is NULL
#' @param gr_fun_h1_2 optional gradient function for optim_fun_h1_2,
#' default is NULL
#' @param ec50_lower_limit lower limit of ec50 parameter
#' @param ec50_upper_limit lower limit of ec50 parameter
#' @param slopEC50 logical flag indicating whether the h1 model is
#' fitted with a linear model describing the shift od the pEC50 over 
#' temperatures
#' 
#' @return data frame with H0 and H1 model characteristics for each
#' protein and respectively computed F statistics
#' 
#' @examples 
#' data("simulated_cell_extract_df")
#' temp_df <- simulated_cell_extract_df %>% 
#'   group_by(representative) %>% 
#'   mutate(nObs = n()) %>% 
#'   ungroup 
#' fitAndEvalDataset(temp_df)  
#' 
#' @export
fitAndEvalDataset <- function(df, maxit = 500,
                              optim_fun_h0 = .min_RSS_h0,
                              optim_fun_h1 = .min_RSS_h1_slope_pEC50,
                              optim_fun_h1_2 = NULL,
                              gr_fun_h0 = NULL,
                              gr_fun_h1 = NULL,
                              gr_fun_h1_2 = NULL,
                              ec50_lower_limit = NULL,
                              ec50_upper_limit = NULL,
                              slopEC50 = TRUE){
  
  h0_df <- fitH0Model(df = df,
                      maxit = maxit,
                      optim_fun = optim_fun_h0,
                      gr_fun = gr_fun_h0)
  
  h1_df <- fitH1Model(df = df,
                      maxit = maxit,
                      optim_fun = optim_fun_h1,
                      optim_fun_2 = optim_fun_h1_2,
                      gr_fun = gr_fun_h1,
                      gr_fun_2 = gr_fun_h1_2,
                      ec50_lower_limit = ec50_lower_limit,
                      ec50_upper_limit = ec50_upper_limit,
                      slopEC50 = slopEC50)
  
  sum_df <- computeFstat(h0_df, h1_df)
  
  return(sum_df)
}


.minObsFilter <- function(df, minObs = 20){
  # Filter data frame for a minimal number of observations
  # per protein
  representative <- clustername <- rel_value <- 
    nObs <- NULL
  
  df_fil <- df %>%
    group_by(representative, clustername) %>%
    mutate(nObs = n()) %>%
    filter(nObs >= minObs) %>%
    ungroup()
  
  return(df_fil)
}

.independentFilter <- function(df, fcThres = 1.5){
  # Filter data frame independently based on maximal 
  # fold change per protein
  representative <- clustername <- rel_value <- NULL
  
  df_fil <- df %>%
    group_by(representative, clustername) %>%
    filter(any(rel_value > fcThres) | any(rel_value < 1/fcThres)) %>%
    ungroup
  
  return(df_fil)
}


.getEC50Limits <- function(df){
  log_conc <- NULL
  
  ec50_lower_limit <- min(unique(df$log_conc)[
    which(is.finite(unique(df$log_conc)))])
  ec50_upper_limit <- max(unique(df$log_conc)[
    which(is.finite(unique(df$log_conc)))])
  
  return(c(ec50_lower_limit, 
           ec50_upper_limit))
}

#' Compete H0 and H1 models per protein and obtain F statistic
#' 
#' @param df tidy data frame retrieved after import of a 2D-TPP 
#' dataset, potential filtering and addition of a column "nObs"
#' containing the number of observations per protein
#' @param fcThres numeric value of minimal fold change 
#' (or inverse fold change) a protein has to show to be kept 
#' upon independent filtering
#' @param independentFiltering boolean flag indicating whether
#' independent filtering should be performed based on minimal
#' fold changes per protein profile
#' @param minObs numeric value of minimal number of observations
#' that should be required per protein
#' @param maxit maximal number of iterations the optimization
#' should be given, default is set to 500
#' @param optim_fun_h0 optimization function that should be used
#' for fitting the H0 model
#' @param optim_fun_h1 optimization function that should be used
#' for fitting the H1 model
#' @param optim_fun_h1_2 optional additional optimization function 
#' that will be run with paramters retrieved from optim_fun_h1 and 
#' should be used for fitting the H1 model with the trimmed sum
#' model, default is NULL
#' @param gr_fun_h0 optional gradient function for optim_fun_h0,
#' default is NULL
#' @param gr_fun_h1 optional gradient function for optim_fun_h1,
#' default is NULL
#' @param gr_fun_h1_2 optional gradient function for optim_fun_h1_2,
#' default is NULL
#' 
#' @return data frame summarising the fit characteristics of H0 and
#' H1 models and therof resulting computed F statistics per protein
#' 
#' @examples 
#' data("simulated_cell_extract_df")
#' temp_df <- simulated_cell_extract_df %>% 
#'   filter(clustername %in% paste0("protein", 1:10)) %>% 
#'   group_by(representative) %>% 
#'   mutate(nObs = n()) %>% 
#'   ungroup 
#' competeModels(temp_df)  
#' 
#' @export
competeModels <- function(df, fcThres = 1.5,
                          independentFiltering = FALSE,
                          minObs = 20,
                          optim_fun_h0 = .min_RSS_h0,
                          optim_fun_h1 = .min_RSS_h1_slope_pEC50,
                          optim_fun_h1_2 = NULL,
                          gr_fun_h0 = NULL,
                          gr_fun_h1 = NULL,
                          gr_fun_h1_2 = NULL,
                          maxit = 750){
  
  .checkDfColumns(df)
  
  if(identical(optim_fun_h1, .min_RSS_h1_slope_pEC50)){
    slopEC50 = TRUE
  }else{
    slopEC50 = FALSE
  }
  
  ec50_limits <- .getEC50Limits(df)
  
  df_fil <- .minObsFilter(df, minObs = minObs)
  
  if(independentFiltering){
    message(paste("Independent Filtering: removing proteins without", 
            "any values crossing the set threshold."))
    df_fil <- .independentFilter(df_fil, fcThres = fcThres) 
  }

  sum_df <- fitAndEvalDataset(
    df_fil,
    maxit = maxit,
    optim_fun_h0 = optim_fun_h0,
    optim_fun_h1 = optim_fun_h1,
    optim_fun_h1_2 = optim_fun_h1_2,
    ec50_lower_limit = ec50_limits[1],
    ec50_upper_limit = ec50_limits[2],
    slopEC50 = slopEC50)
  
  return(sum_df)
}