#include "FusionCDFData.h"
#include <iostream>
#include "R_affx_constants.h"

using namespace std;
using namespace affymetrix_fusion_io;

#include <R.h>
#include <Rdefines.h>  
#include <wchar.h>
#include <wctype.h>

extern "C" {
  /************************************************************************
   *
   * R_affx_cdf_nbrOfCellsPerUnitGroup()
   *
   ************************************************************************/
  SEXP R_affx_cdf_nbrOfCellsPerUnitGroup(SEXP fname, SEXP units, SEXP verbose) 
  {
    FusionCDFData cdf;
    FusionCDFFileHeader header;
    string str;
    int str_length; 
    char* cstr; 
    
    SEXP names = R_NilValue, 
         probe_sets = R_NilValue,
         r_groups = R_NilValue, 
         r_group_names = R_NilValue;
    
    bool readAll = true; 
    int nsets = 0, nunits = 0;
    int iset = 0;
    const char* cdfFileName = CHAR(STRING_ELT(fname, 0));
    int i_verboseFlag = INTEGER(verbose)[0];

    FusionCDFProbeSetInformation probeset;

    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Opens file
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    cdf.SetFileName(cdfFileName);
    if (i_verboseFlag >= R_AFFX_VERBOSE) {
      Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str());
    }

    if (cdf.Read() == false) {
      error("Failed to read the CDF file.");
    }


    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Read header and get unit indices to be read
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    header = cdf.GetHeader();
    nsets = header.GetNumProbeSets();

    nunits = length(units);
    if (nunits == 0) {
      nunits  = nsets;
    } else {
      readAll = false;
      /* Validate argument 'units': */
      for (int ii = 0; ii < nunits; ii++) {
        iset = INTEGER(units)[ii];
        if (iset < 1 || iset > nsets) {
          error("Argument 'units' contains an element out of range.");
        }
      }
    }

    /* Allocate R character vector and R list for the names and units */
    PROTECT(names = NEW_CHARACTER(nunits));
    PROTECT(probe_sets = NEW_LIST(nunits)); 

    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Access information for the requested units one by one
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    for (int ii = 0; ii < nunits; ii++) {
      if (readAll) {
        iset = ii;
      } else {
        /* Unit indices are zero-based in Fusion SDK. */
        iset = INTEGER(units)[ii] - 1;
      }

      /* Retrieve the current unit */
      cdf.GetProbeSetInformation(iset, probeset);
      
      /* Record its name */
      str = cdf.GetProbeSetName(iset);
      str_length = str.size();
      cstr = Calloc(str_length+1, char);
      strncpy(cstr, str.c_str(), str_length);
      cstr[str_length] = '\0';
      SET_STRING_ELT(names, ii, mkChar(cstr));
      Free(cstr);
      
      /* Get the number of groups in the unit */
      int ngroups = probeset.GetNumGroups();
      
      /* Allocate an R list and a vector of names for the groups */
      PROTECT(r_groups = NEW_INTEGER(ngroups));
      PROTECT(r_group_names = NEW_CHARACTER(ngroups));

      /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       * For each group, query the number of cells (probes)
       * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
      for (int igroup = 0; igroup < ngroups; igroup++) {
        FusionCDFProbeGroupInformation group;

        /* Get the current group */
        probeset.GetGroupInformation(igroup, group);

        /* Get the name of the group */
        str = group.GetName();
        str_length = str.size();
        cstr = Calloc(str_length+1, char);
        strncpy(cstr, str.c_str(), str_length);
        cstr[str_length] = '\0';
        SET_STRING_ELT(r_group_names, igroup, mkChar(cstr));
        Free(cstr);

        /* Get the number of cells (probes) in the group */
        int ncells = group.GetNumCells();
        INTEGER(r_groups)[igroup] = ncells;
      }
      
      /** set the group names. **/
      setAttrib(r_groups, R_NamesSymbol, r_group_names);

      /** now set the probe_set in the main probe_set list. **/
      SET_VECTOR_ELT(probe_sets, ii, r_groups);

      /** pop the group list and group names of the stack. **/
      UNPROTECT(2); /* 'r_group_names' and then 'r_groups' */
    }
    
    /** set the names down here at the end. **/
    setAttrib(probe_sets, R_NamesSymbol, names);

    /** unprotect the names and the main probe set list.**/
    UNPROTECT(2);  /* 'names' and then 'probe_sets' */
   
    return probe_sets;
  }



  /************************************************************************
   *
   * R_affx_cdf_groupNames()
   *
   ************************************************************************/
  SEXP R_affx_cdf_groupNames(SEXP fname, SEXP units, SEXP truncateGroupNames, SEXP verbose) 
  {
    FusionCDFData cdf;
    FusionCDFFileHeader header;
    string str;
    int str_length; 
    char* cstr; 
    
    SEXP names = R_NilValue, 
         probe_sets = R_NilValue,
         r_group_names = R_NilValue;
    
    bool readAll = true; 
    int nsets = 0, nunits = 0;
    int iset = 0;
    const char* cdfFileName = CHAR(STRING_ELT(fname, 0));
    int i_truncateGroupNames = INTEGER(truncateGroupNames)[0];
    int i_verboseFlag = INTEGER(verbose)[0];

    /** pointer to the name of the probeset. **/
    char* name;
    char bfr[512];

    FusionCDFProbeSetInformation probeset;

    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Opens file
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    cdf.SetFileName(cdfFileName);
    if (i_verboseFlag >= R_AFFX_VERBOSE) {
      Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str());
    }

    if (cdf.Read() == false) {
      error("Failed to read the CDF file.");
    }


    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Read header and get unit indices to be read
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    header = cdf.GetHeader();
    nsets = header.GetNumProbeSets();

    nunits = length(units);
    if (nunits == 0) {
      nunits  = nsets;
    } else {
      readAll = false;
      /* Validate argument 'units': */
      for (int ii = 0; ii < nunits; ii++) {
        iset = INTEGER(units)[ii];
        if (iset < 1 || iset > nsets) {
          error("Argument 'units' contains an element out of range.");
        }
      }
    }

    /* Allocate R character vector and R list for the names and units */
    PROTECT(probe_sets = NEW_LIST(nunits)); 
    PROTECT(names = NEW_CHARACTER(nunits));

    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Access information for the requested units one by one
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    for (int ii = 0; ii < nunits; ii++) {
      if (readAll) {
        iset = ii;
      } else {
        /* Unit indices are zero-based in Fusion SDK. */
        iset = INTEGER(units)[ii] - 1;
      }

      /* Retrieve the current unit */
      cdf.GetProbeSetInformation(iset, probeset);
      
      /* Record its name */
      str = cdf.GetProbeSetName(iset);
      str_length = str.size();
      name = Calloc(str_length+1, char);
      strncpy(name, str.c_str(), str_length);
      name[str_length] = '\0';
      SET_STRING_ELT(names, ii, mkChar(name));
      
      /* Get the number of groups in the unit */
      int ngroups = probeset.GetNumGroups();
      
      /* Allocate a vector of names for the groups */
      PROTECT(r_group_names = NEW_CHARACTER(ngroups));

      /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       * For each group, query the number of cells (probes)
       * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
      for (int igroup = 0; igroup < ngroups; igroup++) {
        FusionCDFProbeGroupInformation group;

        /* Get the current group */
        probeset.GetGroupInformation(igroup, group);

        /* Get the name of the group */
        str = group.GetName();
        str_length = str.size();
        cstr = Calloc(str_length+1, char);
        strncpy(cstr, str.c_str(), str_length);
        cstr[str_length] = '\0';
 
        /* If group name starts with the unit name, and i_truncateGroupNames
           is TRUE, strip it off. */
        int len = strlen(name);
        int res = strncmp(cstr, name, len);
        if (res == 0 && i_truncateGroupNames) {
          int last = strlen(cstr)-len;
          for (int kk = 0; kk < last; kk++)
            bfr[kk] = cstr[len+kk];
          bfr[last] = '\0';
          SET_STRING_ELT(r_group_names, igroup, mkChar(bfr));
        } else {
          SET_STRING_ELT(r_group_names, igroup, mkChar(cstr));
        }
        Free(cstr);
      }
      
      /** now set the probe_set in the main probe_set list. **/
      SET_VECTOR_ELT(probe_sets, ii, r_group_names);

      /** pop the group list and group names of the stack. **/
      UNPROTECT(1);  /* 'r_group_names' */
      Free(name);
    } /* for (int ii=0 ...) */
    
    /** set the names down here at the end. **/
    setAttrib(probe_sets, R_NamesSymbol, names);

    /** unprotect the names and the main probe set list.**/
    UNPROTECT(2);  /* 'names' and then 'probe_sets' */
   
    return probe_sets;
  } /* R_affx_cdf_groupNames() */



  /************************************************************************
   *
   * R_affx_pt_base_is_pm
   *
   * Return 1 if p_base/t_base pair is PM (complementary), 0 otherwise.
   *
   ************************************************************************/
  int R_affx_pt_base_is_pm(char p_base, char t_base)
  {
    int is_pm = 0; /* 0 is false, 1 is true */
    if (p_base == t_base) {
      is_pm = 0;
    } else {
      if ((p_base == 'a' || p_base == 'A') &&
          (t_base == 't' || t_base == 'T'))   {
        is_pm = 1;
      } else if ((p_base == 't' || p_base == 'T') &&
                 (t_base == 'a' || t_base == 'A'))   {
        is_pm = 1;
      } else if ((p_base == 'c' || p_base == 'C') &&
                 (t_base == 'g' || t_base == 'G'))   {
        is_pm = 1;
      } else if ((p_base == 'g' || p_base == 'G') &&
                 (t_base == 'c' || t_base == 'C'))   {
        is_pm = 1;
      } else {
        is_pm = 0;
      }
    }
    return is_pm;
  } /* R_affx_pt_base_is_pm() */


  /************************************************************************
   *
   * R_affx_cdf_isPm()
   *
   ************************************************************************/
  SEXP R_affx_cdf_isPm(SEXP fname, SEXP units, SEXP verbose) 
  {
    FusionCDFData cdf;
    FusionCDFFileHeader header;
    string str;
    int str_length; 
    char* cstr; 
    
    SEXP names = R_NilValue, 
         probe_sets = R_NilValue,
         r_groups = R_NilValue, 
         r_group_names = R_NilValue,
         isPm = R_NilValue;
    
    bool readAll = true; 
    int nsets = 0, nunits = 0;
    int iset = 0;
    const char* cdfFileName = CHAR(STRING_ELT(fname, 0));
    int i_verboseFlag = INTEGER(verbose)[0];

    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Opens file
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    cdf.SetFileName(cdfFileName);
    if (i_verboseFlag >= R_AFFX_VERBOSE) {
      Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str());
    }

    if (cdf.Read() == false) {
      error("Failed to read the CDF file.");
    }


    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Read header and get unit indices to be read
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    header = cdf.GetHeader();
    nsets = header.GetNumProbeSets();

    nunits = length(units);
    if (nunits == 0) {
      nunits  = nsets;
    } else {
      readAll = false;
      /* Validate argument 'units': */
      for (int ii = 0; ii < nunits; ii++) {
        iset = INTEGER(units)[ii];
        if (iset < 1 || iset > nsets) {
          error("Argument 'units' contains an element out of range.");
        }
      }
    }

    /* Allocate R character vector and R list for the names and units */
    PROTECT(probe_sets = NEW_LIST(nunits)); 
    PROTECT(names = NEW_CHARACTER(nunits));

    /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     * Access information for the requested units one by one
     * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
    for (int ii = 0; ii < nunits; ii++) {
      FusionCDFProbeSetInformation probeset;

      if (readAll) {
        iset = ii;
      } else {
        /* Unit indices are zero-based in Fusion SDK. */
        iset = INTEGER(units)[ii] - 1;
      }

      /* Retrieve the current unit */
      cdf.GetProbeSetInformation(iset, probeset);
      
      /* Record its name */
      str = cdf.GetProbeSetName(iset);
      str_length = str.size();
      cstr = Calloc(str_length+1, char);
      strncpy(cstr, str.c_str(), str_length);
      cstr[str_length] = '\0';
      SET_STRING_ELT(names, ii, mkChar(cstr));
      Free(cstr);
      
      /* Get the number of groups in the unit */
      int ngroups = probeset.GetNumGroups();
      
      /* Allocate an R list and a vector of names for the groups */
      PROTECT(r_groups = NEW_LIST(ngroups));
      PROTECT(r_group_names = NEW_CHARACTER(ngroups));

      /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       * For each group, query the number of cells (probes)
       * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
      for (int igroup = 0; igroup < ngroups; igroup++) {
        FusionCDFProbeGroupInformation group;

        /* Get the current group */
        probeset.GetGroupInformation(igroup, group);

        /* Get the name of the group */
        str = group.GetName();
        str_length = str.size();
        cstr = Calloc(str_length+1, char);
        strncpy(cstr, str.c_str(), str_length);
        cstr[str_length] = '\0';
        SET_STRING_ELT(r_group_names, igroup, mkChar(cstr));
        Free(cstr);

        /* Get the number of cells (probes) in the group */
        int ncells = group.GetNumCells();
        PROTECT(isPm = NEW_LOGICAL(ncells));

        /* For each cell, record if it is an mis-match probe or not */
        for (int icell = 0; icell < ncells; icell++) {
          FusionCDFProbeInformation probe;
          group.GetCell(icell, probe);
          char p_base = probe.GetPBase();
          char t_base = probe.GetTBase();
          LOGICAL(isPm)[icell] = R_affx_pt_base_is_pm(p_base, t_base);
        } /* for (int icell ...) */

        SET_VECTOR_ELT(r_groups, igroup, isPm);

        UNPROTECT(1);  /* 'isPm' */
      }
      
      /** set the group names. **/
      setAttrib(r_groups, R_NamesSymbol, r_group_names);

      /** now set the probe_set in the main probe_set list. **/
      SET_VECTOR_ELT(probe_sets, ii, r_groups);

      /** pop the group list and group names of the stack. **/
      UNPROTECT(2); /* 'r_group_names' and then 'r_groups' */
    }
    
    /** set the names down here at the end. **/
    setAttrib(probe_sets, R_NamesSymbol, names);

    /** unprotect the names and the main probe set list.**/
    UNPROTECT(2);  /* 'names' and then 'probe_sets' */
   
    return probe_sets;
  } /* R_affx_cdf_isPm() */

} /** end extern C **/


/***************************************************************************
 * HISTORY:
 * 2007-03-05 
 * o Added argument 'truncateGroupNames' to R_affx_cdf_group_names().
 * 2006-11-27
 * o Added Seth Falcon's help function R_affx_pt_base_is_pm().
 * o Made R_affx_cdf_isMm() & R_affx_cdf_isPmOrMm() deprecated, because
 *   they've never been used. /HB
 * 2006-03-28
 * o Unit indices are now one-based. /HB
 * 2006-01-15
 * 2006-01-15
 * o It is now possible to specify what readCdfUnits() should return. /HB
 * 2006-01-12
 * o BUG FIX: The check of the upper-limit of unit indicies was done 
 *   assuming one-based indices. /HB
 * o Added R_affx_cdf_isMm(), R_affx_cdf_isPm() & R_affx_cdf_isPmOrMm(). /HB
 * 2006-01-11
 * o Added R_affx_cdf_nbrOfCellsPerUnitGroup() /HB
 * 2006-01-10
 * o Updated the "units" code to be more similar to the corresponding code
 *   for CEL files. /HB
 * o Added a return value to non-void function R_affx_get_cdf_file_qc(). /HB
 * 2006-01-09
 * o Added R_affx_get_cdf_units() and R_affx_get_cdf_unit.names(). /HB
 * o Created.  The purpose was to make it possible to read subsets of units
 *   and not just all units at once. /HB
 **************************************************************************/