/*********************************************************************
 * Clustal Omega - Multiple sequence alignment
 *
 * Copyright (C) 2010 University College Dublin
 *
 * Clustal-Omega is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This file is part of Clustal-Omega.
 *
 ********************************************************************/

/*
 *  RCS $Id: seq.h 283 2013-06-10 17:42:14Z fabian $
 */

#ifndef CLUSTALO_SEQ_H
#define CLUSTALO_SEQ_H

#include "../squid/squid.h"

#include "util.h"


/**
 * @brief Integer-encoded sequence types.
 *
 * These are plain aliases for squid's sequence-type constants
 * (kOtherSeq, kDNA, kRNA, kAmino). They are kept in sync with squid and
 * exist only for convenience here.
 */
#define SEQTYPE_UNKNOWN kOtherSeq
#define SEQTYPE_DNA kDNA
#define SEQTYPE_RNA kRNA
#define SEQTYPE_PROTEIN kAmino

/* Alphabets are defined in squid.h: AMINO_ALPHABET, DNA_ALPHABET,
 * RNA_ALPHABET (all uppercase).
 *
 * The two macros below name the single-character "any residue" symbol
 * for amino-acid and nucleotide sequences respectively.
 * NOTE(review): how/where these are substituted is not visible in this
 * header -- see the users of these macros.
 */
#define AMINOACID_ANY 'X'
#define NUCLEOTIDE_ANY 'N'

/**
 * @brief Structure for storing multiple sequences.
 *
 * All per-sequence arrays (seq, orig_seq, sqinfo) are indexed by
 * sequence number, range 0..nseqs-1.
 */
typedef struct {
    int nseqs; /**< number of sequences */
    int seqtype; /**< sequence type (int-encoded, see SEQTYPE_* macros) */
    char *filename; /**< input file / source of sequences */
    bool aligned; /**< true if all seqs are same length */

    /** (Working) sequence residues, one char pointer per sequence.
     * Range for first index: 0..nseqs-1.
     * Changes during alignment.
     */
    char **seq;

    /** Original sequence residues, one char pointer per sequence.
     * Range for first index: 0..nseqs-1.
     * Only set during input.
     */
    char **orig_seq;

  /** Order in which sequences appear in the guide-tree.
   */
  int *tree_order;

    /**
     * @brief Squid's sequence info structure, one entry per sequence.
     * Index range: 0..nseqs-1.
     *
     * Each SQINFO entry carries the following extra data:
     *
     * - int flags;
     * - char name[SQINFO_NAMELEN];  name
     * - char id[SQINFO_NAMELEN];    database identifier
     * - char acc[SQINFO_NAMELEN];   database accession no
     * - char desc[SQINFO_DESCLEN];  description
     * - int len;     length of this seq, incl. gaps in our case!
     * - int start;   start position on source seq (valid range: 1..len)
     * - int stop;    end position on source seq (valid range: 1..len)
     * - int olen;    original length of source seq
     * - int type;    kRNA, kDNA, kAmino, or kOtherSeq
     * - char *ss;    secondary structure string (index range: 0..len-1)
     * - char *sa;    percent side chain surface access
     *                (index range: 0..len-1)
     *
     * @see squid.h
     * @see LogSqInfo()
     *
     */
    SQINFO *sqinfo;
} mseq_t;

/**
 * @brief Alignment statistics for the sequences in prMSeq.
 *
 * NOTE(review): only the prototype is visible here. Judging by the
 * parameter names, bSampling presumably selects a sampled rather than
 * exhaustive computation and bReportAll a more detailed report --
 * confirm against the definition.
 *
 * @param prMSeq      the sequences to analyse
 * @param bSampling   see note above
 * @param bReportAll  see note above
 */
extern void
AliStat(mseq_t *prMSeq, bool bSampling, bool bReportAll);

/**
 * @brief Add one sequence, given as name and residues, to a
 * multi-sequence structure.
 *
 * NOTE(review): the destination is passed as mseq_t**, presumably so the
 * structure can be (re)allocated by the callee; whether the two strings
 * are copied or adopted is not visible in this header -- confirm.
 *
 * @param prMSeqDest_p  address of the destination structure pointer
 * @param pcSeqName     name of the sequence to add
 * @param pcSeqRes      residues of the sequence to add
 */
extern void
AddSeq(mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes);

/**
 * @brief Swap two sequences within mseq.
 *
 * NOTE(review): i and j are presumably sequence indices in the range
 * 0..nseqs-1 (the index range documented for mseq_t); which members are
 * swapped is not visible here -- confirm.
 *
 * @param mseq  the sequence structure to modify
 * @param i     first sequence index
 * @param j     second sequence index
 */
extern void
SeqSwap(mseq_t *mseq, int i, int j);

/**
 * @brief Dealign all sequences held in mseq.
 *
 * NOTE(review): presumably strips gap characters from every working
 * sequence and updates the related bookkeeping (e.g. the aligned flag)
 * -- confirm against the definition.
 *
 * @param mseq  the sequence structure to dealign
 *
 * @see DealignSeq()
 */
extern void
DealignMSeq(mseq_t *mseq);

/**
 * @brief Translate an int-encoded sequence type into a string.
 *
 * @param seqtype  int-encoded sequence type (see the SEQTYPE_* macros)
 *
 * @return pointer to a const string.
 * NOTE(review): presumably a static string that must not be freed, and
 * the result for unrecognised values is not visible here -- confirm.
 */
extern const char *
SeqTypeToStr(int seqtype);

/**
 * @brief Read sequences from a file into prMSeq_p.
 *
 * NOTE(review): parameter meanings below are taken from their names
 * only; the return convention (success/failure value) is not visible in
 * this header -- confirm against the definition.
 *
 * @param prMSeq_p    structure receiving the sequences
 * @param pcSeqFile   sequence file to read
 * @param iSeqType    sequence type (presumably one of SEQTYPE_*)
 * @param iSeqFmt     sequence file format code
 * @param bIsProfile  whether the input is to be treated as a profile
 * @param iMaxNumSeq  presumably an upper limit on the number of sequences
 * @param iMaxSeqLen  presumably an upper limit on sequence length
 *
 * @return int status code (see note above)
 */
extern int
ReadSequences(mseq_t *prMSeq_p, char *pcSeqFile,
              int iSeqType,  int iSeqFmt, bool bIsProfile,
              int iMaxNumSeq, int iMaxSeqLen);

/**
 * @brief Variant of ReadSequences() that takes the sequences and their
 * names as in-memory string arrays instead of a file name.
 *
 * NOTE(review): parameter meanings below are taken from their names
 * only. In particular it is not visible here whether seqLength is the
 * number of entries in seq/seqNames or a residue count, nor what the
 * return convention is -- confirm against the definition.
 *
 * @param prMSeq_p    structure receiving the sequences
 * @param seqLength   see note above
 * @param seq         array of sequence strings
 * @param seqNames    array of sequence name strings
 * @param iSeqType    sequence type (presumably one of SEQTYPE_*)
 * @param iSeqFmt     sequence format code
 * @param bIsProfile  whether the input is to be treated as a profile
 * @param iMaxNumSeq  presumably an upper limit on the number of sequences
 * @param iMaxSeqLen  presumably an upper limit on sequence length
 *
 * @return int status code (see note above)
 */
extern int
ReadSequencesFromR(mseq_t *prMSeq_p, int seqLength, char **seq, char **seqNames,
              int iSeqType,  int iSeqFmt, bool bIsProfile,
              int iMaxNumSeq, int iMaxSeqLen);

/**
 * @brief Create a new mseq_t and hand it back through *mseq.
 *
 * NOTE(review): allocation and initial field values are not visible in
 * this header; presumably the result is to be released with FreeMSeq()
 * -- confirm.
 *
 * @param mseq  address of the pointer that receives the new structure
 */
extern void
NewMSeq(mseq_t **mseq);

/**
 * @brief Release an mseq_t.
 *
 * NOTE(review): takes mseq_t**, presumably so the caller's pointer can
 * be reset after the structure is freed -- confirm.
 *
 * @param mseq  address of the pointer to the structure to release
 */
extern void
FreeMSeq(mseq_t **mseq);

/**
 * @brief Variant of FreeMSeq() with an extra bRSequence switch.
 *
 * NOTE(review): bRSequence presumably marks structures filled by
 * ReadSequencesFromR(), whose memory may be owned differently; the exact
 * effect is not visible here -- confirm.
 *
 * @param mseq        address of the pointer to the structure to release
 * @param bRSequence  see note above
 */
extern void
FreeRSeq(mseq_t **mseq, bool bRSequence);

/**
 * @brief Copy prMSeqSrc into a structure returned via *prMSeqDest_p.
 *
 * NOTE(review): presumably a deep copy into a newly allocated structure
 * that the caller must free; not visible in this header -- confirm.
 *
 * @param prMSeqDest_p  address of the pointer receiving the copy
 * @param prMSeqSrc     structure to copy from
 */
extern void
CopyMSeq(mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc);

/**
 * @brief Log the contents of a squid SQINFO record.
 *
 * NOTE(review): log destination/level and which fields are reported are
 * not visible in this header -- confirm against the definition.
 *
 * @param sqinfo  the record to log
 */
extern void
LogSqInfo(SQINFO *sqinfo);

/**
 * @brief Look up a sequence in mseq by its name.
 *
 * @param seqname  name to search for
 * @param mseq     structure to search in
 *
 * @return int.
 * NOTE(review): presumably the index of the matching sequence, with a
 * sentinel such as -1 when nothing matches; the exact convention is not
 * visible in this header -- confirm.
 */
extern int
FindSeqName(char *seqname, mseq_t *mseq);

/**
 * @brief Write the alignment held in mseq.
 *
 * NOTE(review): parameter meanings below are taken from their names
 * only; the return convention and the ownership of *result_v are not
 * visible in this header -- confirm against the definition.
 *
 * @param mseq            the alignment to write
 * @param aln_outfile     output file name
 * @param msafile_format  output alignment format code
 * @param iWrap           presumably the line-wrap width for residues
 * @param bResno          presumably toggles printing of residue numbers
 * @param result_c        presumably receives the number of entries
 *                        stored in *result_v
 * @param result_v        presumably receives an array of result strings
 *
 * @return int status code (see note above)
 */
extern int
WriteAlignment(mseq_t *mseq, const char *aln_outfile, int msafile_format, int iWrap, bool bResno, int *result_c, char ***result_v);

/**
 * @brief Dealign a single sequence string.
 *
 * NOTE(review): seq is non-const, so the string is presumably modified
 * in place (gap characters removed) -- confirm against the definition.
 *
 * @param seq  the sequence string to dealign
 *
 * @see DealignMSeq()
 */
extern void
DealignSeq(char *seq);

/**
 * @brief Shuffle the sequences in prMSeq.
 *
 * NOTE(review): presumably randomises the order of the sequences; the
 * source of randomness and which members are permuted are not visible in
 * this header -- confirm.
 *
 * @param prMSeq  the sequence structure to shuffle
 */
extern void
ShuffleMSeq(mseq_t *prMSeq);

/**
 * @brief Sort the sequences in prMSeq by their length.
 *
 * @param prMSeq  the sequence structure to sort
 * @param cOrder  single-character sort-order selector.
 *                NOTE(review): the accepted characters (e.g. for
 *                ascending/descending) are not visible in this header
 *                -- confirm against the definition.
 */
extern void
SortMSeqByLength(mseq_t *prMSeq, const char cOrder);

/**
 * @brief Join the sequences of prMSeqToAdd onto *prMSeqDest_p.
 *
 * NOTE(review): only the prototype is visible here. The destination is
 * passed as mseq_t**, presumably so it can be (re)allocated; whether
 * prMSeqToAdd is left untouched is not visible -- confirm against the
 * definition.
 *
 * @param prMSeqDest_p  address of the pointer to the destination structure
 * @param prMSeqToAdd   structure whose sequences are added
 */
extern void
JoinMSeqs(mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd);

/**
 * @brief Check whether the sequences in prMSeq are aligned.
 *
 * NOTE(review): only the prototype is visible here. Per the mseq_t
 * documentation "aligned" means all sequences have the same length; how
 * bIsProfile influences the check is not visible -- confirm against the
 * definition.
 *
 * @param prMSeq      the sequences to check
 * @param bIsProfile  whether the sequences are to be treated as a profile
 *
 * @return true if the sequences are considered aligned, false otherwise
 */
extern bool
SeqsAreAligned(mseq_t *prMSeq, bool bIsProfile);

#endif