/*****************************************************************
 * SQUID - a library of functions for biological sequence analysis
 * Copyright (C) 1992-2002 Washington University School of Medicine
 * 
 *     This source code is freely distributed under the terms of the
 *     GNU General Public License. See the files COPYRIGHT and LICENSE
 *     for details.
 *****************************************************************/

/* iupac.c
 * 
 * Globally defines the IUPAC symbols for nucleic acid sequence
 * Slowly evolving into a repository of globals. Tue Apr 20 1993
 *
 * RCS $Id: iupac.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: iupac.c,v 1.3 2001/02/21 21:09:10 eddy Exp)
 */
#include "squid.h"

/* Default expected nucleotide occurrence frequencies, A/C/G/T.
 * Used (for instance) as the default distribution for 
 * i.i.d. random nucleotide sequences.
 */
float dnafq[4] = { 0.25, 0.25, 0.25, 0.25 };

/* Dayhoff f(i) amino acid occurrence frequencies. 
 * From SwissProt 34: 21,210,388 residues
 * In alphabetic order by single-letter code.
 * Used (for instance) as the default distribution for
 * i.i.d. random protein sequences.
 */
float aafq[20] = {
  0.075520,			/* A */
  0.016973,			/* C */
  0.053029,			/* D */
  0.063204,			/* E */
  0.040762,			/* F */
  0.068448,			/* G */
  0.022406,			/* H */
  0.057284,			/* I */
  0.059398,			/* K */
  0.093399,			/* L */
  0.023569,			/* M */
  0.045293,			/* N */
  0.049262,			/* P */
  0.040231,			/* Q */
  0.051573,			/* R */
  0.072214,			/* S */
  0.057454,			/* T */
  0.065252,			/* V */
  0.012513,			/* W */
  0.031985			/* Y */
};

char aa_alphabet[] = AMINO_ALPHABET;
				/* aa_index converts to pam's 27x27 scheme */
int  aa_index[20]  = { 0,  2,  3,  4,  5,  6,  7,  8, 10, 11, 
	              12, 13, 15, 16, 17, 18, 19, 21, 22, 24 };

				/* IUPAC code translations */
				/* note: sequence chars are UPPER CASE */
struct iupactype iupac[] = {
  { 'A', 'T', NTA, NTT, },
  { 'C', 'G', NTC, NTG, },
  { 'G', 'C', NTG, NTC, },
  { 'T', 'A', NTT, NTA, },
  { 'U', 'A', NTU, NTA, },
  { 'N', 'N', NTN, NTN, },
  { ' ', ' ', NTGAP, NTGAP, },
  { 'R', 'Y', NTR, NTY, },
  { 'Y', 'R', NTY, NTR, },
  { 'M', 'K', NTM, NTK, },
  { 'K', 'M', NTK, NTM, },
  { 'S', 'S', NTS, NTS, },
  { 'W', 'W', NTW, NTW, },
  { 'H', 'D', NTH, NTD, },
  { 'B', 'V', NTB, NTV, },
  { 'V', 'B', NTV, NTB, },
  { 'D', 'H', NTD, NTH, },
  };


char *stdcode1[65] = {
  "K",				/* AAA */
  "N",				/* AAC */
  "K",				/* AAG */
  "N",				/* AAU */
  "T",				/* ACA */
  "T",				/* ACC */
  "T",				/* ACG */
  "T",				/* ACU */
  "R",				/* AGA */
  "S",				/* AGC */
  "R",				/* AGG */
  "S",				/* AGU */
  "I",				/* AUA */
  "I",				/* AUC */
  "M",				/* AUG */
  "I",				/* AUU */
  "Q",				/* CAA */
  "H",				/* CAC */
  "Q",				/* CAG */
  "H",				/* CAU */
  "P",				/* CCA */
  "P",				/* CCC */
  "P",				/* CCG */
  "P",				/* CCU */
  "R",				/* CGA */
  "R",				/* CGC */
  "R",				/* CGG */
  "R",				/* CGU */
  "L",				/* CUA */
  "L",				/* CUC */
  "L",				/* CUG */
  "L",				/* CUU */
  "E",				/* GAA */
  "D",				/* GAC */
  "E",				/* GAG */
  "D",				/* GAU */
  "A",				/* GCA */
  "A",				/* GCC */
  "A",				/* GCG */
  "A",				/* GCU */
  "G",				/* GGA */
  "G",				/* GGC */
  "G",				/* GGG */
  "G",				/* GGU */
  "V",				/* GUA */
  "V",				/* GUC */
  "V",				/* GUG */
  "V",				/* GUU */
  "*",				/* UAA */
  "Y",				/* UAC */
  "*",				/* UAG */
  "Y",				/* UAU */
  "S",				/* UCA */
  "S",				/* UCC */
  "S",				/* UCG */
  "S",				/* UCU */
  "*",				/* UGA */
  "C",				/* UGC */
  "W",				/* UGG */
  "C",				/* UGU */
  "L",				/* UUA */
  "F",				/* UUC */
  "L",				/* UUG */
  "F",				/* UUU */
  "X",				/* unknown */
};




char *stdcode3[65] = {
  "Lys",			/* AAA */
  "Asn",			/* AAC */
  "Lys",			/* AAG */
  "Asn",			/* AAU */
  "Thr",			/* ACA */
  "Thr",			/* ACC */
  "Thr",			/* ACG */
  "Thr",			/* ACU */
  "Arg",			/* AGA */
  "Ser",			/* AGC */
  "Arg",			/* AGG */
  "Ser",			/* AGU */
  "Ile",			/* AUA */
  "Ile",			/* AUC */
  "Met",			/* AUG */
  "Ile",			/* AUU */
  "Gln",			/* CAA */
  "His",			/* CAC */
  "Gln",			/* CAG */
  "His",			/* CAU */
  "Pro",			/* CCA */
  "Pro",			/* CCC */
  "Pro",			/* CCG */
  "Pro",			/* CCU */
  "Arg",			/* CGA */
  "Arg",			/* CGC */
  "Arg",			/* CGG */
  "Arg",			/* CGU */
  "Leu",			/* CUA */
  "Leu",			/* CUC */
  "Leu",			/* CUG */
  "Leu",			/* CUU */
  "Glu",			/* GAA */
  "Asp",			/* GAC */
  "Glu",			/* GAG */
  "Asp",			/* GAU */
  "Ala",			/* GCA */
  "Ala",			/* GCC */
  "Ala",			/* GCG */
  "Ala",			/* GCU */
  "Gly",			/* GGA */
  "Gly",			/* GGC */
  "Gly",			/* GGG */
  "Gly",			/* GGU */
  "Val",			/* GUA */
  "Val",			/* GUC */
  "Val",			/* GUG */
  "Val",			/* GUU */
  "***",			/* UAA */
  "Tyr",			/* UAC */
  "***",			/* UAG */
  "Tyr",			/* UAU */
  "Ser",			/* UCA */
  "Ser",			/* UCC */
  "Ser",			/* UCG */
  "Ser",			/* UCU */
  "***",			/* UGA */
  "Cys",			/* UGC */
  "Trp",			/* UGG */
  "Cys",			/* UGU */
  "Leu",			/* UUA */
  "Phe",			/* UUC */
  "Leu",			/* UUG */
  "Trp",			/* UUU */
  "XXX",			/* unknown */
};