src/ClustalOmega/src/squid/stockholm.c
dafeef0b
 /*****************************************************************
  * SQUID - a library of functions for biological sequence analysis
  * Copyright (C) 1992-2002 Washington University School of Medicine
  * 
  *     This source code is freely distributed under the terms of the
  *     GNU General Public License. See the files COPYRIGHT and LICENSE
  *     for details.
  *****************************************************************/
 
 /* stockholm.c
  * SRE, Fri May 28 15:46:41 1999
  * 
  * Reading/writing of Stockholm format multiple sequence alignments.
  * 
  * example of API:
  * 
  * MSA     *msa;
  * FILE    *fp;        -- opened for write with fopen()
  * MSAFILE *afp;       -- opened for read with MSAFileOpen()
  *      
  * while ((msa = ReadStockholm(afp)) != NULL)
  *   {
  *      WriteStockholm(fp, msa);
  *      MSAFree(msa);
  *   }
  * 
  * RCS $Id: stockholm.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: stockholm.c,v 1.7 2002/10/12 04:40:36 eddy Exp)
  */
 #include <stdio.h>
 #include <string.h>
 #include "squid.h"
 #include "msa.h"
 
 static int  parse_gf(MSA *msa, char *buf);
 static int  parse_gs(MSA *msa, char *buf);
 static int  parse_gc(MSA *msa, char *buf);
 static int  parse_gr(MSA *msa, char *buf);
 static int  parse_comment(MSA *msa, char *buf);
 static int  parse_sequence(MSA *msa, char *buf);
 static void actually_write_stockholm(FILE *fp, MSA *msa, int cpl);
 
 #ifdef TESTDRIVE_STOCKHOLM
 /*****************************************************************
  * stockholm.c test driver: 
  * cc -DTESTDRIVE_STOCKHOLM -g -O2 -Wall -o test stockholm.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm 
  * 
  */
 int
 main(int argc, char **argv)
 {
   MSAFILE *afp;
   MSA     *msa;
   char    *file;
   
   file = argv[1];
 
   if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL)
     Die("Couldn't open %s\n", file);
 
   while ((msa = ReadStockholm(afp)) != NULL)
     {
       WriteStockholm(stdout, msa);
       MSAFree(msa); 
     }
   
   MSAFileClose(afp);
   throw(ClustalOmegaException, "0");
 }
 /******************************************************************/
 #endif /* testdriver */
 
 
 /* Function: ReadStockholm()
  * Date:     SRE, Fri May 21 17:33:10 1999 [St. Louis]
  *
  * Purpose:  Parse the next alignment from an open Stockholm
  *           format alignment file. Return the alignment, or
  *           NULL if there are no more alignments in the file.
  *
  * Args:     afp  - open alignment file
  *
  * Returns:  MSA *   - an alignment object. 
  *                     caller responsible for an MSAFree() 
  *           NULL if no more alignments
  *
  * Diagnostics:
  *           Will Die() here with a (potentially) useful message
  *           if a parsing error occurs 
  */
 MSA *
 ReadStockholm(MSAFILE *afp)
 {
   MSA   *msa;
   char  *s;
   int    status;
 
   if (feof(afp->f)) return NULL;
 
   /* Initialize allocation of the MSA.
    */
   msa = MSAAlloc(10, 0);
 
   /* Check the magic Stockholm header line.
    * We have to skip blank lines here, else we perceive
    * trailing blank lines in a file as a format error when
    * reading in multi-record mode.
    */
   do {
     if ((s = MSAFileGetLine(afp)) == NULL) {
       MSAFree(msa);
       return NULL;
     }
   } while (IsBlankline(s));
 
   if (strncmp(s, "# STOCKHOLM 1.", 14) != 0)
     Die("\
 File %s doesn't appear to be in Stockholm format.\n\
 Assuming there isn't some other problem with your file (it is an\n\
 alignment file, right?), please either:\n\
   a) use the Babelfish format autotranslator option (-B, usually);\n\
   b) specify the file's format with the --informat option; or\n\
   a) reformat the alignment to Stockholm format.\n", 
 	afp->fname);
 
   /* Read the alignment file one line at a time.
    */
   while ((s = MSAFileGetLine(afp)) != NULL) 
     {
       while (*s == ' ' || *s == '\t') s++;  /* skip leading whitespace */
 
       if (*s == '#') {
 	if      (strncmp(s, "#=GF", 4) == 0)   status = parse_gf(msa, s);
 	else if (strncmp(s, "#=GS", 4) == 0)   status = parse_gs(msa, s);
 	else if (strncmp(s, "#=GC", 4) == 0)   status = parse_gc(msa, s);
 	else if (strncmp(s, "#=GR", 4) == 0)   status = parse_gr(msa, s);
 	else                                   status = parse_comment(msa, s);
       } 
       else if (strncmp(s, "//",   2) == 0)   break;
       else if (*s == '\n')                   continue;
       else                                   status = parse_sequence(msa, s);
 
       if (status == 0)  
 	Die("Stockholm format parse error: line %d of file %s while reading alignment %s",
 	    afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name);
     }
 
   if (s == NULL && msa->nseq != 0)
     Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name);
 
   if (s == NULL && msa->nseq == 0) {
     				/* probably just some junk at end of file */
       MSAFree(msa); 
       return NULL; 
     }
   
   MSAVerifyParse(msa);
   return msa;
 }
 
 
 /* Function: WriteStockholm()
  * Date:     SRE, Mon May 31 19:15:22 1999 [St. Louis]
  *
  * Purpose:  Write an alignment in standard multi-block 
  *           Stockholm format to an open file. A wrapper
  *           for actually_write_stockholm().
  *
  * Args:     fp  - file that's open for writing
  *           msa - alignment to write    
  *
  * Returns:  (void)
  */
 void
 WriteStockholm(FILE *fp, MSA *msa)
 {
   actually_write_stockholm(fp, msa, 50); /* 50 char per block */
 }
 
 /* Function: WriteStockholmOneBlock()
  * Date:     SRE, Mon May 31 19:15:22 1999 [St. Louis]
  *
  * Purpose:  Write an alignment in Pfam's single-block
  *           Stockholm format to an open file. A wrapper
  *           for actually_write_stockholm().
  *
  * Args:     fp  - file that's open for writing
  *           msa - alignment to write    
  *
  * Returns:  (void)
  */
 void
 WriteStockholmOneBlock(FILE *fp, MSA *msa)
 {
   actually_write_stockholm(fp, msa, msa->alen); /* one big block */
 }
 
 
 /* Function: actually_write_stockholm()
  * Date:     SRE, Fri May 21 17:39:22 1999 [St. Louis]
  *
  * Purpose:  Write an alignment in Stockholm format to 
  *           an open file. This is the function that actually
  *           does the work. The API's WriteStockholm()
  *           and WriteStockholmOneBlock() are wrappers.
  *
  * Args:     fp    - file that's open for writing
  *           msa   - alignment to write        
  *           cpl   - characters to write per line in alignment block
  *
  * Returns:  (void)
  */
 static void
 actually_write_stockholm(FILE *fp, MSA *msa, int cpl)
 {
   int  i, j;
   int  len = 0;
   int  namewidth;
   int  typewidth = 0;		/* markup tags are up to 5 chars long */
   int  markupwidth = 0;		/* #=GR, #=GC are four char wide + 1 space */
   char *buf;
   int  currpos;
   char *s, *tok;
   
   /* Figure out how much space we need for name + markup
    * to keep the alignment in register. Required by Stockholm
    * spec, even though our Stockholm parser doesn't care (Erik's does).
    */
   namewidth = 0;
   for (i = 0; i < msa->nseq; i++)
     if ((len = strlen(msa->sqname[i])) > namewidth) 
       namewidth = len;
 
   /* Figure out how much space we need for markup tags
    *   markupwidth = always 4 if we're doing markup:  strlen("#=GR")
    *   typewidth   = longest markup tag
    */
   if (msa->ss      != NULL) { markupwidth = 4; typewidth = 2; }
   if (msa->sa      != NULL) { markupwidth = 4; typewidth = 2; }
   for (i = 0; i < msa->ngr; i++)
     if ((len = strlen(msa->gr_tag[i])) > typewidth) typewidth = len;
 
   if (msa->rf      != NULL) { markupwidth = 4; if (typewidth < 2) typewidth = 2; }
   if (msa->ss_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
   if (msa->sa_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
   for (i = 0; i < msa->ngc; i++)
     if ((len = strlen(msa->gc_tag[i])) > typewidth) typewidth = len;
   
   buf = MallocOrDie(sizeof(char) * (cpl+namewidth+typewidth+markupwidth+61)); 
 
   /* Magic Stockholm header
    */
   fprintf(fp, "# STOCKHOLM 1.0\n");
 
   /* Free text comments
    */
   for (i = 0;  i < msa->ncomment; i++)
     fprintf(fp, "# %s\n", msa->comment[i]);
   if (msa->ncomment > 0) fprintf(fp, "\n");
 
   /* GF section: per-file annotation
    */
   if (msa->name  != NULL)       fprintf(fp, "#=GF ID    %s\n", msa->name);
   if (msa->acc   != NULL)       fprintf(fp, "#=GF AC    %s\n", msa->acc);
   if (msa->desc  != NULL)       fprintf(fp, "#=GF DE    %s\n", msa->desc);
   if (msa->au    != NULL)       fprintf(fp, "#=GF AU    %s\n", msa->au);
   
   /* Thresholds are hacky. Pfam has two. Rfam has one.
    */
   if      (msa->cutoff_is_set[MSA_CUTOFF_GA1] && msa->cutoff_is_set[MSA_CUTOFF_GA2])
     fprintf(fp, "#=GF GA    %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_GA1], msa->cutoff[MSA_CUTOFF_GA2]);
   else if (msa->cutoff_is_set[MSA_CUTOFF_GA1])
     fprintf(fp, "#=GF GA    %.1f\n", msa->cutoff[MSA_CUTOFF_GA1]);
   if      (msa->cutoff_is_set[MSA_CUTOFF_NC1] && msa->cutoff_is_set[MSA_CUTOFF_NC2])
     fprintf(fp, "#=GF NC    %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_NC1], msa->cutoff[MSA_CUTOFF_NC2]);
   else if (msa->cutoff_is_set[MSA_CUTOFF_NC1])
     fprintf(fp, "#=GF NC    %.1f\n", msa->cutoff[MSA_CUTOFF_NC1]);
   if      (msa->cutoff_is_set[MSA_CUTOFF_TC1] && msa->cutoff_is_set[MSA_CUTOFF_TC2])
     fprintf(fp, "#=GF TC    %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_TC1], msa->cutoff[MSA_CUTOFF_TC2]);
   else if (msa->cutoff_is_set[MSA_CUTOFF_TC1])
     fprintf(fp, "#=GF TC    %.1f\n", msa->cutoff[MSA_CUTOFF_TC1]);
 
   for (i = 0; i < msa->ngf; i++)
     fprintf(fp, "#=GF %-5s %s\n", msa->gf_tag[i], msa->gf[i]); 
   fprintf(fp, "\n");
 
 
   /* GS section: per-sequence annotation
    */
   if (msa->flags & MSA_SET_WGT) 
     {
       for (i = 0; i < msa->nseq; i++) 
 	fprintf(fp, "#=GS %-*.*s WT    %.2f\n", namewidth, namewidth, msa->sqname[i], msa->wgt[i]);
       fprintf(fp, "\n");
     }
   if (msa->sqacc != NULL) 
     {
       for (i = 0; i < msa->nseq; i++) 
 	if (msa->sqacc[i] != NULL)
 	  fprintf(fp, "#=GS %-*.*s AC    %s\n", namewidth, namewidth, msa->sqname[i], msa->sqacc[i]);
       fprintf(fp, "\n");
     }
   if (msa->sqdesc != NULL) 
     {
       for (i = 0; i < msa->nseq; i++) 
 	if (msa->sqdesc[i] != NULL)
 	  fprintf(fp, "#=GS %*.*s DE    %s\n", namewidth, namewidth, msa->sqname[i], msa->sqdesc[i]);
       fprintf(fp, "\n");
     }
   for (i = 0; i < msa->ngs; i++)
     {
       /* Multiannotated GS tags are possible; for example, 
        *     #=GS foo DR PDB; 1xxx;
        *     #=GS foo DR PDB; 2yyy;
        * These are stored, for example, as:
        *     msa->gs[0][0] = "PDB; 1xxx;\nPDB; 2yyy;"
        * and must be decomposed.
        */
       for (j = 0; j < msa->nseq; j++)
 	if (msa->gs[i][j] != NULL)
 	  {
 	    s = msa->gs[i][j];
 	    while ((tok = sre_strtok(&s, "\n", NULL)) != NULL)
 	      fprintf(fp, "#=GS %*.*s %5s %s\n", namewidth, namewidth,
 		      msa->sqname[j], msa->gs_tag[i], tok);
 	  }
       fprintf(fp, "\n");
     }
 
   /* Alignment section:
    * contains aligned sequence, #=GR annotation, and #=GC annotation
    */
   for (currpos = 0; currpos < msa->alen; currpos += cpl)
     {
       if (currpos > 0) fprintf(fp, "\n");
       for (i = 0; i < msa->nseq; i++)
 	{
 	  strncpy(buf, msa->aseq[i] + currpos, cpl);
 	  buf[cpl] = '\0';	      
 	  fprintf(fp, "%-*.*s  %s\n", namewidth+typewidth+markupwidth, namewidth+typewidth+markupwidth, 
 		  msa->sqname[i], buf);
 
 	  if (msa->ss != NULL && msa->ss[i] != NULL) {
 	    strncpy(buf, msa->ss[i] + currpos, cpl);
 	    buf[cpl] = '\0';	 
 	    fprintf(fp, "#=GR %-*.*s SS     %s\n", namewidth, namewidth, msa->sqname[i], buf);
 	  }
 	  if (msa->sa != NULL && msa->sa[i] != NULL) {
 	    strncpy(buf, msa->sa[i] + currpos, cpl);
 	    buf[cpl] = '\0';
 	    fprintf(fp, "#=GR %-*.*s SA     %s\n", namewidth, namewidth, msa->sqname[i], buf);
 	  }
 	  for (j = 0; j < msa->ngr; j++)
 	    if (msa->gr[j][i] != NULL) {
 	      strncpy(buf, msa->gr[j][i] + currpos, cpl);
 	      buf[cpl] = '\0';
 	      fprintf(fp, "#=GR %-*.*s %5s  %s\n", 
 		      namewidth, namewidth, msa->sqname[i], msa->gr_tag[j], buf);
 	    }
 	}
       if (msa->ss_cons != NULL) {
 	strncpy(buf, msa->ss_cons + currpos, cpl);
 	buf[cpl] = '\0';
 	fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SS_cons", buf);
       }
 
       if (msa->sa_cons != NULL) {
 	strncpy(buf, msa->sa_cons + currpos, cpl);
 	buf[cpl] = '\0';
 	fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SA_cons", buf);
       }
 
       if (msa->rf != NULL) {
 	strncpy(buf, msa->rf + currpos, cpl);
 	buf[cpl] = '\0';
 	fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "RF", buf);
       }
       for (j = 0; j < msa->ngc; j++) {
 	strncpy(buf, msa->gc[j] + currpos, cpl);
 	buf[cpl] = '\0';
 	fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, 
 		msa->gc_tag[j], buf);
       }
     }
   fprintf(fp, "//\n");
   free(buf);
 }
 
 
 
 
 
 /* Format of a GF line:
  *    #=GF <featurename> <text>
  */
 static int
 parse_gf(MSA *msa, char *buf)
 {
   char *gf;
   char *featurename;
   char *text;
   char *s;
 
   s = buf;
   if ((gf          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((text        = sre_strtok(&s, "\n",       NULL)) == NULL) return 0;
   while (*text && (*text == ' ' || *text == '\t')) text++;
 
   if      (strcmp(featurename, "ID") == 0) 
     msa->name                 = sre_strdup(text, -1);
   else if (strcmp(featurename, "AC") == 0) 
     msa->acc                  = sre_strdup(text, -1);
   else if (strcmp(featurename, "DE") == 0) 
     msa->desc                 = sre_strdup(text, -1);
   else if (strcmp(featurename, "AU") == 0) 
     msa->au                   = sre_strdup(text, -1);
   else if (strcmp(featurename, "GA") == 0) 
     {				/* Pfam has GA1, GA2. Rfam just has GA1. */
       s = text;
       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
       msa->cutoff[MSA_CUTOFF_GA1]        = atof(text);
       msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;
       if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
 	msa->cutoff[MSA_CUTOFF_GA2]        = atof(text);
 	msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;
       }
     }
   else if (strcmp(featurename, "NC") == 0) 
     {
       s = text;
       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
       msa->cutoff[MSA_CUTOFF_NC1]        = atof(text);
       msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;
       if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
 	msa->cutoff[MSA_CUTOFF_NC2]        = atof(text);
 	msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;
       }
     }
   else if (strcmp(featurename, "TC") == 0) 
     {
       s = text;
       if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
       msa->cutoff[MSA_CUTOFF_TC1]        = atof(text);
       msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;
       if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
 	msa->cutoff[MSA_CUTOFF_TC2]        = atof(text);
 	msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;
       }
     }
   else 
     MSAAddGF(msa, featurename, text);
 
   return 1;
 }
 
 
 /* Format of a GS line:
  *    #=GS <seqname> <featurename> <text>
  */
 static int
 parse_gs(MSA *msa, char *buf)
 {
   char *gs;
   char *seqname;
   char *featurename;
   char *text; 
   int   seqidx;
   char *s;
 
   s = buf;
   if ((gs          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((seqname     = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((text        = sre_strtok(&s, "\n",       NULL)) == NULL) return 0;
   while (*text && (*text == ' ' || *text == '\t')) text++;
   
   /* GS usually follows another GS; guess lastidx+1
    */
   seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
   msa->lastidx = seqidx;
 
   if (strcmp(featurename, "WT") == 0)
     {
       msa->wgt[seqidx]          = atof(text);
       msa->flags |= MSA_SET_WGT;
     }
 
   else if (strcmp(featurename, "AC") == 0)
     MSASetSeqAccession(msa, seqidx, text);
 
   else if (strcmp(featurename, "DE") == 0)
     MSASetSeqDescription(msa, seqidx, text);
 
   else				
     MSAAddGS(msa, featurename, seqidx, text);
 
   return 1;
 }
 
 /* Format of a GC line:
  *    #=GC <featurename> <text>
  */
 static int 
 parse_gc(MSA *msa, char *buf)
 {
   char *gc;
   char *featurename;
   char *text; 
   char *s;
   int   len;
 
   s = buf;
   if ((gc          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((text        = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
   
   if (strcmp(featurename, "SS_cons") == 0)
     sre_strcat(&(msa->ss_cons), -1, text, len);
   else if (strcmp(featurename, "SA_cons") == 0)
     sre_strcat(&(msa->sa_cons), -1, text, len);
   else if (strcmp(featurename, "RF") == 0)
     sre_strcat(&(msa->rf), -1, text, len);
   else
     MSAAppendGC(msa, featurename, text);
 
   return 1;
 }
 
 /* Format of a GR line:
  *    #=GR <seqname> <featurename> <text>
  */
 static int
 parse_gr(MSA *msa, char *buf)
 {
   char *gr;
   char *seqname;
   char *featurename;
   char *text;
   int   seqidx;
   int   len;
   int   j;
   char *s;
 
   s = buf;
   if ((gr          = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((seqname     = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((text        = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
 
   /* GR usually follows sequence it refers to; guess msa->lastidx */
   seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx);
   msa->lastidx = seqidx;
 
   if (strcmp(featurename, "SS") == 0) 
     {
       if (msa->ss == NULL)
 	{
 	  msa->ss    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
 	  msa->sslen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
 	  for (j = 0; j < msa->nseqalloc; j++)
 	    {
 	      msa->ss[j]    = NULL;
 	      msa->sslen[j] = 0;
 	    }
 	}
       msa->sslen[seqidx] = sre_strcat(&(msa->ss[seqidx]), msa->sslen[seqidx], text, len);
     }
   else if (strcmp(featurename, "SA") == 0)
     {
       if (msa->sa == NULL)
 	{
 	  msa->sa    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
 	  msa->salen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
 	  for (j = 0; j < msa->nseqalloc; j++) 
 	    {
 	      msa->sa[j]    = NULL;
 	      msa->salen[j] = 0;
 	    }
 	}
       msa->salen[seqidx] = sre_strcat(&(msa->sa[seqidx]), msa->salen[seqidx], text, len);
     }
   else 
     MSAAppendGR(msa, featurename, seqidx, text);
 
   return 1;
 }
 
 
 /* comments are simply stored verbatim, not parsed
  */
 static int
 parse_comment(MSA *msa, char *buf)
 {
   char *s;
   char *comment;
 
   s = buf + 1;			               /* skip leading '#' */
   if (*s == '\n') { *s = '\0'; comment = s; }  /* deal with blank comment */
   else if ((comment = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
   
   MSAAddComment(msa, comment);
   return 1;
 }
 
 static int
 parse_sequence(MSA *msa, char *buf)
 {
   char *s;
   char *seqname;
   char *text;
   int   seqidx;
   int   len;
 
   s = buf;
   if ((seqname     = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
   if ((text        = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0; 
   
   /* seq usually follows another seq; guess msa->lastidx +1 */
   seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
   msa->lastidx = seqidx;
 
   msa->sqlen[seqidx] = sre_strcat(&(msa->aseq[seqidx]), msa->sqlen[seqidx], text, len);
   return 1;
 }