Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,639 @@
1
+/*****************************************************************
2
+ * SQUID - a library of functions for biological sequence analysis
3
+ * Copyright (C) 1992-2002 Washington University School of Medicine
4
+ * 
5
+ *     This source code is freely distributed under the terms of the
6
+ *     GNU General Public License. See the files COPYRIGHT and LICENSE
7
+ *     for details.
8
+ *****************************************************************/
9
+
10
+/* alignio.c
11
+ * SRE, Mon Jul 12 11:57:37 1993
12
+ * RCS $Id: alignio.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: alignio.c,v 1.11 2002/10/09 14:26:09 eddy Exp)
13
+ * 
14
+ * Input/output of sequence alignments.
15
+ */
16
+
17
+#include <stdio.h>
18
+#include <stdlib.h>
19
+#include <string.h>
20
+#include <ctype.h>
21
+#include "squid.h"
22
+#include "sre_random.h"
23
+
24
+/* Function: AllocAlignment()
25
+ * 
26
+ * Purpose:  Allocate space for an alignment, given the number
27
+ *           of sequences and the alignment length in columns.
28
+ *           
29
+ * Args:     nseq     - number of sequences
30
+ *           alen     - width of alignment
31
+ *           ret_aseq - RETURN: alignment itself
32
+ *           ainfo    - RETURN: other info associated with alignment
33
+ *           
34
+ * Return:   (void)
35
+ *           aseq, ainfo free'd by caller: FreeAlignment(aseq, &ainfo).
36
+ *           note that ainfo itself is alloc'ed in caller, usually
37
+ *           just by a "AINFO ainfo" definition.
38
+ */
39
+void
40
+AllocAlignment(int nseq, int alen, char ***ret_aseq, AINFO *ainfo)
41
+{
42
+  char **aseq;
43
+  int idx;
44
+
45
+  InitAinfo(ainfo);
46
+
47
+  aseq = (char **) MallocOrDie (sizeof(char *) * nseq);
48
+  for (idx = 0; idx < nseq; idx++)
49
+    aseq[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1));
50
+
51
+  ainfo->alen  = alen;
52
+  ainfo->nseq  = nseq;
53
+
54
+  ainfo->wgt   = (float *) MallocOrDie (sizeof(float) * nseq);
55
+  FSet(ainfo->wgt, nseq, 1.0);
56
+
57
+  ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq);
58
+  for (idx = 0; idx < nseq; idx++)
59
+    ainfo->sqinfo[idx].flags = 0;
60
+
61
+  *ret_aseq = aseq;
62
+}
63
+ 
64
+
65
+/* Function: InitAinfo()
66
+ * Date:     SRE, Tue Jan 19 10:16:02 1999 [St. Louis]
67
+ *
68
+ * Purpose:  Initialize the fields in ainfo structure to
69
+ *           default (null) values. Does nothing with 
70
+ *           fields that are dependent on nseq or alen.
71
+ *
72
+ * Args:     ainfo  - optional info structure for an alignment
73
+ *
74
+ * Returns:  (void). ainfo is modified.
75
+ */
76
+void
77
+InitAinfo(AINFO *ainfo)
78
+{
79
+  ainfo->name  = NULL;
80
+  ainfo->desc  = NULL;
81
+  ainfo->cs    = NULL;
82
+  ainfo->rf    = NULL;
83
+  ainfo->acc   = NULL;
84
+  ainfo->au    = NULL;
85
+  ainfo->flags = 0;
86
+
87
+  ainfo->tc1  = ainfo->tc2 = 0.0;
88
+  ainfo->nc1  = ainfo->nc2 = 0.0;
89
+  ainfo->ga1  = ainfo->ga2 = 0.0;
90
+}
91
+
92
+
93
+/* Function: FreeAlignment()
94
+ * 
95
+ * Purpose:  Free the space allocated to alignment, names, and optional
96
+ *           information. 
97
+ *           
98
+ * Args:     aseqs - sequence alignment
99
+ *           ainfo - associated alignment data.
100
+ */                  
101
+void
102
+FreeAlignment(char **aseqs, AINFO *ainfo)
103
+{
104
+  int i;
105
+
106
+  for (i = 0; i < ainfo->nseq; i++)
107
+    {
108
+      if (ainfo->sqinfo[i].flags & SQINFO_SS) free(ainfo->sqinfo[i].ss);
109
+      if (ainfo->sqinfo[i].flags & SQINFO_SA) free(ainfo->sqinfo[i].sa);
110
+    }
111
+  if (ainfo->cs   != NULL) free(ainfo->cs);
112
+  if (ainfo->rf   != NULL) free(ainfo->rf);
113
+  if (ainfo->name != NULL) free(ainfo->name);
114
+  if (ainfo->desc != NULL) free(ainfo->desc);
115
+  if (ainfo->acc  != NULL) free(ainfo->acc);
116
+  if (ainfo->au   != NULL) free(ainfo->au);
117
+
118
+  free(ainfo->sqinfo);
119
+  free(ainfo->wgt);
120
+  Free2DArray((void **) aseqs, ainfo->nseq);
121
+}
122
+
123
+
124
+
125
+/* Function: SAMizeAlignment()
126
+ * Date:     SRE, Tue Jun 30 09:49:40 1998 [St. Louis]
127
+ *
128
+ * Purpose:  Make a "best effort" attempt to convert an alignment
129
+ *           to SAM gap format: - in delete col, . in insert col.
130
+ *           Only works if alignment adheres to SAM's upper/lower
131
+ *           case convention, which is true for instance of old
132
+ *           HMMER alignments.
133
+ *
134
+ * Args:     aseq  - alignment to convert
135
+ *           nseq  - number of seqs in alignment
136
+ *           alen  - length of alignment
137
+ *
138
+ * Returns:  (void)
139
+ */
140
+void
141
+SAMizeAlignment(char **aseq, int nseq, int alen)
142
+{
143
+  int col;			/* counter for aligned columns */
144
+  int i;			/* counter for seqs */
145
+  int sawlower, sawupper, sawgap;
146
+  char gapchar;
147
+
148
+  for (col = 0; col < alen; col++)
149
+    {
150
+      sawlower = sawupper = sawgap = 0;
151
+				/* pass 1: do we see only upper or lower? */
152
+      for (i = 0; i < nseq; i++)
153
+	{
154
+	  if (isgap(aseq[i][col]))         { sawgap   = 1; continue; }
155
+	  if (isupper((int) aseq[i][col])) { sawupper = 1; continue; }
156
+	  if (islower((int) aseq[i][col]))   sawlower = 1;
157
+	}
158
+				/* select gap character for column */
159
+      gapchar = '-';		/* default */
160
+      if (sawlower && ! sawupper) gapchar = '.';
161
+
162
+				/* pass 2: set gap char */
163
+      for (i = 0; i < nseq; i++)
164
+	if (isgap(aseq[i][col])) aseq[i][col] = gapchar;
165
+    }
166
+}
167
+
168
+
169
+/* Function: SAMizeAlignmentByGapFrac()
170
+ * Date:     SRE, Tue Jun 30 10:58:38 1998 [St. Louis]
171
+ *
172
+ * Purpose:  Convert an alignment to SAM's gap and case
173
+ *           conventions, using gap fraction in a column
174
+ *           to choose match versus insert columns. In match columns,
175
+ *           residues are upper case and gaps are '-'.
176
+ *           In insert columns, residues are lower case and
177
+ *           gaps are '.'
178
+ *
179
+ * Args:     aseq   - aligned sequences
180
+ *           nseq   - number of sequences
181
+ *           alen   - length of alignment
182
+ *           maxgap - if more gaps than this fraction, column is insert.
183
+ *
184
+ * Returns:  (void) Characters in aseq may be altered.
185
+ */
186
+void
187
+SAMizeAlignmentByGapFrac(char **aseq, int nseq, int alen, float maxgap)
188
+{
189
+  int apos;			/* counter over columns */
190
+  int idx;			/* counter over sequences */
191
+  int ngap;			/* number of gaps seen */
192
+
193
+  for (apos = 0; apos < alen; apos++)
194
+    {
195
+				/* count gaps */
196
+      ngap = 0;
197
+      for (idx = 0; idx < nseq; idx++)
198
+	if (isgap(aseq[idx][apos])) ngap++;
199
+      
200
+				/* convert to SAM conventions */
201
+      if ((float) ngap / (float) nseq > maxgap)
202
+	{			/* insert column */
203
+	  for (idx = 0; idx < nseq; idx++)
204
+	    if (isgap(aseq[idx][apos])) aseq[idx][apos] = '.';
205
+	    else aseq[idx][apos] = (char) tolower((int) aseq[idx][apos]);
206
+	}
207
+      else			
208
+	{			/* match column */
209
+	  for (idx = 0; idx < nseq; idx++)
210
+	    if (isgap(aseq[idx][apos])) aseq[idx][apos] = '-';
211
+	    else aseq[idx][apos] = (char) toupper((int) aseq[idx][apos]);
212
+	}
213
+    }
214
+}
215
+
216
+
217
+
218
+
219
+/* Function: MakeAlignedString()
220
+ * 
221
+ * Purpose:  Given a raw string of some type (secondary structure, say),
222
+ *           align it to a given aseq by putting gaps wherever the
223
+ *           aseq has gaps. 
224
+ *           
225
+ * Args:     aseq:  template for alignment
226
+ *           alen:  length of aseq
227
+ *           ss:    raw string to align to aseq
228
+ *           ret_s: RETURN: aligned ss
229
+ *           
230
+ * Return:   1 on success, 0 on failure (and squid_errno is set.)
231
+ *           ret_ss is malloc'ed here and must be free'd by caller.
232
+ */
233
+int
234
+MakeAlignedString(char *aseq, int alen, char *ss, char **ret_s)
235
+{
236
+  char *new; 
237
+  int   apos, rpos;
238
+
239
+  new = (char *) MallocOrDie ((alen+1) * sizeof(char));
240
+  for (apos = rpos = 0; apos < alen; apos++)
241
+    if (! isgap(aseq[apos]))
242
+      {
243
+	new[apos] = ss[rpos];
244
+	rpos++;
245
+      }
246
+    else
247
+      new[apos] = '.';
248
+  new[apos] = '\0';
249
+
250
+  if (rpos != strlen(ss))
251
+    { squid_errno = SQERR_PARAMETER; free(new); return 0; }
252
+  *ret_s = new;
253
+  return 1;
254
+}
255
+
256
+
257
+/* Function: MakeDealignedString()
258
+ * 
259
+ * Purpose:  Given an aligned string of some type (either sequence or 
260
+ *           secondary structure, for instance), dealign it relative
261
+ *           to a given aseq. Return a ptr to the new string.
262
+ *           
263
+ * Args:     aseq  : template alignment 
264
+ *           alen  : length of aseq
265
+ *           ss:   : string to make dealigned copy of; same length as aseq
266
+ *           ret_s : RETURN: dealigned copy of ss
267
+ *           
268
+ * Return:   1 on success, 0 on failure (and squid_errno is set)
269
+ *           ret_s is alloc'ed here and must be freed by caller
270
+ */
271
+int
272
+MakeDealignedString(char *aseq, int alen, char *ss, char **ret_s)
273
+{
274
+  char *new; 
275
+  int   apos, rpos;
276
+
277
+  new = (char *) MallocOrDie ((alen+1) * sizeof(char));
278
+  for (apos = rpos = 0; apos < alen; apos++)
279
+    if (! isgap(aseq[apos]))
280
+      {
281
+	new[rpos] = ss[apos];
282
+	rpos++;
283
+      }
284
+  new[rpos] = '\0';
285
+  if (alen != strlen(ss))
286
+    { squid_errno = SQERR_PARAMETER; free(new); return 0; }
287
+  *ret_s = new;
288
+  return 1;
289
+}
290
+
291
+
292
+/* Function: DealignedLength()
293
+ *
294
+ * Purpose:  Count the number of non-gap symbols in seq.
295
+ *           (i.e. find the length of the unaligned sequence)
296
+ * 
297
+ * Args:     aseq - aligned sequence to count symbols in, \0 terminated
298
+ * 
299
+ * Return:   raw length of seq.
300
+ */
301
+int
302
+DealignedLength(char *aseq)
303
+{
304
+  int rlen; 
305
+  for (rlen = 0; *aseq; aseq++)
306
+    if (! isgap(*aseq)) rlen++;
307
+  return rlen;
308
+}
309
+
310
+
311
+/* Function: WritePairwiseAlignment()
312
+ * 
313
+ * Purpose:  Write a nice formatted pairwise alignment out,
314
+ *           with a BLAST-style middle line showing identities
315
+ *           as themselves (single letter) and conservative
316
+ *           changes as '+'.
317
+ *           
318
+ * Args:     ofp          - open fp to write to (stdout, perhaps)
319
+ *           aseq1, aseq2 - alignments to write (not necessarily 
320
+ *                          flushed right with gaps)
321
+ *           name1, name2 - names of sequences
322
+ *           spos1, spos2 - starting position in each (raw) sequence
323
+ *           pam          - PAM matrix; positive values define
324
+ *                          conservative changes
325
+ *           indent       - how many extra spaces to print on left
326
+ *           
327
+ * Return:  1 on success, 0 on failure
328
+ */
329
+int
330
+WritePairwiseAlignment(FILE *ofp,
331
+		       char *aseq1, char *name1, int spos1,
332
+		       char *aseq2, char *name2, int spos2,
333
+		       int **pam, int indent)
334
+{
335
+  char sname1[11];              /* shortened name               */
336
+  char sname2[11];             
337
+  int  still_going;		/* True if writing another block */
338
+  char buf1[61];		/* buffer for writing seq1; CPL+1*/
339
+  char bufmid[61];              /* buffer for writing consensus  */
340
+  char buf2[61];
341
+  char *s1, *s2;                /* ptrs into each sequence          */
342
+  int  count1, count2;		/* number of symbols we're writing  */
343
+  int  rpos1, rpos2;		/* position in raw seqs             */
344
+  int  rawcount1, rawcount2;	/* number of nongap symbols written */
345
+  int  apos;
346
+
347
+  strncpy(sname1, name1, 10);
348
+  sname1[10] = '\0';
349
+  strtok(sname1, WHITESPACE);
350
+
351
+  strncpy(sname2, name2, 10);
352
+  sname2[10] = '\0';
353
+  strtok(sname2, WHITESPACE);
354
+
355
+  s1 = aseq1; 
356
+  s2 = aseq2;
357
+  rpos1 = spos1;
358
+  rpos2 = spos2;
359
+
360
+  still_going = TRUE;
361
+  while (still_going)
362
+    {
363
+      still_going = FALSE;
364
+      
365
+				/* get next line's worth from both */
366
+      strncpy(buf1, s1, 60); buf1[60] = '\0';
367
+      strncpy(buf2, s2, 60); buf2[60] = '\0';
368
+      count1 = strlen(buf1);
369
+      count2 = strlen(buf2);
370
+
371
+				/* is there still more to go? */
372
+      if ((count1 == 60 && s1[60] != '\0') ||
373
+	  (count2 == 60 && s2[60] != '\0'))
374
+	still_going = TRUE;
375
+
376
+				/* shift seq ptrs by a line */
377
+      s1 += count1;
378
+      s2 += count2;
379
+
380
+				/* assemble the consensus line */
381
+      for (apos = 0; apos < count1 && apos < count2; apos++)
382
+	{
383
+	  if (!isgap(buf1[apos]) && !isgap(buf2[apos]))
384
+	    {
385
+	      if (buf1[apos] == buf2[apos])
386
+		bufmid[apos] = buf1[apos];
387
+	      else if (pam[buf1[apos] - 'A'][buf2[apos] - 'A'] > 0)
388
+		bufmid[apos] = '+';
389
+	      else
390
+		bufmid[apos] = ' ';
391
+	    }
392
+	  else
393
+	    bufmid[apos] = ' ';
394
+	}
395
+      bufmid[apos] = '\0';
396
+
397
+      rawcount1 = 0;
398
+      for (apos = 0; apos < count1; apos++)
399
+	if (!isgap(buf1[apos])) rawcount1++;
400
+      
401
+      rawcount2 = 0;
402
+      for (apos = 0; apos < count2; apos++)
403
+	if (!isgap(buf2[apos])) rawcount2++;
404
+
405
+      (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "", 
406
+		     sname1, rpos1, buf1, rpos1 + rawcount1 -1);
407
+      (void) fprintf(ofp, "%*s                 %s\n", indent, "",
408
+		     bufmid);
409
+      (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "", 
410
+		     sname2, rpos2, buf2, rpos2 + rawcount2 -1);
411
+      (void) fprintf(ofp, "\n");
412
+
413
+      rpos1 += rawcount1;
414
+      rpos2 += rawcount2;
415
+    }
416
+
417
+  return 1;
418
+}
419
+
420
+
421
+/* Function: MingapAlignment()
422
+ * 
423
+ * Purpose:  Remove all-gap columns from a multiple sequence alignment
424
+ *           and its associated data. The alignment is assumed to be
425
+ *           flushed (all aseqs the same length).
426
+ */
427
+int
428
+MingapAlignment(char **aseqs, AINFO *ainfo)
429
+{
430
+  int apos;			/* position in original alignment */
431
+  int mpos;			/* position in new alignment      */
432
+  int idx;
433
+
434
+  /* We overwrite aseqs, using its allocated memory.
435
+   */
436
+  for (apos = 0, mpos = 0; aseqs[0][apos] != '\0'; apos++)
437
+    {
438
+				/* check for all-gap in column */
439
+      for (idx = 0; idx < ainfo->nseq; idx++)
440
+	if (! isgap(aseqs[idx][apos]))
441
+	  break;
442
+      if (idx == ainfo->nseq) continue;
443
+	
444
+				/* shift alignment and ainfo */
445
+      if (mpos != apos)
446
+	{
447
+	  for (idx = 0; idx < ainfo->nseq; idx++)
448
+	    aseqs[idx][mpos] = aseqs[idx][apos];
449
+	  
450
+	  if (ainfo->cs != NULL) ainfo->cs[mpos] = ainfo->cs[apos];
451
+	  if (ainfo->rf != NULL) ainfo->rf[mpos] = ainfo->rf[apos];
452
+	}
453
+      mpos++;
454
+    }
455
+				/* null terminate everything */
456
+  for (idx = 0; idx < ainfo->nseq; idx++)
457
+    aseqs[idx][mpos] = '\0';
458
+  ainfo->alen = mpos;	/* set new length */
459
+  if (ainfo->cs != NULL) ainfo->cs[mpos] = '\0';
460
+  if (ainfo->rf != NULL) ainfo->rf[mpos] = '\0';
461
+  return 1;
462
+}
463
+
464
+
465
+
466
+/* Function: RandomAlignment()
467
+ * 
468
+ * Purpose:  Create a random alignment from raw sequences.
469
+ * 
470
+ *           Ideally, we would like to sample an alignment from the
471
+ *           space of possible alignments according to its probability,
472
+ *           given a prior probability distribution for alignments.
473
+ *           I don't see how to describe such a distribution, let alone
474
+ *           sample it.
475
+ *           
476
+ *           This is a rough approximation that tries to capture some
477
+ *           desired properties. We assume the alignment is generated
478
+ *           by a simple HMM composed of match and insert states.
479
+ *           Given parameters (pop, pex) for the probability of opening
480
+ *           and extending an insertion, we can find the expected number
481
+ *           of match states, M, in the underlying model for each sequence.
482
+ *           We use an average M taken over all the sequences (this is
483
+ *           an approximation. The expectation of M given all the sequence
484
+ *           lengths is a nasty-looking summation.)
485
+ *           
486
+ *           M = len / ( 1 + pop ( 1 + 1/ (1-pex) ) ) 
487
+ *           
488
+ *           Then, we assign positions in each raw sequence onto the M match
489
+ *           states and M+1 insert states of this "HMM", by rolling random
490
+ *           numbers and inserting the (rlen-M) inserted positions randomly
491
+ *           into the insert slots, taking into account the relative probability
492
+ *           of open vs. extend.
493
+ *           
494
+ *           The resulting alignment has two desired properties: insertions
495
+ *           tend to follow the HMM-like exponential distribution, and
496
+ *           the "sparseness" of the alignment is controllable through
497
+ *           pop and pex.
498
+ *           
499
+ * Args:     rseqs     - raw sequences to "align", 0..nseq-1
500
+ *           sqinfo    - array of 0..nseq-1 info structures for the sequences
501
+ *           nseq      - number of sequences   
502
+ *           pop       - probability to open insertion (0<pop<1)
503
+ *           pex       - probability to extend insertion (0<pex<1)
504
+ *           ret_aseqs - RETURN: alignment (flushed)
505
+ *           ainfo     - fill in: alignment info
506
+ * 
507
+ * Return:   1 on success, 0 on failure. Sets squid_errno to indicate cause
508
+ *           of failure.
509
+ */                      
510
+int
511
+RandomAlignment(char **rseqs, SQINFO *sqinfo, int nseq, float pop, float pex,
512
+		char ***ret_aseqs, AINFO *ainfo)
513
+{
514
+  char **aseqs;                 /* RETURN: alignment   */
515
+  int    alen;			/* length of alignment */
516
+  int   *rlen;                  /* lengths of each raw sequence */
517
+  int    M;			/* length of "model"   */
518
+  int  **ins;                   /* insertion counts, 0..nseq-1 by 0..M */
519
+  int   *master_ins;            /* max insertion counts, 0..M */
520
+  int    apos, rpos, idx;
521
+  int    statepos;
522
+  int    count;
523
+  int    minlen;
524
+
525
+  /* calculate expected length of model, M
526
+   */
527
+  rlen = (int *) MallocOrDie (sizeof(int) * nseq);
528
+  M = 0;
529
+  minlen = 9999999;
530
+  for (idx = 0; idx < nseq; idx++)
531
+    {
532
+      rlen[idx] = strlen(rseqs[idx]);
533
+      M += rlen[idx];
534
+      minlen = (rlen[idx] < minlen) ? rlen[idx] : minlen;
535
+    }
536
+  M = (int) ((float) M / (1.0 + pop * (1.0 + 1.0 / (1.0 - pex))));
537
+  M /= nseq;
538
+  if (M > minlen) M = minlen;
539
+
540
+  /* make arrays that count insertions in M+1 possible insert states
541
+   */
542
+  ins = (int **) MallocOrDie (sizeof(int *) * nseq);
543
+  master_ins = (int *) MallocOrDie (sizeof(int) * (M+1));
544
+  for (idx = 0; idx < nseq; idx++)
545
+    {
546
+      ins[idx] = (int *) MallocOrDie (sizeof(int) * (M+1));
547
+      for (rpos = 0; rpos <= M; rpos++)
548
+	ins[idx][rpos] = 0;
549
+    }
550
+				/* normalize */
551
+  pop = pop / (pop+pex);
552
+  pex = 1.0 - pop;
553
+				/* make insertions for individual sequences */
554
+  for (idx = 0; idx < nseq; idx++)
555
+    {
556
+      apos = -1;
557
+      for (rpos = 0; rpos < rlen[idx]-M; rpos++)
558
+	{
559
+	  if (sre_random() < pop || apos == -1)	/* open insertion */
560
+	    apos = CHOOSE(M+1);        /* choose 0..M */
561
+	  ins[idx][apos]++;
562
+	}
563
+    }
564
+				/* calculate master_ins, max inserts */
565
+  alen = M;
566
+  for (apos = 0; apos <= M; apos++)
567
+    {
568
+      master_ins[apos] = 0;
569
+      for (idx = 0; idx < nseq; idx++)
570
+	if (ins[idx][apos] > master_ins[apos])
571
+	  master_ins[apos] = ins[idx][apos];
572
+      alen += master_ins[apos];
573
+    }
574
+
575
+
576
+  /* Now, construct alignment
577
+   */
578
+  aseqs = (char **) MallocOrDie (sizeof (char *) * nseq);
579
+  for (idx = 0; idx < nseq; idx++)
580
+    aseqs[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1));
581
+  for (idx = 0; idx < nseq; idx++)
582
+    {
583
+      apos = rpos = 0;
584
+
585
+      for (statepos = 0; statepos <= M; statepos++)
586
+	{
587
+	  for (count = 0; count < ins[idx][statepos]; count++)
588
+	    aseqs[idx][apos++] = rseqs[idx][rpos++];
589
+	  for (; count < master_ins[statepos]; count++)
590
+	    aseqs[idx][apos++] = ' ';
591
+
592
+	  if (statepos != M)
593
+	    aseqs[idx][apos++] = rseqs[idx][rpos++];
594
+	}
595
+      aseqs[idx][alen] = '\0';
596
+    }
597
+  ainfo->flags = 0;
598
+  ainfo->alen  = alen; 
599
+  ainfo->nseq  = nseq;
600
+  ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq);
601
+  for (idx = 0; idx < nseq; idx++)
602
+    SeqinfoCopy(&(ainfo->sqinfo[idx]), &(sqinfo[idx]));
603
+
604
+  free(rlen);
605
+  free(master_ins);
606
+  Free2DArray((void **) ins, nseq);
607
+  *ret_aseqs = aseqs;
608
+  return 1;
609
+}
610
+
611
+/* Function: AlignmentHomogenousGapsym()
612
+ * Date:     SRE, Sun Mar 19 19:37:12 2000 [wren, St. Louis]
613
+ *
614
+ * Purpose:  Sometimes we've got to convert alignments to 
615
+ *           a lowest common denominator, and we need
616
+ *           a single specific gap character -- for example,
617
+ *           PSI-BLAST blastpgp -B takes a very simplistic
618
+ *           alignment input format which appears to only
619
+ *           allow '-' as a gap symbol.
620
+ *           
621
+ *           Anything matching the isgap() macro is
622
+ *           converted.
623
+ *
624
+ * Args:     aseq   - aligned character strings, [0..nseq-1][0..alen-1]
625
+ *           nseq   - number of aligned strings
626
+ *           alen   - length of alignment
627
+ *           gapsym - character to use for gaps.         
628
+ *
629
+ * Returns:  void ("never fails")
630
+ */
631
+void
632
+AlignmentHomogenousGapsym(char **aseq, int nseq, int alen, char gapsym)
633
+{
634
+  int i, apos;
635
+
636
+  for (i = 0; i < nseq; i++)
637
+    for (apos = 0; apos < alen; apos++)
638
+      if (isgap(aseq[i][apos])) aseq[i][apos] = gapsym;
639
+}