Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,192 @@
1
+/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
+
3
+/*********************************************************************
4
+ * Clustal Omega - Multiple sequence alignment
5
+ *
6
+ * Copyright (C) 2010 University College Dublin
7
+ *
8
+ * Clustal-Omega is free software; you can redistribute it and/or
9
+ * modify it under the terms of the GNU General Public License as
10
+ * published by the Free Software Foundation; either version 2 of the
11
+ * License, or (at your option) any later version.
12
+ *
13
+ * This file is part of Clustal-Omega.
14
+ *
15
+ ********************************************************************/
16
+
17
+/*
18
+ * RCS $Id: hhhit.h 243 2011-05-31 13:49:19Z fabian $
19
+ */
20
+
21
+// hhhit.h
22
+
23
+//////////////////////////////////////////////////////////////////////////////
24
+/* Describes an alignment of two profiles. 
25
+   Used as list element in Hits : List<Hit> */
26
+//////////////////////////////////////////////////////////////////////////////
27
+class Hit
28
+{
29
+ public:  
30
+  char* longname;       // Name of HMM
31
+  char* name;           // One-word name of HMM
32
+  char* file;           // Basename (with path, without extension) of alignment file that was used to construct the HMM
33
+                        // (path from db-file is prepended to FILE record in HMM file!)
34
+  char fam[IDLEN];      // family ID (derived from name) (FAM field)
35
+  char sfam[IDLEN];     // superfamily ID (derived from name) 
36
+  char fold[IDLEN];     // fold ID (derived from name)
37
+  char cl[IDLEN];       // class ID (derived from name)
38
+  int index;            // index of HMM in order of reading in (first=0)
39
+  char* dbfile;         // full database file name from which HMM was read
40
+  long ftellpos;        // start position of HMM in database file
41
+
42
+  float score;          // Score of alignment (i.e. of Viterbi path)
43
+  float score_sort;     // score to sort hits in output list (negative means first/best!)
44
+  float score_aass;     // first: just hit.score, then hit.logPval-SSSCORE2NATLOG*hit.score_ss;(negative means best!)
45
+  float score_ss;       // Part of score due to secondary structure
46
+  float Pval;           // P-value for whole protein based on score distribution of query
47
+  float Pvalt;          // P-value for whole protein based on score distribution of template
48
+  float logPval;        // natural logarithm of Pval
49
+  float logPvalt;       // natural logarithm of Pvalt
50
+  float Eval;           // E-value for whole protein
51
+  float Probab;         // probability in % for a positive (depends only on score)
52
+  float weight;         // weight of hit for P-value calculation (= 1/#HMMs-in-family/#families-in-superfamily)
53
+  double Pforward;      // scaled total forward probability : Pforward * Product_{i=1}^{Lq+1}(scale[i])
54
+  
55
+/*   float score_comp;     // compositional similarity score */
56
+/*   float logPcomp;       // natural logarithm of Pvalue for compositional similarity score */
57
+/*   float Prep;           // P-value for single-repeat hit */
58
+/*   float Erep;           // E-value for single-repeat hit */
59
+/*   float logPrep;        // natural logarithm of P-value for single-repeat hit */
60
+  float E1val;          // E-value for whole protein from transitive scoring
61
+  float logP1val;       // natural logarithm of P1val, the transitive P-value
62
+
63
+  int L;                // Number of match states in template
64
+  int irep;             // Index  of single-repeat hit (1: highest scoring repeat hit)
65
+  int nrep;             // Number of single-repeat hits with one template
66
+  
67
+  int n_display;        // number of sequences stored for display of alignment 
68
+  char** sname;         // names of stored sequences 
69
+  char** seq;           // residues of stored sequences (first at pos 1)
70
+  int nss_dssp;         // index of dssp secondary structure sequence in seq[]
71
+  int nsa_dssp;         // index of of dssp solvent accessibility in seq[]
72
+  int nss_pred;         // index of dssp secondary structure sequence in seq[]
73
+  int nss_conf;         // index of dssp secondary structure sequence in seq[]
74
+  int nfirst;           // index of query sequence in seq[]
75
+  int ncons;            // index of consensus sequence
76
+  
77
+  int nsteps;           // index for last step in Viterbi path; (first=1)
78
+  int* i;               // i[step] = query match state at step of Viterbi path
79
+  int* j;               // j[step] = template match state at step of Viterbi path
80
+  char* states;         // state at step of Viterbi path  0: Start  1: M(MM)  2: A(-D)  3: B(IM)  4: C(D-)  5 D(MI)
81
+  float* S;             // S[step] = match-match score contribution at alignment step
82
+  float* S_ss;          // S_ss[step] = secondary structure score contribution
83
+  float* P_posterior;   // P_posterior[step] = posterior prob for MM states (otherwise zero)
84
+  char* Xcons;          // consensus sequence for aligned states in internal representation (A=0 R=1 N=2 D=3 ...)
85
+  int i1;               // First aligned residue in query
86
+  int i2;               // Last aligned residue in query
87
+  int j1;               // First aligned residue in template 
88
+  int j2;               // Last aligned residue in template
89
+  int matched_cols;     // number of matched columns in alignment against query
90
+  int ssm1;             // SS scoring AFTER  alignment? 0:no  1:yes; t->dssp q->psipred  2:yes; q->dssp t->psipred
91
+  int ssm2;             // SS scoring DURING alignment? 0:no  1:yes; t->dssp q->psipred  2:yes; q->dssp t->psipred
92
+  char self;            // 0: align two different HMMs  1: align HMM with itself
93
+  int min_overlap;      // Minimum overlap between query and template
94
+  float sum_of_probs;   // sum of probabilities for Maximum ACcuracy alignment (if dssp states defined, only aligned pairs with defined dssp state contribute to sum)
95
+  float Neff_HMM;       // Diversity of underlying alignment
96
+
97
+  // Constructor (only set pointers to NULL)
98
+  Hit();
99
+  ~Hit(){};
100
+  
101
+  // Free all allocated memory (to delete list of hits)
102
+  void Delete();
103
+
104
+  // Allocate/delete memory for dynamic programming matrix
105
+  void AllocateBacktraceMatrix(int Nq, int Nt);
106
+  void DeleteBacktraceMatrix(int Nq);
107
+  void AllocateForwardMatrix(int Nq, int Nt);
108
+  void DeleteForwardMatrix(int Nq);
109
+  void AllocateBackwardMatrix(int Nq, int Nt);
110
+  void DeleteBackwardMatrix(int Nq);
111
+  
112
+  // Compare an HMM with overlapping subalignments
113
+  void Viterbi(HMM& q, HMM& t, float** Sstruc=NULL);
114
+
115
+  // Compare two HMMs with each other in lin space
116
+  int Forward(HMM& q, HMM& t, float** Pstruc=NULL);
117
+
118
+  // Compare two HMMs with each other in lin space
119
+  int Backward(HMM& q, HMM& t);
120
+
121
+   // Find maximum accuracy alignment (after running Forward and Backward algorithms)
122
+  void MACAlignment(HMM& q, HMM& t);
123
+
124
+  // Trace back alignment of two profiles based on matrices bXX[][]
125
+  void Backtrace(HMM& q, HMM& t);
126
+
127
+  // Trace back alignment of two profiles based on matrices bXX[][]
128
+  void StochasticBacktrace(HMM& q, HMM& t, char maximize=0);
129
+
130
+  // Trace back MAC alignment of two profiles based on matrix bMM[][]
131
+  void BacktraceMAC(HMM& q, HMM& t);
132
+
133
+  // Calculate secondary structure score between columns i and j of two HMMs (query and template)
134
+  inline float ScoreSS(HMM& q, HMM& t, int i, int j, int ssm);
135
+
136
+  // Calculate secondary structure score between columns i and j of two HMMs (query and template)
137
+  inline float ScoreSS(HMM& q, HMM& t, int i, int j);
138
+
139
+  // Calculate total score (including secondary structure score and compositional bias correction
140
+  inline float ScoreTot(HMM& q, HMM& t, int i, int j);
141
+
142
+  // Calculate score (excluding secondary structure score and compositional bias correction
143
+  inline float ScoreAA(HMM& q, HMM& t, int i, int j);
144
+
145
+  // Comparison (used to sort list of hits)
146
+  int operator<(const Hit& hit2)  {return score_sort<hit2.score_sort;}
147
+
148
+  // Merge HMM with next aligned HMM  
149
+  void MergeHMM(HMM& Q, HMM& t, float wk[]);
150
+
151
+#ifdef CLUSTALO
152
+  void ClobberGlobal(void);
153
+#endif  
154
+
155
+
156
+  double** B_MM;        // Backward matrices
157
+  
158
+private:
159
+  char state;          // 0: Start/STOP state  1: MM state  2: GD state (-D)  3: IM state  4: DG state (D-)  5 MI state
160
+  char** bMM;          // (backtracing) bMM[i][j] = STOP:start of alignment  MM:prev was MM  GD:prev was GD etc
161
+  char** bGD;          // (backtracing) bMM[i][j] = STOP:start of alignment  MM:prev was MM  SAME:prev was GD
162
+  char** bDG;          // (backtracing)
163
+  char** bIM;          // (backtracing)
164
+  char** bMI;          // (backtracing)
165
+  char** cell_off;     // cell_off[i][j]=1 means this cell will get score -infinity
166
+
167
+  double** F_MM;        // Forward matrices 
168
+  double** F_GD;        // F_XY[i][j] * Prod_1^i(scale[i]) 
169
+  double** F_DG;        //   = Sum_x1..xl{ P(HMMs aligned up to Xi||Yj co-emmitted x1..xl ) / (Prod_k=1^l f(x_k)) }   
170
+  double** F_IM;        // end gaps are not penalized!
171
+  double** F_MI;        // 
172
+  double* scale;        // 
173
+
174
+  double** B_GD;        // B_XY[i][j] * Prod_i+1^(L+1) (scale[i])
175
+  double** B_DG;        //   = Sum_x2..xl{ P(HMMs aligned from Xi||Yj to end co-emmitted x2..xl ) / (Prod_k=2^l f(x_k)) }   
176
+  double** B_IM;        // end gaps are not penalized!
177
+  double** B_MI;        // 
178
+
179
+  void InitializeBacktrace(HMM& q, HMM& t);
180
+  void InitializeForAlignment(HMM& q, HMM& t);
181
+};
182
+
183
+
184
+double Pvalue(double x, double a[]);
185
+double Pvalue(float x, float lamda, float mu);
186
+double logPvalue(float x, float lamda, float mu);
187
+double logPvalue(float x, double a[]);
188
+double Probab(Hit& hit);
189
+
190
+
191
+
192
+