Browse code

add package to the repository

msa


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a8-67e5-0310-9ffc-ced21a209358

Sonali Arora authored on 10/04/2015 00:12:33
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,763 @@
1
+/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
+
3
+/*********************************************************************
4
+ * Clustal Omega - Multiple sequence alignment
5
+ *
6
+ * Copyright (C) 2010 University College Dublin
7
+ *
8
+ * Clustal-Omega is free software; you can redistribute it and/or
9
+ * modify it under the terms of the GNU General Public License as
10
+ * published by the Free Software Foundation; either version 2 of the
11
+ * License, or (at your option) any later version.
12
+ *
13
+ * This file is part of Clustal-Omega.
14
+ *
15
+ ********************************************************************/
16
+
17
+/*
18
+ * RCS $Id: hhmatrices-C.h 274 2012-04-24 23:28:24Z dave $
19
+ */
20
+
21
+// Substitution matrices and their background frequencies
22
+
23
+// The following background frequencies were calculated by the formula pa = (P[a,b]/(pa*pb))^(-1) * (1,...,1)
24
+// For the Blousum50-matrix this becomes  pb[a]= SUM_(b=1,20) (2^(BLOSUM50[a,b]/3))^(-1) 
25
+//                     A    R    N    D    C    Q    E    G    H    I    L     K    M    F    P    S    T    W    Y    V 
26
+// Gonnet            7.68,5.14,4.02,5.41,1.89,3.27,5.99,7.56,3.69,5.06,10.01,5.97,2.20,3.50,4.54,4.67,7.12,1.25,3.95,7.28
27
+// BLOSUM50          8.24,6.24,4.46,4.77,2.03,2.90,6.78,6.69,2.53,6.89,10.7 ,5.04,1.49,4.93,3.97,5.95,6.13,1.34,3.45,6.28
28
+
29
+const float Gonnet[]={
30
+//  A     R     N     D     C     Q     E     G     H     I     L     K     M     F     P     S     T     W     Y     V 
31
+ 10227, 3430, 2875, 3869, 1625, 2393, 4590, 6500, 2352, 3225, 5819, 4172, 1435, 1579, 3728, 4610, 6264,  418, 1824, 5709, // A
32
+  3430, 7780, 2209, 2589,  584, 2369, 3368, 3080, 2173, 1493, 3093, 5701,  763,  859, 1893, 2287, 3487,  444, 1338, 2356, // R 
33
+  2875, 2209, 3868, 3601,  501, 1541, 2956, 3325, 1951, 1065, 2012, 2879,  532,  688, 1480, 2304, 3204,  219, 1148, 1759, // N
34
+  3869, 2589, 3601, 8618,  488, 2172, 6021, 4176, 2184, 1139, 2151, 3616,  595,  670, 2086, 2828, 3843,  204, 1119, 2015, // D
35
+  1625,  584,  501,  488, 5034,  355,  566,  900,  516,  741, 1336,  591,  337,  549,  419,  901, 1197,  187,  664, 1373, // C
36
+  2393, 2369, 1541, 2172,  355, 1987, 2891, 1959, 1587, 1066, 2260, 2751,  570,  628, 1415, 1595, 2323,  219,  871, 1682, // Q
37
+  4590, 3368, 2956, 6021,  566, 2891, 8201, 3758, 2418, 1624, 3140, 4704,  830,  852, 2418, 2923, 4159,  278, 1268, 2809, // E
38
+  6500, 3080, 3325, 4176,  900, 1959, 3758,26066, 2016, 1354, 2741, 3496,  741,  797, 2369, 3863, 4169,  375, 1186, 2569, // G
39
+  2352, 2173, 1951, 2184,  516, 1587, 2418, 2016, 5409, 1123, 2380, 2524,  600, 1259, 1298, 1642, 2446,  383,  876, 1691, // H
40
+  3225, 1493, 1065, 1139,  741, 1066, 1624, 1354, 1123, 6417, 9630, 1858, 1975, 2225, 1260, 1558, 3131,  417, 1697, 7504, // I
41
+  5819, 3093, 2012, 2151, 1336, 2260, 3140, 2741, 2380, 9630,25113, 3677, 4187, 5540, 2670, 2876, 5272, 1063, 3945,11005, // L
42
+  4172, 5701, 2879, 3616,  591, 2751, 4704, 3496, 2524, 1858, 3677, 7430,  949,  975, 2355, 2847, 4340,  333, 1451, 2932, // K
43
+  1435,  763,  532,  595,  337,  570,  830,  741,  600, 1975, 4187,  949, 1300, 1111,  573,  743, 1361,  218,  828, 2310, // M
44
+  1579,  859,  688,  670,  549,  628,  852,  797, 1259, 2225, 5540,  975, 1111, 6126,  661,  856, 1498, 1000, 4464, 2602, // F
45
+  3728, 1893, 1480, 2086,  419, 1415, 2418, 2369, 1298, 1260, 2670, 2355,  573,  661,11834, 2320, 3300,  179,  876, 2179, // P
46
+  4610, 2287, 2304, 2828,  901, 1595, 2923, 3863, 1642, 1558, 2876, 2847,  743,  856, 2320, 3611, 4686,  272, 1188, 2695, // S
47
+  6264, 3487, 3204, 3843, 1197, 2323, 4159, 4169, 2446, 3131, 5272, 4340, 1361, 1498, 3300, 4686, 8995,  397, 1812, 5172, // T
48
+   418,  444,  219,  204,  187,  219,  278,  375,  383,  417, 1063,  333,  218, 1000,  179,  272,  397, 4101, 1266,  499, // W
49
+  1824, 1338, 1148, 1119,  664,  871, 1268, 1186,  876, 1697, 3945, 1451,  828, 4464,  876, 1188, 1812, 1266, 9380, 2227, // Y
50
+  5709, 2356, 1759, 2015, 1373, 1682, 2809, 2569, 1691, 7504,11005, 2932, 2310, 2602, 2179, 2695, 5172,  499, 2227,11569};// V
51
+
52
+const float Blosum30[]= {
53
+//    A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V 
54
+  0.0096,
55
+  0.0038,0.0109,
56
+  0.0031,0.0019,0.0055,
57
+  0.0043,0.0026,0.0027,0.0095,
58
+  0.0014,0.0011,0.0010,0.0010,0.0070,
59
+  0.0031,0.0031,0.0014,0.0018,0.0007,0.0039,
60
+  0.0044,0.0031,0.0024,0.0037,0.0018,0.0028,0.0094,
61
+  0.0052,0.0032,0.0032,0.0035,0.0011,0.0020,0.0035,0.0173,
62
+  0.0016,0.0014,0.0011,0.0011,0.0004,0.0010,0.0018,0.0015,0.0060,
63
+  0.0040,0.0022,0.0024,0.0018,0.0012,0.0016,0.0023,0.0036,0.0012,0.0072,
64
+  0.0056,0.0039,0.0032,0.0043,0.0023,0.0027,0.0051,0.0051,0.0022,0.0066,0.0139,
65
+  0.0044,0.0043,0.0027,0.0032,0.0010,0.0021,0.0053,0.0039,0.0013,0.0026,0.0044,0.0063,
66
+  0.0018,0.0012,0.0009,0.0008,0.0004,0.0007,0.0012,0.0013,0.0008,0.0014,0.0027,0.0017,0.0012,
67
+  0.0027,0.0023,0.0017,0.0011,0.0008,0.0010,0.0016,0.0022,0.0008,0.0027,0.0055,0.0023,0.0008,0.0077,
68
+  0.0028,0.0021,0.0012,0.0021,0.0007,0.0015,0.0030,0.0030,0.0014,0.0017,0.0027,0.0029,0.0005,0.0011,0.0091,
69
+  0.0056,0.0035,0.0028,0.0034,0.0013,0.0021,0.0038,0.0051,0.0017,0.0033,0.0047,0.0042,0.0010,0.0027,0.0024,0.0075,
70
+  0.0040,0.0019,0.0024,0.0024,0.0009,0.0016,0.0023,0.0028,0.0010,0.0027,0.0046,0.0025,0.0010,0.0016,0.0020,0.0041,0.0046,
71
+  0.0005,0.0007,0.0002,0.0004,0.0003,0.0004,0.0007,0.0011,0.0002,0.0005,0.0009,0.0006,0.0002,0.0007,0.0004,0.0005,0.0003,0.0027,
72
+  0.0014,0.0022,0.0008,0.0015,0.0004,0.0011,0.0016,0.0016,0.0010,0.0018,0.0045,0.0017,0.0007,0.0024,0.0011,0.0017,0.0014,0.0009,0.0044,
73
+  0.0056,0.0031,0.0022,0.0027,0.0012,0.0015,0.0026,0.0033,0.0012,0.0063,0.0074,0.0030,0.0015,0.0032,0.0017,0.0036,0.0036,0.0005,0.0024,0.0083};
74
+
75
+const float Blosum40[]= {
76
+//    A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V 
77
+  0.0148,
78
+  0.0029,0.0109,
79
+  0.0029,0.0019,0.0069,
80
+  0.0032,0.0021,0.0031,0.0126,
81
+  0.0015,0.0007,0.0007,0.0009,0.0093,
82
+  0.0026,0.0024,0.0017,0.0016,0.0004,0.0048,
83
+  0.0040,0.0026,0.0023,0.0048,0.0010,0.0032,0.0118,
84
+  0.0066,0.0023,0.0035,0.0031,0.0012,0.0020,0.0028,0.0260,
85
+  0.0014,0.0012,0.0013,0.0014,0.0003,0.0010,0.0015,0.0014,0.0060,
86
+  0.0037,0.0017,0.0017,0.0016,0.0008,0.0012,0.0018,0.0024,0.0009,0.0105,
87
+  0.0050,0.0030,0.0021,0.0027,0.0015,0.0023,0.0036,0.0034,0.0017,0.0082,0.0209,
88
+  0.0041,0.0051,0.0027,0.0030,0.0010,0.0026,0.0045,0.0035,0.0013,0.0023,0.0037,0.0099,
89
+  0.0017,0.0010,0.0007,0.0007,0.0003,0.0007,0.0010,0.0013,0.0007,0.0018,0.0037,0.0012,0.0018,
90
+  0.0022,0.0016,0.0013,0.0013,0.0008,0.0008,0.0015,0.0021,0.0009,0.0031,0.0055,0.0018,0.0011,0.0105,
91
+  0.0027,0.0014,0.0014,0.0019,0.0005,0.0013,0.0026,0.0028,0.0008,0.0020,0.0021,0.0023,0.0007,0.0010,0.0151,
92
+  0.0060,0.0025,0.0030,0.0030,0.0013,0.0026,0.0034,0.0052,0.0013,0.0025,0.0034,0.0034,0.0011,0.0019,0.0022,0.0089,
93
+  0.0040,0.0019,0.0022,0.0024,0.0011,0.0015,0.0025,0.0029,0.0009,0.0028,0.0039,0.0029,0.0011,0.0019,0.0022,0.0043,0.0070,
94
+  0.0007,0.0005,0.0003,0.0003,0.0001,0.0004,0.0005,0.0008,0.0002,0.0005,0.0009,0.0005,0.0002,0.0008,0.0003,0.0004,0.0003,0.0045,
95
+  0.0019,0.0016,0.0010,0.0011,0.0004,0.0010,0.0014,0.0017,0.0012,0.0020,0.0031,0.0017,0.0010,0.0032,0.0009,0.0015,0.0014,0.0008,0.0060,
96
+  0.0054,0.0023,0.0017,0.0022,0.0012,0.0014,0.0025,0.0028,0.0008,0.0083,0.0082,0.0027,0.0018,0.0031,0.0018,0.0032,0.0040,0.0005,0.0020,0.0113};
97
+ 
98
+const float Blosum50[]= {
99
+//    A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V 
100
+  0.0192,
101
+  0.0027,0.0152,
102
+  0.0024,0.0020,0.0101,
103
+  0.0026,0.0019,0.0035,0.0161,
104
+  0.0015,0.0005,0.0006,0.0005,0.0091,
105
+  0.0022,0.0025,0.0016,0.0017,0.0004,0.0057,
106
+  0.0034,0.0029,0.0023,0.0048,0.0006,0.0033,0.0141,
107
+  0.0062,0.0020,0.0031,0.0028,0.0009,0.0017,0.0023,0.0316,
108
+  0.0012,0.0013,0.0015,0.0011,0.0003,0.0010,0.0013,0.0011,0.0064,
109
+  0.0035,0.0015,0.0013,0.0012,0.0008,0.0011,0.0015,0.0018,0.0007,0.0140,
110
+  0.0048,0.0028,0.0017,0.0018,0.0014,0.0019,0.0026,0.0027,0.0013,0.0104,0.0304,
111
+  0.0033,0.0064,0.0027,0.0026,0.0006,0.0029,0.0043,0.0028,0.0014,0.0017,0.0027,0.0130,
112
+  0.0016,0.0009,0.0007,0.0005,0.0004,0.0008,0.0008,0.0009,0.0005,0.0022,0.0042,0.0010,0.0029,
113
+  0.0020,0.0012,0.0009,0.0008,0.0006,0.0007,0.0012,0.0015,0.0009,0.0030,0.0058,0.0012,0.0012,0.0154,
114
+  0.0022,0.0011,0.0011,0.0015,0.0004,0.0011,0.0019,0.0019,0.0006,0.0013,0.0017,0.0018,0.0005,0.0007,0.0171,
115
+  0.0062,0.0025,0.0032,0.0028,0.0011,0.0022,0.0030,0.0044,0.0012,0.0021,0.0029,0.0031,0.0010,0.0016,0.0018,0.0111,
116
+  0.0039,0.0021,0.0026,0.0022,0.0010,0.0015,0.0024,0.0025,0.0009,0.0029,0.0038,0.0026,0.0011,0.0015,0.0016,0.0047,0.0100,
117
+  0.0005,0.0004,0.0002,0.0002,0.0001,0.0003,0.0004,0.0005,0.0002,0.0005,0.0008,0.0004,0.0003,0.0009,0.0002,0.0003,0.0004,0.0059,
118
+  0.0015,0.0013,0.0009,0.0009,0.0004,0.0009,0.0012,0.0012,0.0013,0.0018,0.0027,0.0013,0.0007,0.0039,0.0006,0.0013,0.0012,0.0008,0.0077,
119
+  0.0054,0.0020,0.0015,0.0016,0.0013,0.0014,0.0020,0.0022,0.0007,0.0107,0.0092,0.0022,0.0021,0.0030,0.0016,0.0029,0.0041,0.0005,0.0018,0.0164};
120
+
121
+const float Blosum65[]= {
122
+//  A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V
123
+  0.0222,
124
+  0.0022,0.0181,
125
+  0.0019,0.0019,0.0148,
126
+  0.0021,0.0015,0.0037,0.0225,
127
+  0.0016,0.0004,0.0004,0.0004,0.0127,
128
+  0.0018,0.0024,0.0015,0.0016,0.0003,0.0076,
129
+  0.0029,0.0025,0.0021,0.0049,0.0003,0.0034,0.0168,
130
+  0.0057,0.0016,0.0027,0.0024,0.0007,0.0013,0.0018,0.0396,
131
+  0.0010,0.0013,0.0014,0.0009,0.0002,0.0010,0.0013,0.0009,0.0096,
132
+  0.0031,0.0012,0.0009,0.0011,0.0012,0.0008,0.0012,0.0013,0.0006,0.0191,
133
+  0.0043,0.0023,0.0013,0.0014,0.0016,0.0016,0.0019,0.0020,0.0009,0.0115,0.0388,
134
+  0.0032,0.0062,0.0024,0.0024,0.0005,0.0030,0.0040,0.0024,0.0012,0.0015,0.0024,0.0166,
135
+  0.0013,0.0007,0.0005,0.0004,0.0004,0.0007,0.0006,0.0007,0.0003,0.0025,0.0052,0.0008,0.0045,
136
+  0.0016,0.0009,0.0007,0.0007,0.0005,0.0005,0.0008,0.0011,0.0008,0.0030,0.0056,0.0009,0.0012,0.0186,
137
+  0.0021,0.0009,0.0008,0.0011,0.0003,0.0008,0.0014,0.0013,0.0005,0.0009,0.0013,0.0015,0.0004,0.0005,0.0195,
138
+  0.0065,0.0022,0.0030,0.0027,0.0011,0.0018,0.0029,0.0037,0.0011,0.0017,0.0024,0.0030,0.0008,0.0012,0.0016,0.0137,
139
+  0.0037,0.0017,0.0022,0.0019,0.0009,0.0013,0.0021,0.0022,0.0007,0.0027,0.0033,0.0023,0.0010,0.0012,0.0013,0.0048,0.0133,
140
+  0.0004,0.0003,0.0002,0.0001,0.0002,0.0002,0.0002,0.0004,0.0002,0.0004,0.0007,0.0003,0.0002,0.0009,0.0001,0.0003,0.0003,0.0074,
141
+  0.0013,0.0009,0.0007,0.0006,0.0004,0.0006,0.0008,0.0008,0.0016,0.0015,0.0023,0.0010,0.0006,0.0043,0.0004,0.0010,0.0009,0.0010,0.0113,
142
+  0.0049,0.0015,0.0011,0.0012,0.0014,0.0011,0.0016,0.0017,0.0006,0.0120,0.0094,0.0019,0.0023,0.0025,0.0012,0.0023,0.0035,0.0004,0.0015,0.0206};
143
+
144
+
145
+const float Blosum80[]= {
146
+//  A      R      N      D      C      Q      E      G      H      I      L      K      M      F      P      S      T      W      Y      V
147
+  0.0252,
148
+  0.0020,0.0210,
149
+  0.0016,0.0017,0.0166,
150
+  0.0018,0.0013,0.0037,0.0262,
151
+  0.0015,0.0003,0.0004,0.0003,0.0172,
152
+  0.0017,0.0024,0.0014,0.0014,0.0003,0.0094,
153
+  0.0028,0.0023,0.0019,0.0048,0.0003,0.0035,0.0208,
154
+  0.0053,0.0015,0.0025,0.0022,0.0006,0.0011,0.0017,0.0463,
155
+  0.0009,0.0012,0.0012,0.0008,0.0002,0.0011,0.0012,0.0008,0.0104,
156
+  0.0027,0.0010,0.0007,0.0008,0.0011,0.0007,0.0010,0.0009,0.0004,0.0220,
157
+  0.0036,0.0018,0.0011,0.0011,0.0014,0.0014,0.0015,0.0016,0.0008,0.0111,0.0442,
158
+  0.0029,0.0061,0.0022,0.0020,0.0004,0.0028,0.0036,0.0020,0.0010,0.0012,0.0019,0.0190,
159
+  0.0011,0.0006,0.0004,0.0003,0.0004,0.0007,0.0006,0.0005,0.0003,0.0025,0.0052,0.0007,0.0053,
160
+  0.0014,0.0007,0.0006,0.0006,0.0005,0.0005,0.0006,0.0009,0.0007,0.0027,0.0052,0.0007,0.0010,0.0211,
161
+  0.0021,0.0009,0.0007,0.0009,0.0003,0.0007,0.0012,0.0010,0.0004,0.0007,0.0012,0.0012,0.0003,0.0004,0.0221,
162
+  0.0064,0.0020,0.0029,0.0024,0.0010,0.0017,0.0026,0.0034,0.0010,0.0015,0.0021,0.0026,0.0007,0.0010,0.0014,0.0167,
163
+  0.0036,0.0015,0.0020,0.0016,0.0009,0.0012,0.0019,0.0019,0.0007,0.0024,0.0028,0.0020,0.0009,0.0011,0.0011,0.0048,0.0156,
164
+  0.0003,0.0002,0.0001,0.0001,0.0001,0.0002,0.0002,0.0003,0.0001,0.0003,0.0006,0.0002,0.0002,0.0007,0.0001,0.0002,0.0002,0.0087,
165
+  0.0011,0.0007,0.0006,0.0005,0.0003,0.0005,0.0006,0.0006,0.0016,0.0013,0.0020,0.0008,0.0005,0.0046,0.0003,0.0009,0.0008,0.0010,0.0148,
166
+  0.0046,0.0013,0.0009,0.0010,0.0013,0.0010,0.0015,0.0014,0.0005,0.0123,0.0089,0.0015,0.0022,0.0022,0.0010,0.0021,0.0033,0.0004,0.0012,0.0246};
167
+
168
+const float mism = 2000; // standard mismatch score
169
+const float matc = 7000;
170
+
171
+// RNA matrix, Gonnet type format  
172
+const float ribosum[]={
173
+//  A     C     G     T     U     R     Y     M     K     S     W     B     D     H     V     N     ?     ?     ?     ? 
174
+  9500,  600,  700,  900,  900, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // A
175
+   600, 4000,  300, 1200, 1200, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // C 
176
+   700,  300, 3500,  700,  700, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // G
177
+   900, 1200,  700, 8200, 8200, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // T
178
+   900, 1200,  700, 8200, 8200, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // U
179
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // R
180
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // Y
181
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // M
182
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // K
183
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // S
184
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // W
185
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // B
186
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // D
187
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // H
188
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // V
189
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // N
190
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
191
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
192
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
193
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism};// ?
194
+
195
+const float dna_basic[]={
196
+//  A     C     G     T     U     R     Y     M     K     S     W     B     D     H     V     N     ?     ?     ?     ? 
197
+  matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // A
198
+  mism, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // C 
199
+  mism, mism, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // G
200
+  mism, mism, mism, matc, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // T
201
+  mism, mism, mism, matc, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // U
202
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // R
203
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // Y
204
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // M
205
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // K
206
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // S
207
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // W
208
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // B
209
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // D
210
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // H
211
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // V
212
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // N
213
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
214
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
215
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
216
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism};// ?
217
+  
218
+const float dave_nucleo[]={
219
+//  A     C     G     T     U     R     Y     M     K     S     W     B     D     H     V     N     ?     ?     ?     ? 
220
+  8000, 1500, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // A
221
+  1500, 6500, 1000, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // C 
222
+  mism, 1000, 6500, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // G
223
+  mism, mism, mism, 7000, 7000, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // T
224
+  mism, mism, mism, 7000, 7000, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // U
225
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // R
226
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // Y
227
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // M
228
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // K
229
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // S
230
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // W
231
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // B
232
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // D
233
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // H
234
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // V
235
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // N
236
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
237
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
238
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ?
239
+  mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism};// ?
240
+  
241
+// prediction accuracy of Psipred: 
242
+// Ppred[cf][B][A] = P(A,B,cf)/P(A)/P(B,cf) = P(A|B,cf)/P(A)
243
+// A = observed ss state  B = predicted ss state  cf = confidence value of prediction 
244
+//float Ppred[MAXCF][NSSPRED][NDSSP]= 
245
+const float Ppred[]= 
246
+  {
247
+//pred/obs  -      H      E      ~      S      T      G      B
248
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=-
249
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // H
250
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // E
251
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // ~
252
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=0
253
+        1.000, 1.128, 0.519, 0.834, 0.957, 1.488, 2.106, 1.085 , // H
254
+        1.000, 0.233, 2.240, 1.216, 0.913, 0.519, 0.923, 1.759 , // E
255
+        1.000, 0.640, 1.017, 1.122, 1.069, 1.242, 2.140, 1.999 , // ~
256
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=1
257
+        1.000, 1.251, 0.485, 0.771, 0.847, 1.371, 2.266, 0.864 , // H
258
+        1.000, 0.222, 2.542, 1.069, 0.804, 0.428, 0.671, 1.728 , // E
259
+        1.000, 0.474, 1.103, 1.295, 1.232, 1.214, 1.835, 1.989 , // ~
260
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=2
261
+        1.000, 1.383, 0.426, 0.637, 0.778, 1.349, 2.436, 0.824 , // H
262
+        1.000, 0.202, 2.769, 0.999, 0.714, 0.320, 0.551, 1.566 , // E
263
+        1.000, 0.395, 1.005, 1.407, 1.376, 1.336, 1.725, 2.063 , // ~
264
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=3
265
+        1.000, 1.531, 0.369, 0.552, 0.682, 1.280, 2.420, 0.698 , // H
266
+        1.000, 0.169, 2.970, 0.954, 0.556, 0.273, 0.489, 1.504 , // E
267
+        1.000, 0.352, 0.843, 1.515, 1.542, 1.456, 1.684, 1.958 , // ~
268
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=4
269
+        1.000, 1.750, 0.305, 0.444, 0.537, 1.134, 2.295, 0.600 , // H
270
+        1.000, 0.124, 3.179, 0.847, 0.513, 0.228, 0.413, 1.897 , // E
271
+        1.000, 0.282, 0.718, 1.664, 1.630, 1.577, 1.625, 1.877 , // ~
272
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=5
273
+        1.000, 1.952, 0.250, 0.353, 0.456, 0.982, 2.050, 0.466 , // H
274
+        1.000, 0.102, 3.464, 0.699, 0.453, 0.174, 0.284, 1.357 , // E
275
+        1.000, 0.227, 0.574, 1.782, 1.846, 1.681, 1.418, 1.885 , // ~
276
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=6
277
+        1.000, 2.183, 0.171, 0.263, 0.319, 0.792, 1.933, 0.345 , // H
278
+        1.000, 0.079, 3.712, 0.612, 0.281, 0.133, 0.196, 1.089 , // E
279
+        1.000, 0.173, 0.458, 1.915, 2.007, 1.766, 1.220, 1.704 , // ~
280
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=7
281
+        1.000, 2.389, 0.132, 0.192, 0.224, 0.605, 1.605, 0.183 , // H
282
+        1.000, 0.053, 3.997, 0.449, 0.201, 0.072, 0.141, 0.919 , // E
283
+        1.000, 0.109, 0.328, 2.013, 2.304, 1.882, 0.960, 1.512 , // ~
284
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=8
285
+        1.000, 2.668, 0.065, 0.098, 0.144, 0.354, 1.059, 0.102 , // H
286
+        1.000, 0.029, 4.285, 0.284, 0.113, 0.044, 0.059, 0.522 , // E
287
+        1.000, 0.053, 0.200, 2.099, 2.444, 2.133, 0.671, 1.290 , // ~
288
+        1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=9
289
+        1.000, 2.966, 0.009, 0.023, 0.036, 0.113, 0.214, 0.017 , // H
290
+        1.000, 0.010, 4.555, 0.119, 0.027, 0.010, 0.013, 0.209 , // E
291
+        1.000, 0.026, 0.101, 2.576, 1.853, 2.204, 0.308, 0.499   // ~
292
+  };
293
+
294
+// float Ppred[]= 
295
+//   {
296
+// //pred/obs  -      H      E      ~      S      T      G      B
297
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=-
298
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
299
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
300
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
301
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=0
302
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
303
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
304
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
305
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=1
306
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
307
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
308
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
309
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=2
310
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
311
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
312
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
313
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=3
314
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
315
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
316
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
317
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=4
318
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
319
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
320
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
321
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=5
322
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
323
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
324
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
325
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=6
326
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
327
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
328
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
329
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=7
330
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
331
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
332
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
333
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=8
334
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
335
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
336
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
337
+//         1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // - conf=9
338
+//         1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500,  // H
339
+//         1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000,  // E
340
+//         1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000,  // ~
341
+
342
+
343
+float Pobs[]={0, 0.3268,0.2119,0.2061,0.0913,0.1143,0.0376,0.0120};
344
+
345
+float Pevo_full[]=
346
+  {
347
+//     -     H     E     C    
348
+    1.00, 0.00, 0.00, 0.00,
349
+    0.00, 0.94, 0.00, 0.04,  
350
+    0.00, 0.00, 0.92, 0.04,
351
+    0.00, 0.06, 0.08, 0.92
352
+  };
353
+
354
+//psipred accuracy for confidence values 0-9
355
+const float p_acc[]={0.00,0.47,0.53,0.56,0.58,0.62,0.69,0.74,0.82,0.88,0.96}; 
356
+
357
+/**
358
+ * @brief
359
+ */
360
+void 
361
+SetBlosumMatrix(const float BlosumXX[])
362
+{
363
+  int a,b,n=0;
364
+  if (v>=3) printf("Using the BLOSUM%2i matrix\n",par.matrix);
365
+  for (a=0; a<20; ++a)
366
+    for (pb[a]=0.0f, b=0; b<=a; ++b,++n)
367
+      P[a][b] = BlosumXX[n];
368
+  for (a=0; a<19; a++)
369
+    for (b=a+1; b<20; ++b)
370
+      P[a][b] = P[b][a];
371
+  for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f;
372
+  return;
373
+}
374
+
375
+/////////////////////////////////////////////////////////////////////////////////////
376
+/**
377
+ * @brief Set (global variable) substitution matrix with derived matrices and background frequencies
378
+ */
379
+void 
380
+SetSubstitutionMatrix()
381
+{
382
+  int a,b;
383
+  switch (par.matrix)
384
+    {
385
+    default:
386
+    case 0:  //Gonnet matrix
387
+      if (v>=3) cout<<"Using the Gonnet matrix ";
388
+      for (a=0; a<20; ++a)
389
+	for (pb[a]=0.0f, b=0; b<20; ++b)
390
+	  P[a][b] = 0.000001f*Gonnet[a*20+b];
391
+      for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f;
392
+      break;
393
+
394
+    case 30:  //BLOSUM30
395
+      SetBlosumMatrix(Blosum30);
396
+      break;
397
+    case 40:  //BLOSUM40
398
+      SetBlosumMatrix(Blosum40);
399
+      break;
400
+    case 50:  //BLOSUM50
401
+      SetBlosumMatrix(Blosum50);
402
+      break;
403
+    case 65:  //BLOSUM65
404
+      SetBlosumMatrix(Blosum65);
405
+      break;
406
+    case 80:  //BLOSUM80
407
+      SetBlosumMatrix(Blosum80);
408
+      break;
409
+   }
410
+  
411
+  // Check transition probability matrix, renormalize P and calculate pb[a]
412
+  float sumab=0.0f;
413
+  for (a=0; a<20; a++)
414
+    for (b=0; b<20; ++b) sumab+=P[a][b];
415
+  for (a=0; a<20; a++)
416
+    for (b=0; b<20; ++b) P[a][b]/=sumab;
417
+  for (a=0; a<20; a++)
418
+    for (pb[a]=0.0f, b=0; b<20; ++b) pb[a]+=P[a][b];
419
+
420
+  //Compute similarity matrix for amino acid pairs (for calculating consensus sequence)
421
+  for (a=0; a<20; ++a)
422
+    for (b=0; b<20; ++b)
423
+      Sim[a][b] = P[a][b]*P[a][b]/P[a][a]/P[b][b];
424
+
425
+  //Precompute matrix R for amino acid pseudocounts:
426
+  for (a=0; a<20; ++a)
427
+    for (b=0; b<20; ++b)   
428
+      R[a][b] = P[a][b]/pb[b]; //R[a][b]=P(a|b)
429
+  
430
+  //Precompute matrix R for amino acid pseudocounts:
431
+  for (a=0; a<20; ++a)
432
+    for (b=0; b<20; ++b)   
433
+      S[a][b] = log2(R[a][b]/pb[a]); // S[a][b] = log2(P(a,b)/P(a)/P(b))
434
+  
435
+  // Evaluate sequence identity underlying substitution matrix
436
+  if (v>=3)
437
+    {
438
+      float id=0.0f;
439
+      float entropy=0.0f; 
440
+      float entropy_pb=0.0f;
441
+      float mut_info=0.0f;
442
+      for (a=0; a<20; ++a) id+=P[a][a];
443
+      for (a=0; a<20; ++a) entropy_pb-=pb[a]*log2(pb[a]);
444
+      for (a=0; a<20; ++a) 
445
+	  for (b=0; b<20; ++b) 
446
+	    {
447
+	      entropy-=P[a][b]*log2(R[a][b]);
448
+	      mut_info += P[a][b]*S[a][b];
449
+	    }
450
+      
451
+      printf(": sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_pb,mut_info);
452
+    }
453
+
454
+  if (v>=4) //Debugging: probability matrix and dissimilarity matrix 
455
+    {
456
+      cout<<"Check matrix: before renormalization sum P(a,b)= "<<sumab<<"...\n";//PRINT
457
+      cout<<"      A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
458
+      cout<<"p[] ";
459
+      for (a=0; a<20; a++)  printf("%4.1f ",100*pb[a]);
460
+      cout<<endl<<"\nSubstitution matrix log2( P(a,b)/p(a)/p(b) ) (in bits):\n";
461
+      cout<<"      A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
462
+      for (b=0; b<20; b++)
463
+	{
464
+	  cout<<i2aa(b)<<"   ";
465
+	  for (a=0; a<20; a++)  printf("%4.1f ",S[a][b]);
466
+	  cout<<endl;
467
+	}
468
+      cout<<endl<<"\nOdds matrix P(a,b)/p(a)/p(b):\n";
469
+      cout<<"      A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
470
+      for (b=0; b<20; b++)
471
+	{
472
+	  cout<<i2aa(b)<<"   ";
473
+	  for (a=0; a<20; a++)  printf("%4.1f ",P[b][a]/pb[a]/pb[b]);
474
+	  cout<<endl;
475
+	}
476
+      cout<<endl<<"\nMatrix of conditional probabilities P(a|b) = P(a,b)/p(b) (in %):\n";
477
+      cout<<"      A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
478
+      for (b=0; b<20; b++)
479
+	{
480
+	  cout<<i2aa(b)<<"   ";
481
+	  for (a=0; a<20; a++)  printf("%4.1f ",100*R[b][a]);
482
+	  cout<<endl;
483
+	}
484
+      cout<<endl<<"\nProbability matrix P(a,b) (in %):\n";
485
+      cout<<"      A     R     N     D     C     Q     E     G     H     I     L     K     M     F     P     S     T     W     Y     V\n";
486
+      for (b=0; b<20; b++)
487
+	{
488
+	  cout<<i2aa(b)<<"   ";
489
+	  for (a=0; a<20; a++)  printf("%5.0f ",1000000*P[b][a]);
490
+	  cout<<endl;
491
+	}
492
+      cout<<endl<<"Similarity matrix P(a,b)^2/P(a,a,)/P(b,b) (in %):\n";
493
+      cout<<"      A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
494
+      for (b=0; b<20; b++)
495
+	{
496
+	  cout<<i2aa(b)<<"   ";
497
+	  for (a=0; a<20; a++)  printf("%4.0f ",100*Sim[b][a]);
498
+	  cout<<endl;
499
+	}
500
+      cout<<endl;
501
+
502
+
503
+    }
504
+}
505
+ 
506
+/////////////////////////////////////////////////////////////////////////////////////
507
+/**
508
+ * @brief Set (global variable) substitution matrix with derived matrices and background frequencies
509
+ */
510
+void 
511
+SetRnaSubstitutionMatrix()
512
+{
513
+	printf("SET RNA SUBSTITUTION MATRIX ....");
514
+  int a,b;
515
+  for (a=0; a<20; ++a)
516
+	for (pb[a]=0.0f, b=0; b<20; ++b)
517
+	  P[a][b] = 0.000001f*ribosum[a*20+b];
518
+      for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f;
519
+  
520
+  // Check transition probability matrix, renormalize P and calculate pb[a]
521
+  float sumab=0.0f;
522
+  for (a=0; a<20; a++)
523
+    for (b=0; b<20; ++b) sumab+=P[a][b];
524
+  for (a=0; a<20; a++)
525
+    for (b=0; b<20; ++b) P[a][b]/=sumab;
526
+  for (a=0; a<20; a++)
527
+    for (pb[a]=0.0f, b=0; b<20; ++b) pb[a]+=P[a][b];
528
+
529
+  //Compute similarity matrix for amino acid pairs (for calculating consensus sequence)
530
+  for (a=0; a<20; ++a)
531
+    for (b=0; b<20; ++b)
532
+      Sim[a][b] = P[a][b]*P[a][b]/P[a][a]/P[b][b];
533
+
534
+  //Precompute matrix R for amino acid pseudocounts:
535
+  for (a=0; a<20; ++a)
536
+    for (b=0; b<20; ++b)   
537
+      R[a][b] = P[a][b]/pb[b]; //R[a][b]=P(a|b)
538
+  
539
+  //Precompute matrix R for amino acid pseudocounts:
540
+  for (a=0; a<20; ++a)
541
+    for (b=0; b<20; ++b)   
542
+      S[a][b] = log2(R[a][b]/pb[a]); // S[a][b] = log2(P(a,b)/P(a)/P(b))
543
+  
544
+  // Evaluate sequence identity underlying substitution matrix
545
+  if (v>=3)
546
+    {
547
+      float id=0.0f;
548
+      float entropy=0.0f; 
549
+      float entropy_pb=0.0f;
550
+      float mut_info=0.0f;
551
+      for (a=0; a<20; ++a) id+=P[a][a];
552
+      for (a=0; a<20; ++a) entropy_pb-=pb[a]*log2(pb[a]);
553
+      for (a=0; a<20; ++a) 
554
+	  for (b=0; b<20; ++b) 
555
+	    {
556
+	      entropy-=P[a][b]*log2(R[a][b]);
557
+	      mut_info += P[a][b]*S[a][b];
558
+	    }
559
+      
560
+      printf(": sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_pb,mut_info);
561
+    }
562
+
563
+  if (v>=4) //Debugging: probability matrix and dissimilarity matrix 
564
+    {
565
+      cout<<"Check matrix: before renormalization sum P(a,b)= "<<sumab<<"...\n";//PRINT
566
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
567
+      cout<<"p[] ";
568
+      for (a=0; a<20; a++)  printf("%4.1f ",100*pb[a]);
569
+      cout<<endl<<"\nSubstitution matrix log2( P(a,b)/p(a)/p(b) ) (in bits):\n";
570
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
571
+      for (b=0; b<20; b++)
572
+	{
573
+	  cout<<i2aa(b)<<"   ";
574
+	  for (a=0; a<20; a++)  printf("%4.1f ",S[a][b]);
575
+	  cout<<endl;
576
+	}
577
+      cout<<endl<<"\nOdds matrix P(a,b)/p(a)/p(b):\n";
578
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
579
+      for (b=0; b<20; b++)
580
+	{
581
+	  cout<<i2aa(b)<<"   ";
582
+	  for (a=0; a<20; a++)  printf("%4.1f ",P[b][a]/pb[a]/pb[b]);
583
+	  cout<<endl;
584
+	}
585
+      cout<<endl<<"\nMatrix of conditional probabilities P(a|b) = P(a,b)/p(b) (in %):\n";
586
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
587
+      for (b=0; b<20; b++)
588
+	{
589
+	  cout<<i2aa(b)<<"   ";
590
+	  for (a=0; a<20; a++)  printf("%4.1f ",100*R[b][a]);
591
+	  cout<<endl;
592
+	}
593
+      cout<<endl<<"\nProbability matrix P(a,b) (in %):\n";
594
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
595
+      for (b=0; b<20; b++)
596
+	{
597
+	  cout<<i2aa(b)<<"   ";
598
+	  for (a=0; a<20; a++)  printf("%5.0f ",1000000*P[b][a]);
599
+	  cout<<endl;
600
+	}
601
+      cout<<endl<<"Similarity matrix P(a,b)^2/P(a,a,)/P(b,b) (in %):\n";
602
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
603
+      for (b=0; b<20; b++)
604
+	{
605
+	  cout<<i2aa(b)<<"   ";
606
+	  for (a=0; a<20; a++)  printf("%4.0f ",100*Sim[b][a]);
607
+	  cout<<endl;
608
+	}
609
+      cout<<endl;
610
+
611
+
612
+    }
613
+}
614
+ 
615
+/////////////////////////////////////////////////////////////////////////////////////
616
+/**
617
+ * @brief Set (global variable) substitution matrix with derived matrices and background frequencies
618
+ */
619
+void 
620
+SetDnaSubstitutionMatrix()
621
+{
622
+
623
+  int a,b;
624
+  for (a=0; a<20; ++a)
625
+	for (pb[a]=0.0f, b=0; b<20; ++b)
626
+	  P[a][b] = 0.000001f*dna_basic[a*20+b];
627
+      for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f;
628
+  
629
+  // Check transition probability matrix, renormalize P and calculate pb[a]
630
+  float sumab=0.0f;
631
+  for (a=0; a<20; a++)
632
+    for (b=0; b<20; ++b) sumab+=P[a][b];
633
+  for (a=0; a<20; a++)
634
+    for (b=0; b<20; ++b) P[a][b]/=sumab;
635
+  for (a=0; a<20; a++)
636
+    for (pb[a]=0.0f, b=0; b<20; ++b) pb[a]+=P[a][b];
637
+
638
+  //Compute similarity matrix for amino acid pairs (for calculating consensus sequence)
639
+  for (a=0; a<20; ++a)
640
+    for (b=0; b<20; ++b)
641
+      Sim[a][b] = P[a][b]*P[a][b]/P[a][a]/P[b][b];
642
+
643
+  //Precompute matrix R for amino acid pseudocounts:
644
+  for (a=0; a<20; ++a)
645
+    for (b=0; b<20; ++b)   
646
+      R[a][b] = P[a][b]/pb[b]; //R[a][b]=P(a|b)
647
+  
648
+  //Precompute matrix R for amino acid pseudocounts:
649
+  for (a=0; a<20; ++a)
650
+    for (b=0; b<20; ++b)   
651
+      S[a][b] = log2(R[a][b]/pb[a]); // S[a][b] = log2(P(a,b)/P(a)/P(b))
652
+  
653
+  // Evaluate sequence identity underlying substitution matrix
654
+  if (v>=3)
655
+    {
656
+      float id=0.0f;
657
+      float entropy=0.0f; 
658
+      float entropy_pb=0.0f;
659
+      float mut_info=0.0f;
660
+      for (a=0; a<20; ++a) id+=P[a][a];
661
+      for (a=0; a<20; ++a) entropy_pb-=pb[a]*log2(pb[a]);
662
+      for (a=0; a<20; ++a) 
663
+	  for (b=0; b<20; ++b) 
664
+	    {
665
+	      entropy-=P[a][b]*log2(R[a][b]);
666
+	      mut_info += P[a][b]*S[a][b];
667
+	    }
668
+      
669
+      printf(": sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_pb,mut_info);
670
+    }
671
+
672
+  if (v>=4) //Debugging: probability matrix and dissimilarity matrix 
673
+    {
674
+      cout<<"Check matrix: before renormalization sum P(a,b)= "<<sumab<<"...\n";//PRINT
675
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
676
+      cout<<"p[] ";
677
+      for (a=0; a<20; a++)  printf("%4.1f ",100*pb[a]);
678
+      cout<<endl<<"\nSubstitution matrix log2( P(a,b)/p(a)/p(b) ) (in bits):\n";
679
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
680
+      for (b=0; b<20; b++)
681
+	{
682
+	  cout<<i2aa(b)<<"   ";
683
+	  for (a=0; a<20; a++)  printf("%4.1f ",S[a][b]);
684
+	  cout<<endl;
685
+	}
686
+      cout<<endl<<"\nOdds matrix P(a,b)/p(a)/p(b):\n";
687
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
688
+      for (b=0; b<20; b++)
689
+	{
690
+	  cout<<i2aa(b)<<"   ";
691
+	  for (a=0; a<20; a++)  printf("%4.1f ",P[b][a]/pb[a]/pb[b]);
692
+	  cout<<endl;
693
+	}
694
+      cout<<endl<<"\nMatrix of conditional probabilities P(a|b) = P(a,b)/p(b) (in %):\n";
695
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
696
+      for (b=0; b<20; b++)
697
+	{
698
+	  cout<<i2aa(b)<<"   ";
699
+	  for (a=0; a<20; a++)  printf("%4.1f ",100*R[b][a]);
700
+	  cout<<endl;
701
+	}
702
+      cout<<endl<<"\nProbability matrix P(a,b) (in %):\n";
703
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
704
+      for (b=0; b<20; b++)
705
+	{
706
+	  cout<<i2aa(b)<<"   ";
707
+	  for (a=0; a<20; a++)  printf("%5.0f ",1000000*P[b][a]);
708
+	  cout<<endl;
709
+	}
710
+      cout<<endl<<"Similarity matrix P(a,b)^2/P(a,a,)/P(b,b) (in %):\n";
711
+      cout<<"      A    C    G    T    U    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?    ?\n";
712
+      for (b=0; b<20; b++)
713
+	{
714
+	  cout<<i2aa(b)<<"   ";
715
+	  for (a=0; a<20; a++)  printf("%4.0f ",100*Sim[b][a]);
716
+	  cout<<endl;
717
+	}
718
+      cout<<endl;
719
+
720
+
721
+    }
722
+}
723
+
724
+/////////////////////////////////////////////////////////////////////////////////////
725
+/**
726
+ * @brief Set secondary structure substitution matrix
727
+ */
728
+void 
729
+SetSecStrucSubstitutionMatrix()
730
+{
731
+  int A;        //observed ss state (determined dssp)
732
+  int B,BB;     //predicted ss states (by psipred)
733
+  int cf,ccf;   //confidence value of prediction
734
+  float P73[NDSSP][NSSPRED][MAXCF];  //P73[cf][B][A] = P(A,B,cf)/P(A)/P(B,cf) = P(A|B,cf)/P(A)
735
+  float sum;
736
+
737
+  // S73[A][B][cf][b] = score for matching observed ss state A in query with state B in template
738
+  // predicted with confidence cf, when query and template columns are diverged by b units 
739
+  for (cf=0; cf<MAXCF; cf++)
740
+    for (A=0; A<NDSSP; A++)
741
+      for (B=0; B<NSSPRED; B++)
742
+	{
743
+	  P73[A][B][cf] = 1.-par.ssa + par.ssa*Ppred[cf*NSSPRED*NDSSP + B*NDSSP + A];
744
+	  S73[A][B][cf] = log2(P73[A][B][cf]);
745
+	}
746
+
747
+  for (B=0; B<NSSPRED; B++)
748
+    for (cf=0; cf<MAXCF; cf++)
749
+      for (BB=0; BB<NSSPRED; BB++)
750
+	for (ccf=0; ccf<MAXCF; ccf++)
751
+	  {
752
+	    sum=0;
753
+	    for (A=1; A<NDSSP; A++)
754
+	      sum += P73[A][B][cf] * P73[A][BB][ccf] * Pobs[A];
755
+	    S33[B][cf][BB][ccf] = log2(sum);
756
+	  }  
757
+} /* this is the end of SetSecStrucSubstitutionMatrix() */
758
+
759
+
760
+
761
+/*
762
+ * EOF hhmatrices-C.h
763
+ */