msa
gitsvnid: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/msa@102253 bc3139a867e503109ffcced21a209358
1  1 
new file mode 100644 
...  ... 
@@ 0,0 +1,763 @@ 
1 
+/* * mode: c; tabwidth: 4; cbasicoffset: 4; indenttabsmode: nil * */ 

2 
+ 

3 
+/********************************************************************* 

4 
+ * Clustal Omega  Multiple sequence alignment 

5 
+ * 

6 
+ * Copyright (C) 2010 University College Dublin 

7 
+ * 

8 
+ * ClustalOmega is free software; you can redistribute it and/or 

9 
+ * modify it under the terms of the GNU General Public License as 

10 
+ * published by the Free Software Foundation; either version 2 of the 

11 
+ * License, or (at your option) any later version. 

12 
+ * 

13 
+ * This file is part of ClustalOmega. 

14 
+ * 

15 
+ ********************************************************************/ 

16 
+ 

17 
+/* 

18 
+ * RCS $Id: hhmatricesC.h 274 20120424 23:28:24Z dave $ 

19 
+ */ 

20 
+ 

21 
+// Substitution matrices and their background frequencies 

22 
+ 

23 
+// The following background frequencies were calculated by the formula pa = (P[a,b]/(pa*pb))^(1) * (1,...,1) 

24 
+// For the Blousum50matrix this becomes pb[a]= SUM_(b=1,20) (2^(BLOSUM50[a,b]/3))^(1) 

25 
+// A R N D C Q E G H I L K M F P S T W Y V 

26 
+// Gonnet 7.68,5.14,4.02,5.41,1.89,3.27,5.99,7.56,3.69,5.06,10.01,5.97,2.20,3.50,4.54,4.67,7.12,1.25,3.95,7.28 

27 
+// BLOSUM50 8.24,6.24,4.46,4.77,2.03,2.90,6.78,6.69,2.53,6.89,10.7 ,5.04,1.49,4.93,3.97,5.95,6.13,1.34,3.45,6.28 

28 
+ 

29 
+const float Gonnet[]={ 

30 
+// A R N D C Q E G H I L K M F P S T W Y V 

31 
+ 10227, 3430, 2875, 3869, 1625, 2393, 4590, 6500, 2352, 3225, 5819, 4172, 1435, 1579, 3728, 4610, 6264, 418, 1824, 5709, // A 

32 
+ 3430, 7780, 2209, 2589, 584, 2369, 3368, 3080, 2173, 1493, 3093, 5701, 763, 859, 1893, 2287, 3487, 444, 1338, 2356, // R 

33 
+ 2875, 2209, 3868, 3601, 501, 1541, 2956, 3325, 1951, 1065, 2012, 2879, 532, 688, 1480, 2304, 3204, 219, 1148, 1759, // N 

34 
+ 3869, 2589, 3601, 8618, 488, 2172, 6021, 4176, 2184, 1139, 2151, 3616, 595, 670, 2086, 2828, 3843, 204, 1119, 2015, // D 

35 
+ 1625, 584, 501, 488, 5034, 355, 566, 900, 516, 741, 1336, 591, 337, 549, 419, 901, 1197, 187, 664, 1373, // C 

36 
+ 2393, 2369, 1541, 2172, 355, 1987, 2891, 1959, 1587, 1066, 2260, 2751, 570, 628, 1415, 1595, 2323, 219, 871, 1682, // Q 

37 
+ 4590, 3368, 2956, 6021, 566, 2891, 8201, 3758, 2418, 1624, 3140, 4704, 830, 852, 2418, 2923, 4159, 278, 1268, 2809, // E 

38 
+ 6500, 3080, 3325, 4176, 900, 1959, 3758,26066, 2016, 1354, 2741, 3496, 741, 797, 2369, 3863, 4169, 375, 1186, 2569, // G 

39 
+ 2352, 2173, 1951, 2184, 516, 1587, 2418, 2016, 5409, 1123, 2380, 2524, 600, 1259, 1298, 1642, 2446, 383, 876, 1691, // H 

40 
+ 3225, 1493, 1065, 1139, 741, 1066, 1624, 1354, 1123, 6417, 9630, 1858, 1975, 2225, 1260, 1558, 3131, 417, 1697, 7504, // I 

41 
+ 5819, 3093, 2012, 2151, 1336, 2260, 3140, 2741, 2380, 9630,25113, 3677, 4187, 5540, 2670, 2876, 5272, 1063, 3945,11005, // L 

42 
+ 4172, 5701, 2879, 3616, 591, 2751, 4704, 3496, 2524, 1858, 3677, 7430, 949, 975, 2355, 2847, 4340, 333, 1451, 2932, // K 

43 
+ 1435, 763, 532, 595, 337, 570, 830, 741, 600, 1975, 4187, 949, 1300, 1111, 573, 743, 1361, 218, 828, 2310, // M 

44 
+ 1579, 859, 688, 670, 549, 628, 852, 797, 1259, 2225, 5540, 975, 1111, 6126, 661, 856, 1498, 1000, 4464, 2602, // F 

45 
+ 3728, 1893, 1480, 2086, 419, 1415, 2418, 2369, 1298, 1260, 2670, 2355, 573, 661,11834, 2320, 3300, 179, 876, 2179, // P 

46 
+ 4610, 2287, 2304, 2828, 901, 1595, 2923, 3863, 1642, 1558, 2876, 2847, 743, 856, 2320, 3611, 4686, 272, 1188, 2695, // S 

47 
+ 6264, 3487, 3204, 3843, 1197, 2323, 4159, 4169, 2446, 3131, 5272, 4340, 1361, 1498, 3300, 4686, 8995, 397, 1812, 5172, // T 

48 
+ 418, 444, 219, 204, 187, 219, 278, 375, 383, 417, 1063, 333, 218, 1000, 179, 272, 397, 4101, 1266, 499, // W 

49 
+ 1824, 1338, 1148, 1119, 664, 871, 1268, 1186, 876, 1697, 3945, 1451, 828, 4464, 876, 1188, 1812, 1266, 9380, 2227, // Y 

50 
+ 5709, 2356, 1759, 2015, 1373, 1682, 2809, 2569, 1691, 7504,11005, 2932, 2310, 2602, 2179, 2695, 5172, 499, 2227,11569};// V 

51 
+ 

52 
+const float Blosum30[]= { 

53 
+// A R N D C Q E G H I L K M F P S T W Y V 

54 
+ 0.0096, 

55 
+ 0.0038,0.0109, 

56 
+ 0.0031,0.0019,0.0055, 

57 
+ 0.0043,0.0026,0.0027,0.0095, 

58 
+ 0.0014,0.0011,0.0010,0.0010,0.0070, 

59 
+ 0.0031,0.0031,0.0014,0.0018,0.0007,0.0039, 

60 
+ 0.0044,0.0031,0.0024,0.0037,0.0018,0.0028,0.0094, 

61 
+ 0.0052,0.0032,0.0032,0.0035,0.0011,0.0020,0.0035,0.0173, 

62 
+ 0.0016,0.0014,0.0011,0.0011,0.0004,0.0010,0.0018,0.0015,0.0060, 

63 
+ 0.0040,0.0022,0.0024,0.0018,0.0012,0.0016,0.0023,0.0036,0.0012,0.0072, 

64 
+ 0.0056,0.0039,0.0032,0.0043,0.0023,0.0027,0.0051,0.0051,0.0022,0.0066,0.0139, 

65 
+ 0.0044,0.0043,0.0027,0.0032,0.0010,0.0021,0.0053,0.0039,0.0013,0.0026,0.0044,0.0063, 

66 
+ 0.0018,0.0012,0.0009,0.0008,0.0004,0.0007,0.0012,0.0013,0.0008,0.0014,0.0027,0.0017,0.0012, 

67 
+ 0.0027,0.0023,0.0017,0.0011,0.0008,0.0010,0.0016,0.0022,0.0008,0.0027,0.0055,0.0023,0.0008,0.0077, 

68 
+ 0.0028,0.0021,0.0012,0.0021,0.0007,0.0015,0.0030,0.0030,0.0014,0.0017,0.0027,0.0029,0.0005,0.0011,0.0091, 

69 
+ 0.0056,0.0035,0.0028,0.0034,0.0013,0.0021,0.0038,0.0051,0.0017,0.0033,0.0047,0.0042,0.0010,0.0027,0.0024,0.0075, 

70 
+ 0.0040,0.0019,0.0024,0.0024,0.0009,0.0016,0.0023,0.0028,0.0010,0.0027,0.0046,0.0025,0.0010,0.0016,0.0020,0.0041,0.0046, 

71 
+ 0.0005,0.0007,0.0002,0.0004,0.0003,0.0004,0.0007,0.0011,0.0002,0.0005,0.0009,0.0006,0.0002,0.0007,0.0004,0.0005,0.0003,0.0027, 

72 
+ 0.0014,0.0022,0.0008,0.0015,0.0004,0.0011,0.0016,0.0016,0.0010,0.0018,0.0045,0.0017,0.0007,0.0024,0.0011,0.0017,0.0014,0.0009,0.0044, 

73 
+ 0.0056,0.0031,0.0022,0.0027,0.0012,0.0015,0.0026,0.0033,0.0012,0.0063,0.0074,0.0030,0.0015,0.0032,0.0017,0.0036,0.0036,0.0005,0.0024,0.0083}; 

74 
+ 

75 
+const float Blosum40[]= { 

76 
+// A R N D C Q E G H I L K M F P S T W Y V 

77 
+ 0.0148, 

78 
+ 0.0029,0.0109, 

79 
+ 0.0029,0.0019,0.0069, 

80 
+ 0.0032,0.0021,0.0031,0.0126, 

81 
+ 0.0015,0.0007,0.0007,0.0009,0.0093, 

82 
+ 0.0026,0.0024,0.0017,0.0016,0.0004,0.0048, 

83 
+ 0.0040,0.0026,0.0023,0.0048,0.0010,0.0032,0.0118, 

84 
+ 0.0066,0.0023,0.0035,0.0031,0.0012,0.0020,0.0028,0.0260, 

85 
+ 0.0014,0.0012,0.0013,0.0014,0.0003,0.0010,0.0015,0.0014,0.0060, 

86 
+ 0.0037,0.0017,0.0017,0.0016,0.0008,0.0012,0.0018,0.0024,0.0009,0.0105, 

87 
+ 0.0050,0.0030,0.0021,0.0027,0.0015,0.0023,0.0036,0.0034,0.0017,0.0082,0.0209, 

88 
+ 0.0041,0.0051,0.0027,0.0030,0.0010,0.0026,0.0045,0.0035,0.0013,0.0023,0.0037,0.0099, 

89 
+ 0.0017,0.0010,0.0007,0.0007,0.0003,0.0007,0.0010,0.0013,0.0007,0.0018,0.0037,0.0012,0.0018, 

90 
+ 0.0022,0.0016,0.0013,0.0013,0.0008,0.0008,0.0015,0.0021,0.0009,0.0031,0.0055,0.0018,0.0011,0.0105, 

91 
+ 0.0027,0.0014,0.0014,0.0019,0.0005,0.0013,0.0026,0.0028,0.0008,0.0020,0.0021,0.0023,0.0007,0.0010,0.0151, 

92 
+ 0.0060,0.0025,0.0030,0.0030,0.0013,0.0026,0.0034,0.0052,0.0013,0.0025,0.0034,0.0034,0.0011,0.0019,0.0022,0.0089, 

93 
+ 0.0040,0.0019,0.0022,0.0024,0.0011,0.0015,0.0025,0.0029,0.0009,0.0028,0.0039,0.0029,0.0011,0.0019,0.0022,0.0043,0.0070, 

94 
+ 0.0007,0.0005,0.0003,0.0003,0.0001,0.0004,0.0005,0.0008,0.0002,0.0005,0.0009,0.0005,0.0002,0.0008,0.0003,0.0004,0.0003,0.0045, 

95 
+ 0.0019,0.0016,0.0010,0.0011,0.0004,0.0010,0.0014,0.0017,0.0012,0.0020,0.0031,0.0017,0.0010,0.0032,0.0009,0.0015,0.0014,0.0008,0.0060, 

96 
+ 0.0054,0.0023,0.0017,0.0022,0.0012,0.0014,0.0025,0.0028,0.0008,0.0083,0.0082,0.0027,0.0018,0.0031,0.0018,0.0032,0.0040,0.0005,0.0020,0.0113}; 

97 
+ 

98 
+const float Blosum50[]= { 

99 
+// A R N D C Q E G H I L K M F P S T W Y V 

100 
+ 0.0192, 

101 
+ 0.0027,0.0152, 

102 
+ 0.0024,0.0020,0.0101, 

103 
+ 0.0026,0.0019,0.0035,0.0161, 

104 
+ 0.0015,0.0005,0.0006,0.0005,0.0091, 

105 
+ 0.0022,0.0025,0.0016,0.0017,0.0004,0.0057, 

106 
+ 0.0034,0.0029,0.0023,0.0048,0.0006,0.0033,0.0141, 

107 
+ 0.0062,0.0020,0.0031,0.0028,0.0009,0.0017,0.0023,0.0316, 

108 
+ 0.0012,0.0013,0.0015,0.0011,0.0003,0.0010,0.0013,0.0011,0.0064, 

109 
+ 0.0035,0.0015,0.0013,0.0012,0.0008,0.0011,0.0015,0.0018,0.0007,0.0140, 

110 
+ 0.0048,0.0028,0.0017,0.0018,0.0014,0.0019,0.0026,0.0027,0.0013,0.0104,0.0304, 

111 
+ 0.0033,0.0064,0.0027,0.0026,0.0006,0.0029,0.0043,0.0028,0.0014,0.0017,0.0027,0.0130, 

112 
+ 0.0016,0.0009,0.0007,0.0005,0.0004,0.0008,0.0008,0.0009,0.0005,0.0022,0.0042,0.0010,0.0029, 

113 
+ 0.0020,0.0012,0.0009,0.0008,0.0006,0.0007,0.0012,0.0015,0.0009,0.0030,0.0058,0.0012,0.0012,0.0154, 

114 
+ 0.0022,0.0011,0.0011,0.0015,0.0004,0.0011,0.0019,0.0019,0.0006,0.0013,0.0017,0.0018,0.0005,0.0007,0.0171, 

115 
+ 0.0062,0.0025,0.0032,0.0028,0.0011,0.0022,0.0030,0.0044,0.0012,0.0021,0.0029,0.0031,0.0010,0.0016,0.0018,0.0111, 

116 
+ 0.0039,0.0021,0.0026,0.0022,0.0010,0.0015,0.0024,0.0025,0.0009,0.0029,0.0038,0.0026,0.0011,0.0015,0.0016,0.0047,0.0100, 

117 
+ 0.0005,0.0004,0.0002,0.0002,0.0001,0.0003,0.0004,0.0005,0.0002,0.0005,0.0008,0.0004,0.0003,0.0009,0.0002,0.0003,0.0004,0.0059, 

118 
+ 0.0015,0.0013,0.0009,0.0009,0.0004,0.0009,0.0012,0.0012,0.0013,0.0018,0.0027,0.0013,0.0007,0.0039,0.0006,0.0013,0.0012,0.0008,0.0077, 

119 
+ 0.0054,0.0020,0.0015,0.0016,0.0013,0.0014,0.0020,0.0022,0.0007,0.0107,0.0092,0.0022,0.0021,0.0030,0.0016,0.0029,0.0041,0.0005,0.0018,0.0164}; 

120 
+ 

121 
+const float Blosum65[]= { 

122 
+// A R N D C Q E G H I L K M F P S T W Y V 

123 
+ 0.0222, 

124 
+ 0.0022,0.0181, 

125 
+ 0.0019,0.0019,0.0148, 

126 
+ 0.0021,0.0015,0.0037,0.0225, 

127 
+ 0.0016,0.0004,0.0004,0.0004,0.0127, 

128 
+ 0.0018,0.0024,0.0015,0.0016,0.0003,0.0076, 

129 
+ 0.0029,0.0025,0.0021,0.0049,0.0003,0.0034,0.0168, 

130 
+ 0.0057,0.0016,0.0027,0.0024,0.0007,0.0013,0.0018,0.0396, 

131 
+ 0.0010,0.0013,0.0014,0.0009,0.0002,0.0010,0.0013,0.0009,0.0096, 

132 
+ 0.0031,0.0012,0.0009,0.0011,0.0012,0.0008,0.0012,0.0013,0.0006,0.0191, 

133 
+ 0.0043,0.0023,0.0013,0.0014,0.0016,0.0016,0.0019,0.0020,0.0009,0.0115,0.0388, 

134 
+ 0.0032,0.0062,0.0024,0.0024,0.0005,0.0030,0.0040,0.0024,0.0012,0.0015,0.0024,0.0166, 

135 
+ 0.0013,0.0007,0.0005,0.0004,0.0004,0.0007,0.0006,0.0007,0.0003,0.0025,0.0052,0.0008,0.0045, 

136 
+ 0.0016,0.0009,0.0007,0.0007,0.0005,0.0005,0.0008,0.0011,0.0008,0.0030,0.0056,0.0009,0.0012,0.0186, 

137 
+ 0.0021,0.0009,0.0008,0.0011,0.0003,0.0008,0.0014,0.0013,0.0005,0.0009,0.0013,0.0015,0.0004,0.0005,0.0195, 

138 
+ 0.0065,0.0022,0.0030,0.0027,0.0011,0.0018,0.0029,0.0037,0.0011,0.0017,0.0024,0.0030,0.0008,0.0012,0.0016,0.0137, 

139 
+ 0.0037,0.0017,0.0022,0.0019,0.0009,0.0013,0.0021,0.0022,0.0007,0.0027,0.0033,0.0023,0.0010,0.0012,0.0013,0.0048,0.0133, 

140 
+ 0.0004,0.0003,0.0002,0.0001,0.0002,0.0002,0.0002,0.0004,0.0002,0.0004,0.0007,0.0003,0.0002,0.0009,0.0001,0.0003,0.0003,0.0074, 

141 
+ 0.0013,0.0009,0.0007,0.0006,0.0004,0.0006,0.0008,0.0008,0.0016,0.0015,0.0023,0.0010,0.0006,0.0043,0.0004,0.0010,0.0009,0.0010,0.0113, 

142 
+ 0.0049,0.0015,0.0011,0.0012,0.0014,0.0011,0.0016,0.0017,0.0006,0.0120,0.0094,0.0019,0.0023,0.0025,0.0012,0.0023,0.0035,0.0004,0.0015,0.0206}; 

143 
+ 

144 
+ 

145 
+const float Blosum80[]= { 

146 
+// A R N D C Q E G H I L K M F P S T W Y V 

147 
+ 0.0252, 

148 
+ 0.0020,0.0210, 

149 
+ 0.0016,0.0017,0.0166, 

150 
+ 0.0018,0.0013,0.0037,0.0262, 

151 
+ 0.0015,0.0003,0.0004,0.0003,0.0172, 

152 
+ 0.0017,0.0024,0.0014,0.0014,0.0003,0.0094, 

153 
+ 0.0028,0.0023,0.0019,0.0048,0.0003,0.0035,0.0208, 

154 
+ 0.0053,0.0015,0.0025,0.0022,0.0006,0.0011,0.0017,0.0463, 

155 
+ 0.0009,0.0012,0.0012,0.0008,0.0002,0.0011,0.0012,0.0008,0.0104, 

156 
+ 0.0027,0.0010,0.0007,0.0008,0.0011,0.0007,0.0010,0.0009,0.0004,0.0220, 

157 
+ 0.0036,0.0018,0.0011,0.0011,0.0014,0.0014,0.0015,0.0016,0.0008,0.0111,0.0442, 

158 
+ 0.0029,0.0061,0.0022,0.0020,0.0004,0.0028,0.0036,0.0020,0.0010,0.0012,0.0019,0.0190, 

159 
+ 0.0011,0.0006,0.0004,0.0003,0.0004,0.0007,0.0006,0.0005,0.0003,0.0025,0.0052,0.0007,0.0053, 

160 
+ 0.0014,0.0007,0.0006,0.0006,0.0005,0.0005,0.0006,0.0009,0.0007,0.0027,0.0052,0.0007,0.0010,0.0211, 

161 
+ 0.0021,0.0009,0.0007,0.0009,0.0003,0.0007,0.0012,0.0010,0.0004,0.0007,0.0012,0.0012,0.0003,0.0004,0.0221, 

162 
+ 0.0064,0.0020,0.0029,0.0024,0.0010,0.0017,0.0026,0.0034,0.0010,0.0015,0.0021,0.0026,0.0007,0.0010,0.0014,0.0167, 

163 
+ 0.0036,0.0015,0.0020,0.0016,0.0009,0.0012,0.0019,0.0019,0.0007,0.0024,0.0028,0.0020,0.0009,0.0011,0.0011,0.0048,0.0156, 

164 
+ 0.0003,0.0002,0.0001,0.0001,0.0001,0.0002,0.0002,0.0003,0.0001,0.0003,0.0006,0.0002,0.0002,0.0007,0.0001,0.0002,0.0002,0.0087, 

165 
+ 0.0011,0.0007,0.0006,0.0005,0.0003,0.0005,0.0006,0.0006,0.0016,0.0013,0.0020,0.0008,0.0005,0.0046,0.0003,0.0009,0.0008,0.0010,0.0148, 

166 
+ 0.0046,0.0013,0.0009,0.0010,0.0013,0.0010,0.0015,0.0014,0.0005,0.0123,0.0089,0.0015,0.0022,0.0022,0.0010,0.0021,0.0033,0.0004,0.0012,0.0246}; 

167 
+ 

168 
+const float mism = 2000; // standard mismatch score 

169 
+const float matc = 7000; 

170 
+ 

171 
+// RNA matrix, Gonnet type format 

172 
+const float ribosum[]={ 

173 
+// A C G T U R Y M K S W B D H V N ? ? ? ? 

174 
+ 9500, 600, 700, 900, 900, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // A 

175 
+ 600, 4000, 300, 1200, 1200, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // C 

176 
+ 700, 300, 3500, 700, 700, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // G 

177 
+ 900, 1200, 700, 8200, 8200, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // T 

178 
+ 900, 1200, 700, 8200, 8200, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // U 

179 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // R 

180 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // Y 

181 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // M 

182 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // K 

183 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // S 

184 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // W 

185 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // B 

186 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // D 

187 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // H 

188 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // V 

189 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // N 

190 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

191 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

192 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

193 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism};// ? 

194 
+ 

195 
+const float dna_basic[]={ 

196 
+// A C G T U R Y M K S W B D H V N ? ? ? ? 

197 
+ matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // A 

198 
+ mism, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // C 

199 
+ mism, mism, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // G 

200 
+ mism, mism, mism, matc, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // T 

201 
+ mism, mism, mism, matc, matc, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // U 

202 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // R 

203 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // Y 

204 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // M 

205 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // K 

206 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // S 

207 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // W 

208 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // B 

209 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // D 

210 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // H 

211 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // V 

212 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // N 

213 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

214 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

215 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

216 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism};// ? 

217 
+ 

218 
+const float dave_nucleo[]={ 

219 
+// A C G T U R Y M K S W B D H V N ? ? ? ? 

220 
+ 8000, 1500, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // A 

221 
+ 1500, 6500, 1000, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // C 

222 
+ mism, 1000, 6500, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // G 

223 
+ mism, mism, mism, 7000, 7000, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // T 

224 
+ mism, mism, mism, 7000, 7000, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // U 

225 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // R 

226 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // Y 

227 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // M 

228 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // K 

229 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // S 

230 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // W 

231 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // B 

232 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // D 

233 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // H 

234 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // V 

235 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // N 

236 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

237 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

238 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, // ? 

239 
+ mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism, mism};// ? 

240 
+ 

241 
+// prediction accuracy of Psipred: 

242 
+// Ppred[cf][B][A] = P(A,B,cf)/P(A)/P(B,cf) = P(AB,cf)/P(A) 

243 
+// A = observed ss state B = predicted ss state cf = confidence value of prediction 

244 
+//float Ppred[MAXCF][NSSPRED][NDSSP]= 

245 
+const float Ppred[]= 

246 
+ { 

247 
+//pred/obs  H E ~ S T G B 

248 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf= 

249 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // H 

250 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // E 

251 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , // ~ 

252 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=0 

253 
+ 1.000, 1.128, 0.519, 0.834, 0.957, 1.488, 2.106, 1.085 , // H 

254 
+ 1.000, 0.233, 2.240, 1.216, 0.913, 0.519, 0.923, 1.759 , // E 

255 
+ 1.000, 0.640, 1.017, 1.122, 1.069, 1.242, 2.140, 1.999 , // ~ 

256 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=1 

257 
+ 1.000, 1.251, 0.485, 0.771, 0.847, 1.371, 2.266, 0.864 , // H 

258 
+ 1.000, 0.222, 2.542, 1.069, 0.804, 0.428, 0.671, 1.728 , // E 

259 
+ 1.000, 0.474, 1.103, 1.295, 1.232, 1.214, 1.835, 1.989 , // ~ 

260 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=2 

261 
+ 1.000, 1.383, 0.426, 0.637, 0.778, 1.349, 2.436, 0.824 , // H 

262 
+ 1.000, 0.202, 2.769, 0.999, 0.714, 0.320, 0.551, 1.566 , // E 

263 
+ 1.000, 0.395, 1.005, 1.407, 1.376, 1.336, 1.725, 2.063 , // ~ 

264 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=3 

265 
+ 1.000, 1.531, 0.369, 0.552, 0.682, 1.280, 2.420, 0.698 , // H 

266 
+ 1.000, 0.169, 2.970, 0.954, 0.556, 0.273, 0.489, 1.504 , // E 

267 
+ 1.000, 0.352, 0.843, 1.515, 1.542, 1.456, 1.684, 1.958 , // ~ 

268 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=4 

269 
+ 1.000, 1.750, 0.305, 0.444, 0.537, 1.134, 2.295, 0.600 , // H 

270 
+ 1.000, 0.124, 3.179, 0.847, 0.513, 0.228, 0.413, 1.897 , // E 

271 
+ 1.000, 0.282, 0.718, 1.664, 1.630, 1.577, 1.625, 1.877 , // ~ 

272 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=5 

273 
+ 1.000, 1.952, 0.250, 0.353, 0.456, 0.982, 2.050, 0.466 , // H 

274 
+ 1.000, 0.102, 3.464, 0.699, 0.453, 0.174, 0.284, 1.357 , // E 

275 
+ 1.000, 0.227, 0.574, 1.782, 1.846, 1.681, 1.418, 1.885 , // ~ 

276 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=6 

277 
+ 1.000, 2.183, 0.171, 0.263, 0.319, 0.792, 1.933, 0.345 , // H 

278 
+ 1.000, 0.079, 3.712, 0.612, 0.281, 0.133, 0.196, 1.089 , // E 

279 
+ 1.000, 0.173, 0.458, 1.915, 2.007, 1.766, 1.220, 1.704 , // ~ 

280 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=7 

281 
+ 1.000, 2.389, 0.132, 0.192, 0.224, 0.605, 1.605, 0.183 , // H 

282 
+ 1.000, 0.053, 3.997, 0.449, 0.201, 0.072, 0.141, 0.919 , // E 

283 
+ 1.000, 0.109, 0.328, 2.013, 2.304, 1.882, 0.960, 1.512 , // ~ 

284 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=8 

285 
+ 1.000, 2.668, 0.065, 0.098, 0.144, 0.354, 1.059, 0.102 , // H 

286 
+ 1.000, 0.029, 4.285, 0.284, 0.113, 0.044, 0.059, 0.522 , // E 

287 
+ 1.000, 0.053, 0.200, 2.099, 2.444, 2.133, 0.671, 1.290 , // ~ 

288 
+ 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=9 

289 
+ 1.000, 2.966, 0.009, 0.023, 0.036, 0.113, 0.214, 0.017 , // H 

290 
+ 1.000, 0.010, 4.555, 0.119, 0.027, 0.010, 0.013, 0.209 , // E 

291 
+ 1.000, 0.026, 0.101, 2.576, 1.853, 2.204, 0.308, 0.499 // ~ 

292 
+ }; 

293 
+ 

294 
+// float Ppred[]= 

295 
+// { 

296 
+// //pred/obs  H E ~ S T G B 

297 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf= 

298 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

299 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

300 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

301 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=0 

302 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

303 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

304 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

305 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=1 

306 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

307 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

308 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

309 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=2 

310 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

311 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

312 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

313 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=3 

314 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

315 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

316 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

317 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=4 

318 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

319 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

320 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

321 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=5 

322 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

323 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

324 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

325 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=6 

326 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

327 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

328 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

329 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=7 

330 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

331 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

332 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

333 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=8 

334 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

335 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

336 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

337 
+// 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000 , //  conf=9 

338 
+// 1.000, 2.000, 0.500, 1.000, 1.000, 1.000, 1.000, 0.500, // H 

339 
+// 1.000, 0.500, 2.000, 0.500, 1.000, 1.000, 0.500, 1.000, // E 

340 
+// 1.000, 1.000, 1.000, 2.000, 2.000, 2.000, 1.000, 1.000, // ~ 

341 
+ 

342 
+ 

343 
+float Pobs[]={0, 0.3268,0.2119,0.2061,0.0913,0.1143,0.0376,0.0120}; 

344 
+ 

345 
+float Pevo_full[]= 

346 
+ { 

347 
+//  H E C 

348 
+ 1.00, 0.00, 0.00, 0.00, 

349 
+ 0.00, 0.94, 0.00, 0.04, 

350 
+ 0.00, 0.00, 0.92, 0.04, 

351 
+ 0.00, 0.06, 0.08, 0.92 

352 
+ }; 

353 
+ 

354 
+//psipred accuracy for confidence values 09 

355 
+const float p_acc[]={0.00,0.47,0.53,0.56,0.58,0.62,0.69,0.74,0.82,0.88,0.96}; 

356 
+ 

357 
+/** 

358 
+ * @brief 

359 
+ */ 

360 
+void 

361 
+SetBlosumMatrix(const float BlosumXX[]) 

362 
+{ 

363 
+ int a,b,n=0; 

364 
+ if (v>=3) printf("Using the BLOSUM%2i matrix\n",par.matrix); 

365 
+ for (a=0; a<20; ++a) 

366 
+ for (pb[a]=0.0f, b=0; b<=a; ++b,++n) 

367 
+ P[a][b] = BlosumXX[n]; 

368 
+ for (a=0; a<19; a++) 

369 
+ for (b=a+1; b<20; ++b) 

370 
+ P[a][b] = P[b][a]; 

371 
+ for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f; 

372 
+ return; 

373 
+} 

374 
+ 

375 
+///////////////////////////////////////////////////////////////////////////////////// 

376 
+/** 

377 
+ * @brief Set (global variable) substitution matrix with derived matrices and background frequencies 

378 
+ */ 

379 
+void 

380 
+SetSubstitutionMatrix() 

381 
+{ 

382 
+ int a,b; 

383 
+ switch (par.matrix) 

384 
+ { 

385 
+ default: 

386 
+ case 0: //Gonnet matrix 

387 
+ if (v>=3) cout<<"Using the Gonnet matrix "; 

388 
+ for (a=0; a<20; ++a) 

389 
+ for (pb[a]=0.0f, b=0; b<20; ++b) 

390 
+ P[a][b] = 0.000001f*Gonnet[a*20+b]; 

391 
+ for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f; 

392 
+ break; 

393 
+ 

394 
+ case 30: //BLOSUM30 

395 
+ SetBlosumMatrix(Blosum30); 

396 
+ break; 

397 
+ case 40: //BLOSUM40 

398 
+ SetBlosumMatrix(Blosum40); 

399 
+ break; 

400 
+ case 50: //BLOSUM50 

401 
+ SetBlosumMatrix(Blosum50); 

402 
+ break; 

403 
+ case 65: //BLOSUM65 

404 
+ SetBlosumMatrix(Blosum65); 

405 
+ break; 

406 
+ case 80: //BLOSUM80 

407 
+ SetBlosumMatrix(Blosum80); 

408 
+ break; 

409 
+ } 

410 
+ 

411 
+ // Check transition probability matrix, renormalize P and calculate pb[a] 

412 
+ float sumab=0.0f; 

413 
+ for (a=0; a<20; a++) 

414 
+ for (b=0; b<20; ++b) sumab+=P[a][b]; 

415 
+ for (a=0; a<20; a++) 

416 
+ for (b=0; b<20; ++b) P[a][b]/=sumab; 

417 
+ for (a=0; a<20; a++) 

418 
+ for (pb[a]=0.0f, b=0; b<20; ++b) pb[a]+=P[a][b]; 

419 
+ 

420 
+ //Compute similarity matrix for amino acid pairs (for calculating consensus sequence) 

421 
+ for (a=0; a<20; ++a) 

422 
+ for (b=0; b<20; ++b) 

423 
+ Sim[a][b] = P[a][b]*P[a][b]/P[a][a]/P[b][b]; 

424 
+ 

425 
+ //Precompute matrix R for amino acid pseudocounts: 

426 
+ for (a=0; a<20; ++a) 

427 
+ for (b=0; b<20; ++b) 

428 
+ R[a][b] = P[a][b]/pb[b]; //R[a][b]=P(ab) 

429 
+ 

430 
+ //Precompute matrix R for amino acid pseudocounts: 

431 
+ for (a=0; a<20; ++a) 

432 
+ for (b=0; b<20; ++b) 

433 
+ S[a][b] = log2(R[a][b]/pb[a]); // S[a][b] = log2(P(a,b)/P(a)/P(b)) 

434 
+ 

435 
+ // Evaluate sequence identity underlying substitution matrix 

436 
+ if (v>=3) 

437 
+ { 

438 
+ float id=0.0f; 

439 
+ float entropy=0.0f; 

440 
+ float entropy_pb=0.0f; 

441 
+ float mut_info=0.0f; 

442 
+ for (a=0; a<20; ++a) id+=P[a][a]; 

443 
+ for (a=0; a<20; ++a) entropy_pb=pb[a]*log2(pb[a]); 

444 
+ for (a=0; a<20; ++a) 

445 
+ for (b=0; b<20; ++b) 

446 
+ { 

447 
+ entropy=P[a][b]*log2(R[a][b]); 

448 
+ mut_info += P[a][b]*S[a][b]; 

449 
+ } 

450 
+ 

451 
+ printf(": sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_pb,mut_info); 

452 
+ } 

453 
+ 

454 
+ if (v>=4) //Debugging: probability matrix and dissimilarity matrix 

455 
+ { 

456 
+ cout<<"Check matrix: before renormalization sum P(a,b)= "<<sumab<<"...\n";//PRINT 

457 
+ cout<<" A R N D C Q E G H I L K M F P S T W Y V\n"; 

458 
+ cout<<"p[] "; 

459 
+ for (a=0; a<20; a++) printf("%4.1f ",100*pb[a]); 

460 
+ cout<<endl<<"\nSubstitution matrix log2( P(a,b)/p(a)/p(b) ) (in bits):\n"; 

461 
+ cout<<" A R N D C Q E G H I L K M F P S T W Y V\n"; 

462 
+ for (b=0; b<20; b++) 

463 
+ { 

464 
+ cout<<i2aa(b)<<" "; 

465 
+ for (a=0; a<20; a++) printf("%4.1f ",S[a][b]); 

466 
+ cout<<endl; 

467 
+ } 

468 
+ cout<<endl<<"\nOdds matrix P(a,b)/p(a)/p(b):\n"; 

469 
+ cout<<" A R N D C Q E G H I L K M F P S T W Y V\n"; 

470 
+ for (b=0; b<20; b++) 

471 
+ { 

472 
+ cout<<i2aa(b)<<" "; 

473 
+ for (a=0; a<20; a++) printf("%4.1f ",P[b][a]/pb[a]/pb[b]); 

474 
+ cout<<endl; 

475 
+ } 

476 
+ cout<<endl<<"\nMatrix of conditional probabilities P(ab) = P(a,b)/p(b) (in %):\n"; 

477 
+ cout<<" A R N D C Q E G H I L K M F P S T W Y V\n"; 

478 
+ for (b=0; b<20; b++) 

479 
+ { 

480 
+ cout<<i2aa(b)<<" "; 

481 
+ for (a=0; a<20; a++) printf("%4.1f ",100*R[b][a]); 

482 
+ cout<<endl; 

483 
+ } 

484 
+ cout<<endl<<"\nProbability matrix P(a,b) (in %):\n"; 

485 
+ cout<<" A R N D C Q E G H I L K M F P S T W Y V\n"; 

486 
+ for (b=0; b<20; b++) 

487 
+ { 

488 
+ cout<<i2aa(b)<<" "; 

489 
+ for (a=0; a<20; a++) printf("%5.0f ",1000000*P[b][a]); 

490 
+ cout<<endl; 

491 
+ } 

492 
+ cout<<endl<<"Similarity matrix P(a,b)^2/P(a,a,)/P(b,b) (in %):\n"; 

493 
+ cout<<" A R N D C Q E G H I L K M F P S T W Y V\n"; 

494 
+ for (b=0; b<20; b++) 

495 
+ { 

496 
+ cout<<i2aa(b)<<" "; 

497 
+ for (a=0; a<20; a++) printf("%4.0f ",100*Sim[b][a]); 

498 
+ cout<<endl; 

499 
+ } 

500 
+ cout<<endl; 

501 
+ 

502 
+ 

503 
+ } 

504 
+} 

505 
+ 

506 
+///////////////////////////////////////////////////////////////////////////////////// 

507 
+/** 

508 
+ * @brief Set (global variable) substitution matrix with derived matrices and background frequencies 

509 
+ */ 

510 
+void 

511 
+SetRnaSubstitutionMatrix() 

512 
+{ 

513 
+ printf("SET RNA SUBSTITUTION MATRIX ...."); 

514 
+ int a,b; 

515 
+ for (a=0; a<20; ++a) 

516 
+ for (pb[a]=0.0f, b=0; b<20; ++b) 

517 
+ P[a][b] = 0.000001f*ribosum[a*20+b]; 

518 
+ for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f; 

519 
+ 

520 
+ // Check transition probability matrix, renormalize P and calculate pb[a] 

521 
+ float sumab=0.0f; 

522 
+ for (a=0; a<20; a++) 

523 
+ for (b=0; b<20; ++b) sumab+=P[a][b]; 

524 
+ for (a=0; a<20; a++) 

525 
+ for (b=0; b<20; ++b) P[a][b]/=sumab; 

526 
+ for (a=0; a<20; a++) 

527 
+ for (pb[a]=0.0f, b=0; b<20; ++b) pb[a]+=P[a][b]; 

528 
+ 

529 
+ //Compute similarity matrix for amino acid pairs (for calculating consensus sequence) 

530 
+ for (a=0; a<20; ++a) 

531 
+ for (b=0; b<20; ++b) 

532 
+ Sim[a][b] = P[a][b]*P[a][b]/P[a][a]/P[b][b]; 

533 
+ 

534 
+ //Precompute matrix R for amino acid pseudocounts: 

535 
+ for (a=0; a<20; ++a) 

536 
+ for (b=0; b<20; ++b) 

537 
+ R[a][b] = P[a][b]/pb[b]; //R[a][b]=P(ab) 

538 
+ 

539 
+ //Precompute matrix R for amino acid pseudocounts: 

540 
+ for (a=0; a<20; ++a) 

541 
+ for (b=0; b<20; ++b) 

542 
+ S[a][b] = log2(R[a][b]/pb[a]); // S[a][b] = log2(P(a,b)/P(a)/P(b)) 

543 
+ 

544 
+ // Evaluate sequence identity underlying substitution matrix 

545 
+ if (v>=3) 

546 
+ { 

547 
+ float id=0.0f; 

548 
+ float entropy=0.0f; 

549 
+ float entropy_pb=0.0f; 

550 
+ float mut_info=0.0f; 

551 
+ for (a=0; a<20; ++a) id+=P[a][a]; 

552 
+ for (a=0; a<20; ++a) entropy_pb=pb[a]*log2(pb[a]); 

553 
+ for (a=0; a<20; ++a) 

554 
+ for (b=0; b<20; ++b) 

555 
+ { 

556 
+ entropy=P[a][b]*log2(R[a][b]); 

557 
+ mut_info += P[a][b]*S[a][b]; 

558 
+ } 

559 
+ 

560 
+ printf(": sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_pb,mut_info); 

561 
+ } 

562 
+ 

563 
+ if (v>=4) //Debugging: probability matrix and dissimilarity matrix 

564 
+ { 

565 
+ cout<<"Check matrix: before renormalization sum P(a,b)= "<<sumab<<"...\n";//PRINT 

566 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

567 
+ cout<<"p[] "; 

568 
+ for (a=0; a<20; a++) printf("%4.1f ",100*pb[a]); 

569 
+ cout<<endl<<"\nSubstitution matrix log2( P(a,b)/p(a)/p(b) ) (in bits):\n"; 

570 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

571 
+ for (b=0; b<20; b++) 

572 
+ { 

573 
+ cout<<i2aa(b)<<" "; 

574 
+ for (a=0; a<20; a++) printf("%4.1f ",S[a][b]); 

575 
+ cout<<endl; 

576 
+ } 

577 
+ cout<<endl<<"\nOdds matrix P(a,b)/p(a)/p(b):\n"; 

578 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

579 
+ for (b=0; b<20; b++) 

580 
+ { 

581 
+ cout<<i2aa(b)<<" "; 

582 
+ for (a=0; a<20; a++) printf("%4.1f ",P[b][a]/pb[a]/pb[b]); 

583 
+ cout<<endl; 

584 
+ } 

585 
+ cout<<endl<<"\nMatrix of conditional probabilities P(ab) = P(a,b)/p(b) (in %):\n"; 

586 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

587 
+ for (b=0; b<20; b++) 

588 
+ { 

589 
+ cout<<i2aa(b)<<" "; 

590 
+ for (a=0; a<20; a++) printf("%4.1f ",100*R[b][a]); 

591 
+ cout<<endl; 

592 
+ } 

593 
+ cout<<endl<<"\nProbability matrix P(a,b) (in %):\n"; 

594 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

595 
+ for (b=0; b<20; b++) 

596 
+ { 

597 
+ cout<<i2aa(b)<<" "; 

598 
+ for (a=0; a<20; a++) printf("%5.0f ",1000000*P[b][a]); 

599 
+ cout<<endl; 

600 
+ } 

601 
+ cout<<endl<<"Similarity matrix P(a,b)^2/P(a,a,)/P(b,b) (in %):\n"; 

602 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

603 
+ for (b=0; b<20; b++) 

604 
+ { 

605 
+ cout<<i2aa(b)<<" "; 

606 
+ for (a=0; a<20; a++) printf("%4.0f ",100*Sim[b][a]); 

607 
+ cout<<endl; 

608 
+ } 

609 
+ cout<<endl; 

610 
+ 

611 
+ 

612 
+ } 

613 
+} 

614 
+ 

615 
+///////////////////////////////////////////////////////////////////////////////////// 

616 
+/** 

617 
+ * @brief Set (global variable) substitution matrix with derived matrices and background frequencies 

618 
+ */ 

619 
+void 

620 
+SetDnaSubstitutionMatrix() 

621 
+{ 

622 
+ 

623 
+ int a,b; 

624 
+ for (a=0; a<20; ++a) 

625 
+ for (pb[a]=0.0f, b=0; b<20; ++b) 

626 
+ P[a][b] = 0.000001f*dna_basic[a*20+b]; 

627 
+ for (a=0; a<20; ++a) P[a][20]=P[20][a]=1.0f; 

628 
+ 

629 
+ // Check transition probability matrix, renormalize P and calculate pb[a] 

630 
+ float sumab=0.0f; 

631 
+ for (a=0; a<20; a++) 

632 
+ for (b=0; b<20; ++b) sumab+=P[a][b]; 

633 
+ for (a=0; a<20; a++) 

634 
+ for (b=0; b<20; ++b) P[a][b]/=sumab; 

635 
+ for (a=0; a<20; a++) 

636 
+ for (pb[a]=0.0f, b=0; b<20; ++b) pb[a]+=P[a][b]; 

637 
+ 

638 
+ //Compute similarity matrix for amino acid pairs (for calculating consensus sequence) 

639 
+ for (a=0; a<20; ++a) 

640 
+ for (b=0; b<20; ++b) 

641 
+ Sim[a][b] = P[a][b]*P[a][b]/P[a][a]/P[b][b]; 

642 
+ 

643 
+ //Precompute matrix R for amino acid pseudocounts: 

644 
+ for (a=0; a<20; ++a) 

645 
+ for (b=0; b<20; ++b) 

646 
+ R[a][b] = P[a][b]/pb[b]; //R[a][b]=P(ab) 

647 
+ 

648 
+ //Precompute matrix R for amino acid pseudocounts: 

649 
+ for (a=0; a<20; ++a) 

650 
+ for (b=0; b<20; ++b) 

651 
+ S[a][b] = log2(R[a][b]/pb[a]); // S[a][b] = log2(P(a,b)/P(a)/P(b)) 

652 
+ 

653 
+ // Evaluate sequence identity underlying substitution matrix 

654 
+ if (v>=3) 

655 
+ { 

656 
+ float id=0.0f; 

657 
+ float entropy=0.0f; 

658 
+ float entropy_pb=0.0f; 

659 
+ float mut_info=0.0f; 

660 
+ for (a=0; a<20; ++a) id+=P[a][a]; 

661 
+ for (a=0; a<20; ++a) entropy_pb=pb[a]*log2(pb[a]); 

662 
+ for (a=0; a<20; ++a) 

663 
+ for (b=0; b<20; ++b) 

664 
+ { 

665 
+ entropy=P[a][b]*log2(R[a][b]); 

666 
+ mut_info += P[a][b]*S[a][b]; 

667 
+ } 

668 
+ 

669 
+ printf(": sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_pb,mut_info); 

670 
+ } 

671 
+ 

672 
+ if (v>=4) //Debugging: probability matrix and dissimilarity matrix 

673 
+ { 

674 
+ cout<<"Check matrix: before renormalization sum P(a,b)= "<<sumab<<"...\n";//PRINT 

675 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

676 
+ cout<<"p[] "; 

677 
+ for (a=0; a<20; a++) printf("%4.1f ",100*pb[a]); 

678 
+ cout<<endl<<"\nSubstitution matrix log2( P(a,b)/p(a)/p(b) ) (in bits):\n"; 

679 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

680 
+ for (b=0; b<20; b++) 

681 
+ { 

682 
+ cout<<i2aa(b)<<" "; 

683 
+ for (a=0; a<20; a++) printf("%4.1f ",S[a][b]); 

684 
+ cout<<endl; 

685 
+ } 

686 
+ cout<<endl<<"\nOdds matrix P(a,b)/p(a)/p(b):\n"; 

687 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

688 
+ for (b=0; b<20; b++) 

689 
+ { 

690 
+ cout<<i2aa(b)<<" "; 

691 
+ for (a=0; a<20; a++) printf("%4.1f ",P[b][a]/pb[a]/pb[b]); 

692 
+ cout<<endl; 

693 
+ } 

694 
+ cout<<endl<<"\nMatrix of conditional probabilities P(ab) = P(a,b)/p(b) (in %):\n"; 

695 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

696 
+ for (b=0; b<20; b++) 

697 
+ { 

698 
+ cout<<i2aa(b)<<" "; 

699 
+ for (a=0; a<20; a++) printf("%4.1f ",100*R[b][a]); 

700 
+ cout<<endl; 

701 
+ } 

702 
+ cout<<endl<<"\nProbability matrix P(a,b) (in %):\n"; 

703 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

704 
+ for (b=0; b<20; b++) 

705 
+ { 

706 
+ cout<<i2aa(b)<<" "; 

707 
+ for (a=0; a<20; a++) printf("%5.0f ",1000000*P[b][a]); 

708 
+ cout<<endl; 

709 
+ } 

710 
+ cout<<endl<<"Similarity matrix P(a,b)^2/P(a,a,)/P(b,b) (in %):\n"; 

711 
+ cout<<" A C G T U ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\n"; 

712 
+ for (b=0; b<20; b++) 

713 
+ { 

714 
+ cout<<i2aa(b)<<" "; 

715 
+ for (a=0; a<20; a++) printf("%4.0f ",100*Sim[b][a]); 

716 
+ cout<<endl; 

717 
+ } 

718 
+ cout<<endl; 

719 
+ 

720 
+ 

721 
+ } 

722 
+} 

723 
+ 

724 
+///////////////////////////////////////////////////////////////////////////////////// 

725 
+/** 

726 
+ * @brief Set secondary structure substitution matrix 

727 
+ */ 

728 
+void 

729 
+SetSecStrucSubstitutionMatrix() 

730 
+{ 

731 
+ int A; //observed ss state (determined dssp) 

732 
+ int B,BB; //predicted ss states (by psipred) 

733 
+ int cf,ccf; //confidence value of prediction 

734 
+ float P73[NDSSP][NSSPRED][MAXCF]; //P73[cf][B][A] = P(A,B,cf)/P(A)/P(B,cf) = P(AB,cf)/P(A) 

735 
+ float sum; 

736 
+ 

737 
+ // S73[A][B][cf][b] = score for matching observed ss state A in query with state B in template 

738 
+ // predicted with confidence cf, when query and template columns are diverged by b units 

739 
+ for (cf=0; cf<MAXCF; cf++) 

740 
+ for (A=0; A<NDSSP; A++) 

741 
+ for (B=0; B<NSSPRED; B++) 

742 
+ { 

743 
+ P73[A][B][cf] = 1.par.ssa + par.ssa*Ppred[cf*NSSPRED*NDSSP + B*NDSSP + A]; 

744 
+ S73[A][B][cf] = log2(P73[A][B][cf]); 

745 
+ } 

746 
+ 

747 
+ for (B=0; B<NSSPRED; B++) 

748 
+ for (cf=0; cf<MAXCF; cf++) 

749 
+ for (BB=0; BB<NSSPRED; BB++) 

750 
+ for (ccf=0; ccf<MAXCF; ccf++) 

751 
+ { 

752 
+ sum=0; 

753 
+ for (A=1; A<NDSSP; A++) 

754 
+ sum += P73[A][B][cf] * P73[A][BB][ccf] * Pobs[A]; 

755 
+ S33[B][cf][BB][ccf] = log2(sum); 

756 
+ } 

757 
+} /* this is the end of SetSecStrucSubstitutionMatrix() */ 

758 
+ 

759 
+ 

760 
+ 

761 
+/* 

762 
+ * EOF hhmatricesC.h 

763 
+ */ 