1
2
3
4
5
6
7 """Reduced alphabets which lump together several amino-acids into one letter.
8
9 Reduced (redundant or simplified) alphabets are used to represent protein sequences using an
10 alternative alphabet which lumps together several amino-acids into one letter, based
11 on physico-chemical traits. For example, all the aliphatics (I,L,V) are usually
12 quite interchangeable, so many sequence studies group them into one letter.
13
14 Examples of reduced alphabets are available in:
15
16 http://viscose.ifg.uni-muenster.de/html/alphabets.html
17
18 The Murphy tables are from here:
19
20 Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid
21 alphabets for protein fold recognition and implications for folding.
22 Protein Eng. 13(3):149-152
23
24 Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of
25 the tables here, or a user-defined table.
26 """
27
28 from Bio import Alphabet
29
30
# Murphy 15-letter reduction table: maps each of the 20 amino-acid letters to
# the letter that represents it in the reduced alphabet.
# Each entry below is (representative letter, residues folded into it); the
# groups are listed in the order in which the keys are inserted.
murphy_15_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "S"),
        ("T", "T"),
        ("P", "P"),
        ("F", "FY"),
        ("W", "W"),
        ("E", "E"),
        ("D", "D"),
        ("N", "N"),
        ("Q", "Q"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}
51
52
53 -class Murphy15(Alphabet.ProteinAlphabet):
56 murphy_15 = Murphy15()
57
# Murphy 10-letter reduction table: maps each of the 20 amino-acid letters to
# the letter that represents it in the reduced alphabet.
# Each entry below is (representative letter, residues folded into it); the
# groups are listed in the order in which the keys are inserted.
murphy_10_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}
78
79
80 -class Murphy10(Alphabet.ProteinAlphabet):
83 murphy_10 = Murphy10()
84
# Murphy 8-letter reduction table: maps each of the 20 amino-acid letters to
# the letter that represents it in the reduced alphabet.
# Each entry below is (representative letter, residues folded into it); the
# groups are listed in the order in which the keys are inserted.
murphy_8_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIMC"),
        ("A", "AG"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}
105
106
107 -class Murphy8(Alphabet.ProteinAlphabet):
110 murphy_8 = Murphy8()
111
# Murphy 4-letter reduction table: maps each of the 20 amino-acid letters to
# the letter that represents it in the reduced alphabet.
# Each entry below is (representative letter, residues folded into it); the
# groups are listed in the order in which the keys are inserted.
murphy_4_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIMC"),
        ("A", "AGSTP"),
        ("F", "FYW"),
        ("E", "EDNQKRH"),
    )
    for residue in members
}
132
133
134 -class Murphy4(Alphabet.ProteinAlphabet):
137 murphy_4 = Murphy4()
138
# HP-model reduction table: maps each of the 20 amino-acid letters to one of
# the two letters "P" or "H".
# NOTE(review): presumably P = polar and H = hydrophobic - confirm.
# Each entry below is (representative letter, residues folded into it); the
# groups are listed in the order in which the keys are inserted.
hp_model_tab = {
    residue: representative
    for representative, members in (
        ("P", "AGTSNQDEHRKP"),
        ("H", "CMFILVWY"),
    )
    for residue in members
}
159
160
161 -class HPModel(Alphabet.ProteinAlphabet):
164 hp_model = HPModel()
165
# PC5 reduction table: maps each of the 20 amino-acid letters to one of the
# five letters A, R, C, T or D.
# Each entry below is (representative letter, residues folded into it); the
# groups are listed in the order in which the keys are inserted.
pc_5_table = {
    residue: representative
    for representative, members in (
        ("A", "IVL"),
        ("R", "FYWH"),
        ("C", "KRDE"),
        ("T", "GACS"),
        ("D", "TMQNP"),
    )
    for residue in members
}
186
187
188 -class PC5(Alphabet.ProteinAlphabet):
# Module-level instance of the PC5 alphabet, following the pattern used after
# every other alphabet class in this module (murphy_15, murphy_10, ...).
# This line previously re-created ``hp_model`` (a copy-paste slip), which left
# PC5 without an instance; ``hp_model`` itself is still bound where the
# HPModel class is defined.
pc_5 = PC5()
192