"""Dynamic Programming algorithms for general usage.

This module contains classes which implement Dynamic Programming
algorithms that can be used generally.
"""

from Bio._py3k import range

class AbstractDPAlgorithms(object):
    """An abstract class to calculate forward and backward probabilities.

    This class should not be instantiated directly, but should be used
    through a derived class which implements proper scaling of variables.

    This class is just meant to encapsulate the basic forward and backward
    algorithms, and allow derived classes to deal with the problems of
    multiplying probabilities.

    Derived classes of this must implement:

    o _forward_recursion -- Calculate the forward values in the recursion
    using some kind of technique for preventing underflow errors.

    o _backward_recursion -- Calculate the backward values in the recursion
    step using some technique to prevent underflow errors.
    """
    def __init__(self, markov_model, sequence):
        """Initialize to calculate forward and backward probabilities.

        Arguments:

        o markov_model -- The current Markov model we are working with.

        o sequence -- A training sequence containing a set of emissions.
        """
        self._mm = markov_model
        self._seq = sequence

    def _forward_recursion(self, cur_state, sequence_pos, forward_vars):
        """Calculate the forward recursion value (subclass responsibility)."""
        raise NotImplementedError("Subclasses must implement")

    def forward_algorithm(self):
        """Calculate sequence probability using the forward algorithm.

        This implements the forward algorithm, as described on p57-58 of
        Durbin et al.

        Returns:

        o A dictionary containing the forward variables. This has keys of the
        form (state letter, position in the training sequence), and values
        containing the calculated forward variable.

        o The calculated probability of the sequence.
        """
        # All of the different letters the state path can be in.
        state_letters = self._seq.states.alphabet.letters

        # -- initialize the algorithm
        #
        # NOTE: the index numbers here are one less than in Durbin et al,
        # since the sequence is indexed 0 .. (length - 1) rather than
        # 1 .. length, so position -1 plays the role of Durbin's position 0.
        forward_var = {}
        # f_{0}(0) = 1: start in the first state with certainty.
        forward_var[(state_letters[0], -1)] = 1
        # f_{k}(0) = 0 for all other states k.
        for k in range(1, len(state_letters)):
            forward_var[(state_letters[k], -1)] = 0

        # -- recursion step, over every position in the training sequence
        for i in range(len(self._seq.emissions)):
            for main_state in state_letters:
                # The derived class supplies the underflow-safe recursion.
                forward_value = self._forward_recursion(main_state, i,
                                                        forward_var)

                # None signals "no transition into this state is possible".
                if forward_value is not None:
                    forward_var[(main_state, i)] = forward_value

        # -- termination: P(seq) = sum_k f_{k}(L) * a_{k0}
        first_state = state_letters[0]
        seq_prob = 0

        for state_item in state_letters:
            # f_{k}(L)
            forward_value = forward_var[(state_item,
                                         len(self._seq.emissions) - 1)]
            # a_{k0}
            transition_value = self._mm.transition_prob[(state_item,
                                                         first_state)]

            seq_prob += forward_value * transition_value

        return forward_var, seq_prob

    def _backward_recursion(self, cur_state, sequence_pos, forward_vars):
        """Calculate the backward recursion value (subclass responsibility)."""
        raise NotImplementedError("Subclasses must implement")

    def backward_algorithm(self):
        """Calculate sequence probability using the backward algorithm.

        This implements the backward algorithm, as described on p58-59 of
        Durbin et al.

        Returns:

        o A dictionary containing the backward variables. This has keys
        of the form (state letter, position in the training sequence),
        and values containing the calculated backward variable.
        """
        # All of the different letters the state path can be in.
        state_letters = self._seq.states.alphabet.letters

        # -- initialize the algorithm
        #
        # NOTE: indexes are one less than in Durbin et al (0-based sequence).
        backward_var = {}

        # b_{k}(L) = a_{k0} for all states k.
        for state in state_letters:
            backward_var[(state, len(self._seq.emissions) - 1)] = \
                self._mm.transition_prob[(state, state_letters[0])]

        # -- recursion step, walking the training sequence backwards
        # (i = L - 1 ... 1 in Durbin's numbering).
        all_indexes = list(range(len(self._seq.emissions) - 1))
        all_indexes.reverse()
        for i in all_indexes:
            for main_state in state_letters:
                # The derived class supplies the underflow-safe recursion.
                backward_value = self._backward_recursion(main_state, i,
                                                          backward_var)

                # None signals "no transition from this state is possible".
                if backward_value is not None:
                    backward_var[(main_state, i)] = backward_value

        # Skip the termination step; callers only want the backward variables.
        return backward_var
class ScaledDPAlgorithms(AbstractDPAlgorithms):
    """Implement forward and backward algorithms using a rescaling approach.

    This scales the f and b variables, so that they remain within a
    manageable numerical interval during calculations. This approach is
    described in Durbin et al. on p 78.

    This approach is a little more straightforward than log transformation
    but may still give underflow errors for some types of models. In these
    cases, the LogDPAlgorithms class should be used.
    """
    def __init__(self, markov_model, sequence):
        """Initialize the scaled approach to calculating probabilities.

        Arguments:

        o markov_model -- The current Markov model we are working with.

        o sequence -- A TrainingSequence object that must have a
        set of emissions to work with.
        """
        AbstractDPAlgorithms.__init__(self, markov_model, sequence)

        # Cache of scaling values, keyed by sequence position; shared by
        # the forward and backward recursions so each s_i is computed once.
        self._s_values = {}

    def _calculate_s_value(self, seq_pos, previous_vars):
        """Calculate the next scaling variable for a sequence position.

        This utilizes the approach of choosing s values such that the
        sum of all of the scaled f values is equal to 1.

        Arguments:

        o seq_pos -- The current position we are at in the sequence.

        o previous_vars -- All of the forward or backward variables
        calculated so far.

        Returns:

        o The calculated scaling variable for the sequence item.
        """
        # All of the different letters the state can have.
        state_letters = self._seq.states.alphabet.letters

        # s_i = sum_l e_{l}(x_i) * sum_k f_{k}(i - 1) * a_{kl}
        s_value = 0
        for main_state in state_letters:
            emission = self._mm.emission_prob[(main_state,
                                               self._seq.emissions[seq_pos])]

            # Sum over all previous variables and transitions into main_state.
            trans_and_var_sum = 0
            for second_state in self._mm.transitions_from(main_state):
                # The value of the previous f or b variable.
                var_value = previous_vars[(second_state, seq_pos - 1)]

                # The probability of the transition second_state -> main_state.
                trans_value = self._mm.transition_prob[(second_state,
                                                        main_state)]

                trans_and_var_sum += (var_value * trans_value)

            s_value += (emission * trans_and_var_sum)

        return s_value

    def _forward_recursion(self, cur_state, sequence_pos, forward_vars):
        """Calculate the value of the forward recursion.

        Arguments:

        o cur_state -- The letter of the state we are calculating the
        forward variable for.

        o sequence_pos -- The position we are at in the training seq.

        o forward_vars -- The current set of forward variables
        """
        # Calculate the s value, if we haven't already done so during a
        # previous forward or backward recursion at this position.
        if sequence_pos not in self._s_values:
            self._s_values[sequence_pos] = \
                self._calculate_s_value(sequence_pos, forward_vars)

        # e_{l}(x_{i})
        seq_letter = self._seq.emissions[sequence_pos]
        cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]
        # Divide by the scaling value to keep f in a manageable range.
        scale_emission_prob = (float(cur_emission_prob) /
                               float(self._s_values[sequence_pos]))

        # sum_k f_{k}(i - 1) * a_{kl} over states with a transition into
        # cur_state.
        state_pos_sum = 0
        have_transition = 0
        for second_state in self._mm.transitions_from(cur_state):
            have_transition = 1

            # f_{k}(i - 1)
            prev_forward = forward_vars[(second_state, sequence_pos - 1)]

            # a_{kl}
            cur_trans_prob = self._mm.transition_prob[(second_state,
                                                       cur_state)]
            state_pos_sum += prev_forward * cur_trans_prob

        # Only return a value if some transition into cur_state exists;
        # None tells the caller to leave this variable unset.
        if have_transition:
            return (scale_emission_prob * state_pos_sum)
        else:
            return None

    def _backward_recursion(self, cur_state, sequence_pos, backward_vars):
        """Calculate the value of the backward recursion.

        Arguments:

        o cur_state -- The letter of the state we are calculating the
        backward variable for.

        o sequence_pos -- The position we are at in the training seq.

        o backward_vars -- The current set of backward variables
        """
        # Calculate the s value, if we haven't already done so during a
        # previous forward or backward recursion at this position.
        if sequence_pos not in self._s_values:
            self._s_values[sequence_pos] = \
                self._calculate_s_value(sequence_pos, backward_vars)

        # sum_l a_{kl} * e(x_{i + 1}) * b_{l}(i + 1)
        state_pos_sum = 0
        have_transition = 0
        for second_state in self._mm.transitions_from(cur_state):
            have_transition = 1

            # Emission probability at the next sequence position.
            # NOTE(review): this indexes emission_prob with cur_state, while
            # Durbin's b_{k}(i) formula uses the successor state (second_state)
            # for e_{l}(x_{i+1}) -- confirm against the model before relying
            # on these backward values. Behavior kept as-is here.
            seq_letter = self._seq.emissions[sequence_pos + 1]
            cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]

            # b_{l}(i + 1)
            prev_backward = backward_vars[(second_state, sequence_pos + 1)]

            # a_{kl}
            cur_transition_prob = self._mm.transition_prob[(cur_state,
                                                            second_state)]

            state_pos_sum += (cur_emission_prob * prev_backward *
                              cur_transition_prob)

        # Only return a value if some transition from cur_state exists;
        # None tells the caller to leave this variable unset.
        if have_transition:
            return (state_pos_sum / float(self._s_values[sequence_pos]))
        else:
            return None
class LogDPAlgorithms(AbstractDPAlgorithms):
    """Implement forward and backward algorithms using a log approach.

    This uses the approach of calculating the sum of log probabilities
    using a lookup table for common values.

    XXX This is not implemented yet!
    """
    def __init__(self, markov_model, sequence):
        """Placeholder constructor -- this class is not implemented yet."""
        raise NotImplementedError("Haven't coded this yet...")