1 """Dynamic Programming algorithms for general usage.
2
3 This module contains classes which implement Dynamic Programming
4 algorithms that can be used generally.
5 """
6
7
9 """An abstract class to calculate forward and backward probabilities.
10
11 This class should not be instantiated directly, but should be used
12 through a derived class which implements proper scaling of variables.
13
14 This class is just meant to encapsulate the basic forward and backward
15 algorithms, and allow derived classes to deal with the problems of
16 multiplying probabilities.
17
18 Derived class of this must implement:
19
20 o _forward_recursion -- Calculate the forward values in the recursion
21 using some kind of technique for preventing underflow errors.
22
23 o _backward_recursion -- Calculate the backward values in the recursion
24 step using some technique to prevent underflow errors.
25 """
26 - def __init__(self, markov_model, sequence):
27 """Initialize to calculate forward and backward probabilities.
28
29 Arguments:
30
31 o markov_model -- The current Markov model we are working with.
32
33 o sequence -- A training sequence containing a set of emissions.
34 """
35 self._mm = markov_model
36 self._seq = sequence
37
39 """Calculate the forward recursion value.
40 """
41 raise NotImplementedError("Subclasses must implement")
42
44 """Calculate sequence probability using the forward algorithm.
45
46 This implements the forward algorithm, as described on p57-58 of
47 Durbin et al.
48
49 Returns:
50
51 o A dictionary containing the forward variables. This has keys of the
52 form (state letter, position in the training sequence), and values
53 containing the calculated forward variable.
54
55 o The calculated probability of the sequence.
56 """
57
58 state_letters = self._seq.states.alphabet.letters
59
60
61
62
63
64
65
66 forward_var = {}
67
68 forward_var[(state_letters[0], -1)] = 1
69
70 for k in range(1, len(state_letters)):
71 forward_var[(state_letters[k], -1)] = 0
72
73
74
75
76 for i in range(len(self._seq.emissions)):
77
78 for main_state in state_letters:
79
80
81 forward_value = self._forward_recursion(main_state, i,
82 forward_var)
83
84 if forward_value is not None:
85 forward_var[(main_state, i)] = forward_value
86
87
88 first_state = state_letters[0]
89 seq_prob = 0
90
91 for state_item in state_letters:
92
93 forward_value = forward_var[(state_item,
94 len(self._seq.emissions) - 1)]
95
96 transition_value = self._mm.transition_prob[(state_item,
97 first_state)]
98
99 seq_prob += forward_value * transition_value
100
101 return forward_var, seq_prob
102
104 """Calculate the backward recursion value.
105 """
106 raise NotImplementedError("Subclasses must implement")
107
109 """Calculate sequence probability using the backward algorithm.
110
111 This implements the backward algorithm, as described on p58-59 of
112 Durbin et al.
113
114 Returns:
115
116 o A dictionary containing the backwards variables. This has keys
117 of the form (state letter, position in the training sequence),
118 and values containing the calculated backward variable.
119 """
120
121 state_letters = self._seq.states.alphabet.letters
122
123
124
125
126
127
128
129 backward_var = {}
130
131 first_letter = state_letters[0]
132
133 for state in state_letters:
134 backward_var[(state, len(self._seq.emissions) - 1)] = \
135 self._mm.transition_prob[(state, state_letters[0])]
136
137
138
139
140 all_indexes = range(len(self._seq.emissions) - 1)
141 all_indexes.reverse()
142 for i in all_indexes:
143
144 for main_state in state_letters:
145
146
147 backward_value = self._backward_recursion(main_state, i,
148 backward_var)
149
150 if backward_value is not None:
151 backward_var[(main_state, i)] = backward_value
152
153
154
155
156 return backward_var
157
158
160 """Implement forward and backward algorithms using a rescaling approach.
161
162 This scales the f and b variables, so that they remain within a
163 manageable numerical interval during calculations. This approach is
164 described in Durbin et al. on p 78.
165
166 This approach is a little more straightforward then log transformation
167 but may still give underflow errors for some types of models. In these
168 cases, the LogDPAlgorithms class should be used.
169 """
170 - def __init__(self, markov_model, sequence):
171 """Initialize the scaled approach to calculating probabilities.
172 Arguments:
173
174 o markov_model -- The current Markov model we are working with.
175
176 o sequence -- A TrainingSequence object that must have a
177 set of emissions to work with.
178 """
179 AbstractDPAlgorithms.__init__(self, markov_model, sequence)
180
181 self._s_values = {}
182
184 """Calculate the next scaling variable for a sequence position.
185
186 This utilizes the approach of choosing s values such that the
187 sum of all of the scaled f values is equal to 1.
188
189 Arguments:
190
191 o seq_pos -- The current position we are at in the sequence.
192
193 o previous_vars -- All of the forward or backward variables
194 calculated so far.
195
196 Returns:
197
198 o The calculated scaling variable for the sequence item.
199 """
200
201 state_letters = self._seq.states.alphabet.letters
202
203
204 s_value = 0
205 for main_state in state_letters:
206 emission = self._mm.emission_prob[(main_state,
207 self._seq.emissions[seq_pos])]
208
209
210 trans_and_var_sum = 0
211 for second_state in self._mm.transitions_from(main_state):
212
213 var_value = previous_vars[(second_state, seq_pos - 1)]
214
215
216 trans_value = self._mm.transition_prob[(second_state,
217 main_state)]
218
219 trans_and_var_sum += (var_value * trans_value)
220
221 s_value += (emission * trans_and_var_sum)
222
223 return s_value
224
226 """Calculate the value of the forward recursion.
227
228 Arguments:
229
230 o cur_state -- The letter of the state we are calculating the
231 forward variable for.
232
233 o sequence_pos -- The position we are at in the training seq.
234
235 o forward_vars -- The current set of forward variables
236 """
237
238
239 if sequence_pos not in self._s_values:
240 self._s_values[sequence_pos] = \
241 self._calculate_s_value(sequence_pos, forward_vars)
242
243
244 seq_letter = self._seq.emissions[sequence_pos]
245 cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]
246
247 scale_emission_prob = (float(cur_emission_prob) /
248 float(self._s_values[sequence_pos]))
249
250
251 state_pos_sum = 0
252 have_transition = 0
253 for second_state in self._mm.transitions_from(cur_state):
254 have_transition = 1
255
256
257
258 prev_forward = forward_vars[(second_state, sequence_pos - 1)]
259
260
261 cur_trans_prob = self._mm.transition_prob[(second_state,
262 cur_state)]
263 state_pos_sum += prev_forward * cur_trans_prob
264
265
266
267 if have_transition:
268 return (scale_emission_prob * state_pos_sum)
269 else:
270 return None
271
273 """Calculate the value of the backward recursion
274
275 Arguments:
276
277 o cur_state -- The letter of the state we are calculating the
278 forward variable for.
279
280 o sequence_pos -- The position we are at in the training seq.
281
282 o backward_vars -- The current set of backward variables
283 """
284
285
286 if sequence_pos not in self._s_values:
287 self._s_values[sequence_pos] = \
288 self._calculate_s_value(sequence_pos, backward_vars)
289
290
291 state_pos_sum = 0
292 have_transition = 0
293 for second_state in self._mm.transitions_from(cur_state):
294 have_transition = 1
295
296 seq_letter = self._seq.emissions[sequence_pos + 1]
297 cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)]
298
299
300
301 prev_backward = backward_vars[(second_state, sequence_pos + 1)]
302
303
304 cur_transition_prob = self._mm.transition_prob[(cur_state,
305 second_state)]
306
307 state_pos_sum += (cur_emission_prob * prev_backward *
308 cur_transition_prob)
309
310
311 if have_transition:
312 return (state_pos_sum / float(self._s_values[sequence_pos]))
313
314
315 else:
316 return None
317
318
320 """Implement forward and backward algorithms using a log approach.
321
322 This uses the approach of calculating the sum of log probabilities
323 using a lookup table for common values.
324
325 XXX This is not implemented yet!
326 """
327 - def __init__(self, markov_model, sequence):
328 raise NotImplementedError("Haven't coded this yet...")
329