Package Bio :: Package HMM :: Module DynamicProgramming
[hide private]
[frames] | no frames]

Source Code for Module Bio.HMM.DynamicProgramming

  1  """Dynamic Programming algorithms for general usage. 
  2   
  3  This module contains classes which implement Dynamic Programming 
  4  algorithms that can be used generally. 
  5  """ 
  6   
  7   
8 -class AbstractDPAlgorithms(object):
9 """An abstract class to calculate forward and backward probabilities. 10 11 This class should not be instantiated directly, but should be used 12 through a derived class which implements proper scaling of variables. 13 14 This class is just meant to encapsulate the basic forward and backward 15 algorithms, and allow derived classes to deal with the problems of 16 multiplying probabilities. 17 18 Derived class of this must implement: 19 20 o _forward_recursion -- Calculate the forward values in the recursion 21 using some kind of technique for preventing underflow errors. 22 23 o _backward_recursion -- Calculate the backward values in the recursion 24 step using some technique to prevent underflow errors. 25 """
26 - def __init__(self, markov_model, sequence):
27 """Initialize to calculate forward and backward probabilities. 28 29 Arguments: 30 31 o markov_model -- The current Markov model we are working with. 32 33 o sequence -- A training sequence containing a set of emissions. 34 """ 35 self._mm = markov_model 36 self._seq = sequence
37
38 - def _forward_recursion(self, cur_state, sequence_pos, forward_vars):
39 """Calculate the forward recursion value. 40 """ 41 raise NotImplementedError("Subclasses must implement")
42
43 - def forward_algorithm(self):
44 """Calculate sequence probability using the forward algorithm. 45 46 This implements the forward algorithm, as described on p57-58 of 47 Durbin et al. 48 49 Returns: 50 51 o A dictionary containing the forward variables. This has keys of the 52 form (state letter, position in the training sequence), and values 53 containing the calculated forward variable. 54 55 o The calculated probability of the sequence. 56 """ 57 # all of the different letters that the state path can be in 58 state_letters = self._seq.states.alphabet.letters 59 60 # -- initialize the algorithm 61 # 62 # NOTE: My index numbers are one less than what is given in Durbin 63 # et al, since we are indexing the sequence going from 0 to 64 # (Length - 1) not 1 to Length, like in Durbin et al. 65 # 66 forward_var = {} 67 # f_{0}(0) = 1 68 forward_var[(state_letters[0], -1)] = 1 69 # f_{k}(0) = 0, for k > 0 70 for k in range(1, len(state_letters)): 71 forward_var[(state_letters[k], -1)] = 0 72 73 # -- now do the recursion step 74 # loop over the training sequence 75 # Recursion step: (i = 1 .. L) 76 for i in range(len(self._seq.emissions)): 77 # now loop over the letters in the state path 78 for main_state in state_letters: 79 # calculate the forward value using the appropriate 80 # method to prevent underflow errors 81 forward_value = self._forward_recursion(main_state, i, 82 forward_var) 83 84 if forward_value is not None: 85 forward_var[(main_state, i)] = forward_value 86 87 # -- termination step - calculate the probability of the sequence 88 first_state = state_letters[0] 89 seq_prob = 0 90 91 for state_item in state_letters: 92 # f_{k}(L) 93 forward_value = forward_var[(state_item, 94 len(self._seq.emissions) - 1)] 95 # a_{k0} 96 transition_value = self._mm.transition_prob[(state_item, 97 first_state)] 98 99 seq_prob += forward_value * transition_value 100 101 return forward_var, seq_prob
102
103 - def _backward_recursion(self, cur_state, sequence_pos, forward_vars):
104 """Calculate the backward recursion value. 105 """ 106 raise NotImplementedError("Subclasses must implement")
107
108 - def backward_algorithm(self):
109 """Calculate sequence probability using the backward algorithm. 110 111 This implements the backward algorithm, as described on p58-59 of 112 Durbin et al. 113 114 Returns: 115 116 o A dictionary containing the backwards variables. This has keys 117 of the form (state letter, position in the training sequence), 118 and values containing the calculated backward variable. 119 """ 120 # all of the different letters that the state path can be in 121 state_letters = self._seq.states.alphabet.letters 122 123 # -- initialize the algorithm 124 # 125 # NOTE: My index numbers are one less than what is given in Durbin 126 # et al, since we are indexing the sequence going from 0 to 127 # (Length - 1) not 1 to Length, like in Durbin et al. 128 # 129 backward_var = {} 130 131 first_letter = state_letters[0] 132 # b_{k}(L) = a_{k0} for all k 133 for state in state_letters: 134 backward_var[(state, len(self._seq.emissions) - 1)] = \ 135 self._mm.transition_prob[(state, state_letters[0])] 136 137 # -- recursion 138 # first loop over the training sequence backwards 139 # Recursion step: (i = L - 1 ... 1) 140 all_indexes = range(len(self._seq.emissions) - 1) 141 all_indexes.reverse() 142 for i in all_indexes: 143 # now loop over the letters in the state path 144 for main_state in state_letters: 145 # calculate the backward value using the appropriate 146 # method to prevent underflow errors 147 backward_value = self._backward_recursion(main_state, i, 148 backward_var) 149 150 if backward_value is not None: 151 backward_var[(main_state, i)] = backward_value 152 153 # skip the termination step to avoid recalculations -- you should 154 # get sequence probabilities using the forward algorithm 155 156 return backward_var
157 158
159 -class ScaledDPAlgorithms(AbstractDPAlgorithms):
160 """Implement forward and backward algorithms using a rescaling approach. 161 162 This scales the f and b variables, so that they remain within a 163 manageable numerical interval during calculations. This approach is 164 described in Durbin et al. on p 78. 165 166 This approach is a little more straightforward then log transformation 167 but may still give underflow errors for some types of models. In these 168 cases, the LogDPAlgorithms class should be used. 169 """
170 - def __init__(self, markov_model, sequence):
171 """Initialize the scaled approach to calculating probabilities. 172 Arguments: 173 174 o markov_model -- The current Markov model we are working with. 175 176 o sequence -- A TrainingSequence object that must have a 177 set of emissions to work with. 178 """ 179 AbstractDPAlgorithms.__init__(self, markov_model, sequence) 180 181 self._s_values = {}
182
183 - def _calculate_s_value(self, seq_pos, previous_vars):
184 """Calculate the next scaling variable for a sequence position. 185 186 This utilizes the approach of choosing s values such that the 187 sum of all of the scaled f values is equal to 1. 188 189 Arguments: 190 191 o seq_pos -- The current position we are at in the sequence. 192 193 o previous_vars -- All of the forward or backward variables 194 calculated so far. 195 196 Returns: 197 198 o The calculated scaling variable for the sequence item. 199 """ 200 # all of the different letters the state can have 201 state_letters = self._seq.states.alphabet.letters 202 203 # loop over all of the possible states 204 s_value = 0 205 for main_state in state_letters: 206 emission = self._mm.emission_prob[(main_state, 207 self._seq.emissions[seq_pos])] 208 209 # now sum over all of the previous vars and transitions 210 trans_and_var_sum = 0 211 for second_state in self._mm.transitions_from(main_state): 212 # the value of the previous f or b value 213 var_value = previous_vars[(second_state, seq_pos - 1)] 214 215 # the transition probability 216 trans_value = self._mm.transition_prob[(second_state, 217 main_state)] 218 219 trans_and_var_sum += (var_value * trans_value) 220 221 s_value += (emission * trans_and_var_sum) 222 223 return s_value
224
225 - def _forward_recursion(self, cur_state, sequence_pos, forward_vars):
226 """Calculate the value of the forward recursion. 227 228 Arguments: 229 230 o cur_state -- The letter of the state we are calculating the 231 forward variable for. 232 233 o sequence_pos -- The position we are at in the training seq. 234 235 o forward_vars -- The current set of forward variables 236 """ 237 # calculate the s value, if we haven't done so already (ie. during 238 # a previous forward or backward recursion) 239 if sequence_pos not in self._s_values: 240 self._s_values[sequence_pos] = \ 241 self._calculate_s_value(sequence_pos, forward_vars) 242 243 # e_{l}(x_{i}) 244 seq_letter = self._seq.emissions[sequence_pos] 245 cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)] 246 # divide by the scaling value 247 scale_emission_prob = (float(cur_emission_prob) / 248 float(self._s_values[sequence_pos])) 249 250 # loop over all of the possible states at the position 251 state_pos_sum = 0 252 have_transition = 0 253 for second_state in self._mm.transitions_from(cur_state): 254 have_transition = 1 255 256 # get the previous forward_var values 257 # f_{k}(i - 1) 258 prev_forward = forward_vars[(second_state, sequence_pos - 1)] 259 260 # a_{kl} 261 cur_trans_prob = self._mm.transition_prob[(second_state, 262 cur_state)] 263 state_pos_sum += prev_forward * cur_trans_prob 264 265 # if we have the possiblity of having a transition 266 # return the recursion value 267 if have_transition: 268 return (scale_emission_prob * state_pos_sum) 269 else: 270 return None
271
272 - def _backward_recursion(self, cur_state, sequence_pos, backward_vars):
273 """Calculate the value of the backward recursion 274 275 Arguments: 276 277 o cur_state -- The letter of the state we are calculating the 278 forward variable for. 279 280 o sequence_pos -- The position we are at in the training seq. 281 282 o backward_vars -- The current set of backward variables 283 """ 284 # calculate the s value, if we haven't done so already (ie. during 285 # a previous forward or backward recursion) 286 if sequence_pos not in self._s_values: 287 self._s_values[sequence_pos] = \ 288 self._calculate_s_value(sequence_pos, backward_vars) 289 290 # loop over all of the possible states at the position 291 state_pos_sum = 0 292 have_transition = 0 293 for second_state in self._mm.transitions_from(cur_state): 294 have_transition = 1 295 # e_{l}(x_{i + 1}) 296 seq_letter = self._seq.emissions[sequence_pos + 1] 297 cur_emission_prob = self._mm.emission_prob[(cur_state, seq_letter)] 298 299 # get the previous backward_var value 300 # b_{l}(i + 1) 301 prev_backward = backward_vars[(second_state, sequence_pos + 1)] 302 303 # the transition probability -- a_{kl} 304 cur_transition_prob = self._mm.transition_prob[(cur_state, 305 second_state)] 306 307 state_pos_sum += (cur_emission_prob * prev_backward * 308 cur_transition_prob) 309 310 # if we have a probability for a transition, return it 311 if have_transition: 312 return (state_pos_sum / float(self._s_values[sequence_pos])) 313 # otherwise we have no probability (ie. we can't do this transition) 314 # and return None 315 else: 316 return None
317 318
319 -class LogDPAlgorithms(AbstractDPAlgorithms):
320 """Implement forward and backward algorithms using a log approach. 321 322 This uses the approach of calculating the sum of log probabilities 323 using a lookup table for common values. 324 325 XXX This is not implemented yet! 326 """
327 - def __init__(self, markov_model, sequence):
328 raise NotImplementedError("Haven't coded this yet...")
329