1
2
3
4
5
6 """
7 Maximum Entropy code.
8
9 Uses Improved Iterative Scaling:
10 XXX ref
11
12 # XXX need to define terminology
13
14 """
15
16 import numpy
17
18
20 """Holds information for a Maximum Entropy classifier.
21
22 Members:
23 classes List of the possible classes of data.
24 alphas List of the weights for each feature.
25 feature_fns List of the feature functions.
26
27 """
29 self.classes = []
30 self.alphas = []
31 self.feature_fns = []
32
33
35 """calculate(me, observation) -> list of log probs
36
37 Calculate the log of the probability for each class. me is a
38 MaxEntropy object that has been trained. observation is a vector
39 representing the observed data. The return value is a list of
40 unnormalized log probabilities for each class.
41
42 """
43 scores = []
44 assert len(me.feature_fns) == len(me.alphas)
45 for klass in me.classes:
46 lprob = 0.0
47 for fn, alpha in zip(me.feature_fns, me.alphas):
48 lprob += fn(observation, klass) * alpha
49 scores.append(lprob)
50 return scores
51
52
54 """classify(me, observation) -> class
55
56 Classify an observation into a class.
57
58 """
59 scores = calculate(me, observation)
60 max_score, klass = scores[0], me.classes[0]
61 for i in range(1, len(scores)):
62 if scores[i] > max_score:
63 max_score, klass = scores[i], me.classes[i]
64 return klass
65
66
68 """_eval_feature_fn(fn, xs, classes) -> dict of values
69
70 Evaluate a feature function on every instance of the training set
71 and class. fn is a callback function that takes two parameters: a
72 training instance and a class. Return a dictionary of (training
73 set index, class index) -> non-zero value. Values of 0 are not
74 stored in the dictionary.
75
76 """
77 values = {}
78 for i in range(len(xs)):
79 for j in range(len(classes)):
80 f = fn(xs[i], classes[j])
81 if f != 0:
82 values[(i, j)] = f
83 return values
84
85
87 """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations
88
89 Calculate the expectation of each function from the data. This is
90 the constraint for the maximum entropy distribution. Return a
91 list of expectations, parallel to the list of features.
92
93 """
94
95
96 class2index = {}
97 for index, key in enumerate(classes):
98 class2index[key] = index
99 ys_i = [class2index[y] for y in ys]
100
101 expect = []
102 N = len(xs)
103 for feature in features:
104 s = 0
105 for i in range(N):
106 s += feature.get((i, ys_i[i]), 0)
107 expect.append(float(s) / N)
108 return expect
109
110
112 """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.
113
114 Calculate the expectation of each feature from the model. This is
115 not used in maximum entropy training, but provides a good function
116 for debugging.
117
118 """
119
120
121 p_yx = _calc_p_class_given_x(xs, classes, features, alphas)
122
123 expects = []
124 for feature in features:
125 sum = 0.0
126 for (i, j), f in feature.iteritems():
127 sum += p_yx[i][j] * f
128 expects.append(sum/len(xs))
129 return expects
130
131
133 """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix
134
135 Calculate P(y|x), where y is the class and x is an instance from
136 the training set. Return a XSxCLASSES matrix of probabilities.
137
138 """
139 prob_yx = numpy.zeros((len(xs), len(classes)))
140
141
142 assert len(features) == len(alphas)
143 for feature, alpha in zip(features, alphas):
144 for (x, y), f in feature.iteritems():
145 prob_yx[x][y] += alpha * f
146
147 prob_yx = numpy.exp(prob_yx)
148
149 for i in range(len(xs)):
150 z = sum(prob_yx[i])
151 prob_yx[i] = prob_yx[i] / z
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166 return prob_yx
167
168
170 """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
171
172 f_sharp = numpy.zeros((N, nclasses))
173 for feature in features:
174 for (i, j), f in feature.iteritems():
175 f_sharp[i][j] += f
176 return f_sharp
177
178
179 -def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx,
180 max_newton_iterations, newton_converge):
181
182
183 delta = 0.0
184 iters = 0
185 while iters < max_newton_iterations:
186 f_newton = df_newton = 0.0
187 for (i, j), f in feature.iteritems():
188 prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
189 f_newton += prod
190 df_newton += prod * f_sharp[i][j]
191 f_newton, df_newton = empirical - f_newton / N, -df_newton / N
192
193 ratio = f_newton / df_newton
194 delta -= ratio
195 if numpy.fabs(ratio) < newton_converge:
196 break
197 iters = iters + 1
198 else:
199 raise RuntimeError("Newton's method did not converge")
200 return delta
201
202
203 -def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical,
204 max_newton_iterations, newton_converge):
205 """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
206
207
208
209 p_yx = _calc_p_class_given_x(xs, classes, features, alphas)
210
211 N = len(xs)
212 newalphas = alphas[:]
213 for i in range(len(alphas)):
214 delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx,
215 max_newton_iterations, newton_converge)
216 newalphas[i] += delta
217 return newalphas
218
219
220 -def train(training_set, results, feature_fns, update_fn=None,
221 max_iis_iterations=10000, iis_converge=1.0e-5,
222 max_newton_iterations=100, newton_converge=1.0e-10):
223 """Train a maximum entropy classifier, returns MaxEntropy object.
224
225 Train a maximum entropy classifier on a training set.
226 training_set is a list of observations. results is a list of the
227 class assignments for each observation. feature_fns is a list of
228 the features. These are callback functions that take an
229 observation and class and return a 1 or 0. update_fn is a
230 callback function that is called at each training iteration. It is
231 passed a MaxEntropy object that encapsulates the current state of
232 the training.
233
234 The maximum number of iterations and the convergence criterion for IIS
235 are given by max_iis_iterations and iis_converge, respectively, while
236 max_newton_iterations and newton_converge are the maximum number
237 of iterations and the convergence criterion for Newton's method.
238 """
239 if not training_set:
240 raise ValueError("No data in the training set.")
241 if len(training_set) != len(results):
242 raise ValueError("training_set and results should be parallel lists.")
243
244
245 xs, ys = training_set, results
246
247
248 classes = sorted(set(results))
249
250
251 features = [_eval_feature_fn(fn, training_set, classes)
252 for fn in feature_fns]
253
254 f_sharp = _calc_f_sharp(len(training_set), len(classes), features)
255
256
257 e_empirical = _calc_empirical_expects(xs, ys, classes, features)
258
259
260 alphas = [0.0] * len(features)
261 iters = 0
262 while iters < max_iis_iterations:
263 nalphas = _train_iis(xs, classes, features, f_sharp,
264 alphas, e_empirical,
265 max_newton_iterations, newton_converge)
266 diff = map(lambda x, y: numpy.fabs(x-y), alphas, nalphas)
267 diff = reduce(lambda x, y: x+y, diff, 0)
268 alphas = nalphas
269
270 me = MaxEntropy()
271 me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
272 if update_fn is not None:
273 update_fn(me)
274
275 if diff < iis_converge:
276 break
277 else:
278 raise RuntimeError("IIS did not converge")
279
280 return me
281
282
283 if __name__ == "__main__":
284
285
286
287 xcar=[
288 ['Red', 'Sports', 'Domestic'],
289 ['Red', 'Sports', 'Domestic'],
290 ['Red', 'Sports', 'Domestic'],
291 ['Yellow', 'Sports', 'Domestic'],
292 ['Yellow', 'Sports', 'Imported'],
293 ['Yellow', 'SUV', 'Imported'],
294 ['Yellow', 'SUV', 'Imported'],
295 ['Yellow', 'SUV', 'Domestic'],
296 ['Red', 'SUV', 'Imported'],
297 ['Red', 'Sports', 'Imported']
298 ]
299
300 ycar=[
301 'Yes',
302 'No',
303 'Yes',
304 'No',
305 'Yes',
306 'No',
307 'Yes',
308 'No',
309 'No',
310 'Yes'
311 ]
312
313
315 if ts[0] =='Red':
316 return 0
317 else:
318 return 1
319
321 if ts[1] =='Sports':
322 return 0
323 else:
324 return 1
325
327 if ts[2] =='Domestic':
328 return 0
329 else:
330 return 1
331
332 user_functions=[udf1, udf2, udf3]
333
334 xe=train(xcar, ycar, user_functions)
335 for xv, yv in zip(xcar, ycar):
336 xc=classify(xe, xv)
337 print 'Pred:', xv, 'gives', xc, 'y is', yv
338