Package Bio :: Package Motif :: Module Thresholds
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif.Thresholds

 1  # Copyright 2008 by Norbert Dojer.  All rights reserved. 
 2  # Adapted by Bartek Wilczynski. 
 3  # This code is part of the Biopython distribution and governed by its 
 4  # license.  Please see the LICENSE file that should have been included 
 5  # as part of this package. 
 6  """Approximate calculation of appropriate thresholds for motif finding 
 7  """ 
 8  import math 
 9  import random 
10   
11   
12 -class ScoreDistribution(object):
13 """ Class representing approximate score distribution for a given motif. 14 15 Utilizes a dynamic programming approch to calculate the distribution of 16 scores with a predefined precision. Provides a number of methods for calculating 17 thresholds for motif occurences. 18 """
19 - def __init__(self, motif, precision=10 ** 3):
20 self.min_score = min(0.0, motif.min_score()) 21 self.interval = max(0.0, motif.max_score()) - self.min_score 22 self.n_points = precision * motif.length 23 self.step = self.interval / (self.n_points - 1) 24 self.mo_density = [0.0] * self.n_points 25 self.mo_density[-self._index_diff(self.min_score)] = 1.0 26 self.bg_density = [0.0] * self.n_points 27 self.bg_density[-self._index_diff(self.min_score)] = 1.0 28 self.ic = motif.ic() 29 for lo, mo in zip(motif.log_odds(), motif.pwm()): 30 self.modify(lo, mo, motif.background)
31
32 - def _index_diff(self, x, y=0.0):
33 return int((x - y + 0.5 * self.step) // self.step)
34
35 - def _add(self, i, j):
36 return max(0, min(self.n_points - 1, i + j))
37
38 - def modify(self, scores, mo_probs, bg_probs):
39 mo_new = [0.0] * self.n_points 40 bg_new = [0.0] * self.n_points 41 for k, v in scores.items(): 42 d = self._index_diff(v) 43 for i in range(self.n_points): 44 mo_new[self._add(i, d)] += self.mo_density[i] * mo_probs[k] 45 bg_new[self._add(i, d)] += self.bg_density[i] * bg_probs[k] 46 self.mo_density = mo_new 47 self.bg_density = bg_new
48
49 - def threshold_fpr(self, fpr):
50 """ 51 Approximate the log-odds threshold which makes the type I error (false positive rate). 52 """ 53 i = self.n_points 54 prob = 0.0 55 while prob < fpr: 56 i -= 1 57 prob += self.bg_density[i] 58 return self.min_score + i * self.step
59
60 - def threshold_fnr(self, fnr):
61 """ 62 Approximate the log-odds threshold which makes the type II error (false negative rate). 63 """ 64 i = -1 65 prob = 0.0 66 while prob < fnr: 67 i += 1 68 prob += self.mo_density[i] 69 return self.min_score + i * self.step
70
71 - def threshold_balanced(self, rate_proportion=1.0, return_rate=False):
72 """ 73 Approximate the log-odds threshold which makes FNR equal to FPR times rate_proportion 74 """ 75 i = self.n_points 76 fpr = 0.0 77 fnr = 1.0 78 while fpr * rate_proportion < fnr: 79 i -= 1 80 fpr += self.bg_density[i] 81 fnr -= self.mo_density[i] 82 if return_rate: 83 return self.min_score + i * self.step, fpr 84 else: 85 return self.min_score + i * self.step
86
87 - def threshold_patser(self):
88 """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software. 89 90 It selects such a threshold that the log(fpr)=-ic(M) 91 note: the actual patser software uses natural logarithms instead of log_2, so the numbers 92 are not directly comparable. 93 """ 94 return self.threshold_fpr(fpr=2 ** -self.ic)
95