Package Bio :: Package Motif :: Module Thresholds
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif.Thresholds

 1  # Copyright 2008 by Norbert Dojer.  All rights reserved. 
 2  # Adapted by Bartek Wilczynski. 
 3  # This code is part of the Biopython distribution and governed by its 
 4  # license.  Please see the LICENSE file that should have been included 
 5  # as part of this package. 
 6  """Approximate calculation of appropriate thresholds for motif finding 
 7  """ 
 8  import math 
 9  import random 
10   
11   
12 -class ScoreDistribution(object):
13 """ Class representing approximate score distribution for a given motif. 14 15 Utilizes a dynamic programming approch to calculate the distribution of 16 scores with a predefined precision. Provides a number of methods for calculating 17 thresholds for motif occurences. 18 """
19 - def __init__(self, motif, precision=10**3):
20 self.min_score=min(0.0, motif.min_score()) 21 self.interval=max(0.0, motif.max_score())-self.min_score 22 self.n_points=precision*motif.length 23 self.step=self.interval/(self.n_points-1) 24 self.mo_density=[0.0]*self.n_points 25 self.mo_density[-self._index_diff(self.min_score)]=1.0 26 self.bg_density=[0.0]*self.n_points 27 self.bg_density[-self._index_diff(self.min_score)]=1.0 28 self.ic=motif.ic() 29 for lo, mo in zip(motif.log_odds(), motif.pwm()): 30 self.modify(lo, mo, motif.background)
31
32 - def _index_diff(self, x, y=0.0):
33 return int((x-y+0.5*self.step)//self.step)
34
35 - def _add(self, i, j):
36 return max(0, min(self.n_points-1, i+j))
37
38 - def modify(self, scores, mo_probs, bg_probs):
39 mo_new=[0.0]*self.n_points 40 bg_new=[0.0]*self.n_points 41 for k, v in scores.items(): 42 d=self._index_diff(v) 43 for i in range(self.n_points): 44 mo_new[self._add(i, d)]+=self.mo_density[i]*mo_probs[k] 45 bg_new[self._add(i, d)]+=self.bg_density[i]*bg_probs[k] 46 self.mo_density=mo_new 47 self.bg_density=bg_new
48
49 - def threshold_fpr(self, fpr):
50 """ 51 Approximate the log-odds threshold which makes the type I error (false positive rate). 52 """ 53 i=self.n_points 54 prob=0.0 55 while prob<fpr: 56 i-=1 57 prob+=self.bg_density[i] 58 return self.min_score+i*self.step
59
60 - def threshold_fnr(self, fnr):
61 """ 62 Approximate the log-odds threshold which makes the type II error (false negative rate). 63 """ 64 i=-1 65 prob=0.0 66 while prob<fnr: 67 i+=1 68 prob+=self.mo_density[i] 69 return self.min_score+i*self.step
70
71 - def threshold_balanced(self, rate_proportion=1.0, return_rate=False):
72 """ 73 Approximate the log-odds threshold which makes FNR equal to FPR times rate_proportion 74 """ 75 i=self.n_points 76 fpr=0.0 77 fnr=1.0 78 while fpr*rate_proportion<fnr: 79 i-=1 80 fpr+=self.bg_density[i] 81 fnr-=self.mo_density[i] 82 if return_rate: 83 return self.min_score+i*self.step, fpr 84 else: 85 return self.min_score+i*self.step
86
87 - def threshold_patser(self):
88 """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software. 89 90 It selects such a threshold that the log(fpr)=-ic(M) 91 note: the actual patser software uses natural logarithms instead of log_2, so the numbers 92 are not directly comparable. 93 """ 94 return self.threshold_fpr(fpr=2**-self.ic)
95