Package Bio :: Package Motif :: Module Thresholds
[hide private]
[frames] | no frames]

Source Code for Module Bio.Motif.Thresholds

 1  # Copyright 2008 by Norbert Dojer.  All rights reserved. 
 2  # Adapted by Bartek Wilczynski. 
 3  # This code is part of the Biopython distribution and governed by its 
 4  # license.  Please see the LICENSE file that should have been included 
 5  # as part of this package. 
 6  """Approximate calculation of appropriate thresholds for motif finding 
 7  """ 
 8  import math, random 
 9   
10 -class ScoreDistribution(object):
11 """ Class representing approximate score distribution for a given motif. 12 13 Utilizes a dynamic programming approch to calculate the distribution of 14 scores with a predefined precision. Provides a number of methods for calculating 15 thresholds for motif occurences. 16 """
17 - def __init__(self,motif,precision=10**3):
18 self.min_score=min(0.0, motif.min_score()) 19 self.interval=max(0.0, motif.max_score())-self.min_score 20 self.n_points=precision*motif.length 21 self.step=self.interval/(self.n_points-1) 22 self.mo_density=[0.0]*self.n_points 23 self.mo_density[-self._index_diff(self.min_score)]=1.0 24 self.bg_density=[0.0]*self.n_points 25 self.bg_density[-self._index_diff(self.min_score)]=1.0 26 self.ic=motif.ic() 27 for lo, mo in zip(motif.log_odds(), motif.pwm()): 28 self.modify(lo, mo, motif.background)
29
30 - def _index_diff(self,x,y=0.0):
31 return int((x-y+0.5*self.step)//self.step)
32
33 - def _add(self, i, j):
34 return max(0, min(self.n_points-1, i+j))
35
36 - def modify(self, scores, mo_probs, bg_probs):
37 mo_new=[0.0]*self.n_points 38 bg_new=[0.0]*self.n_points 39 for k, v in scores.items(): 40 d=self._index_diff(v) 41 for i in range(self.n_points): 42 mo_new[self._add(i, d)]+=self.mo_density[i]*mo_probs[k] 43 bg_new[self._add(i, d)]+=self.bg_density[i]*bg_probs[k] 44 self.mo_density=mo_new 45 self.bg_density=bg_new
46
47 - def threshold_fpr(self, fpr):
48 """ 49 Approximate the log-odds threshold which makes the type I error (false positive rate). 50 """ 51 i=self.n_points 52 prob=0.0 53 while prob<fpr: 54 i-=1 55 prob+=self.bg_density[i] 56 return self.min_score+i*self.step
57
58 - def threshold_fnr(self, fnr):
59 """ 60 Approximate the log-odds threshold which makes the type II error (false negative rate). 61 """ 62 i=-1 63 prob=0.0 64 while prob<fnr: 65 i+=1 66 prob+=self.mo_density[i] 67 return self.min_score+i*self.step
68
69 - def threshold_balanced(self,rate_proportion=1.0,return_rate=False):
70 """ 71 Approximate the log-odds threshold which makes FNR equal to FPR times rate_proportion 72 """ 73 i=self.n_points 74 fpr=0.0 75 fnr=1.0 76 while fpr*rate_proportion<fnr: 77 i-=1 78 fpr+=self.bg_density[i] 79 fnr-=self.mo_density[i] 80 if return_rate: 81 return self.min_score+i*self.step, fpr 82 else: 83 return self.min_score+i*self.step
84
85 - def threshold_patser(self):
86 """Threshold selection mimicking the behaviour of patser (Hertz, Stormo 1999) software. 87 88 It selects such a threshold that the log(fpr)=-ic(M) 89 note: the actual patser software uses natural logarithms instead of log_2, so the numbers 90 are not directly comparable. 91 """ 92 return self.threshold_fpr(fpr=2**-self.ic)
93