Package Bio :: Package SubsMat :: Module FreqTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.SubsMat.FreqTable

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  # 
  5   
  6  from Bio import Alphabet 
  7  COUNT = 1 
  8  FREQ = 2 
  9  ################################################################## 
 10  # A class to handle frequency tables 
 11  # Copyright Iddo Friedberg idoerg@cc.huji.ac.il 
 12  # Biopython (http://biopython.org) license applies 
 13  # Methods to read a letter frequency or a letter count file: 
 14  # Example files for a DNA alphabet: 
 15  # 
 16  # A count file (whitespace separated): 
 17  # 
 18  # A  50 
 19  # C  37 
 20  # G  23 
 21  # T  58 
 22  # 
 23  # The same info as a frequency file: 
 24  # 
 25  # A 0.2976 
 26  # C 0.2202 
 27  # G 0.1369 
 28  # T 0.3452 
 29  # 
 30  # Functions: 
 31  #   read_count(f): read a count file from stream f. Then convert to 
 32  #   frequencies 
 33  #   read_freq(f): read a frequency data file from stream f. Of course, we then 
 34  #   don't have the counts, but it is usually the letter frequencies which are 
 35  #   interesting. 
 36  # 
 37  # Methods: 
 38  #   (all internal) 
 39  # Attributes: 
 40  #   alphabet: The IUPAC alphabet set (or any other) whose letters you are 
 41  #   using. Common sets are: IUPAC.protein (20-letter protein), 
 42  #   IUPAC.unambiguous_dna (4-letter DNA). See Bio/alphabet for more. 
 43  #   data: frequency dictionary. 
 44  #   count: count dictionary. Empty if no counts are provided. 
 45  # 
 46  # Example of use: 
 47  #   >>> from Bio.SubsMat import FreqTable 
 48  #   >>> ftab = FreqTable.FreqTable(my_frequency_dictionary,FreqTable.FREQ) 
 49  #   >>> ftab = FreqTable.FreqTable(my_count_dictionary,FreqTable.COUNT) 
 50  #   >>> ftab = FreqTable.read_count(open('myDNACountFile')) 
 51  # 
 52  # 
 53  ################################################################## 
 54   
 55   
class FreqTable(dict):
    """Dictionary mapping each letter to its frequency.

    Attributes:
     - alphabet: the alphabet supplied by the caller; when none is given,
       a generic ``Alphabet.Alphabet`` whose ``letters`` string is built
       from the sorted keys of the table.
     - count: the count dictionary the table was built from, or an empty
       dictionary when the table was built directly from frequencies.
    """

    def __init__(self, in_dict, dict_type, alphabet=None):
        """Build the table from a count or a frequency dictionary.

        Arguments:
         - in_dict: dictionary keyed by letter, holding counts or
           frequencies.
         - dict_type: COUNT if in_dict holds counts, FREQ if it already
           holds frequencies.
         - alphabet: optional alphabet object; derived from the keys
           when omitted (or otherwise false).

        Raises ValueError if dict_type is neither COUNT nor FREQ.
        """
        self.alphabet = alphabet
        if dict_type == COUNT:
            # Keep a reference to the caller's counts, then derive the
            # frequencies from them.
            self.count = in_dict
            self._freq_from_count()
        elif dict_type == FREQ:
            # No counts are available when frequencies are given directly.
            self.count = {}
            self.update(in_dict)
        else:
            raise ValueError("bad dict_type")
        if alphabet:
            return
        # No usable alphabet supplied: fall back to one built from the keys.
        self.alphabet = Alphabet.Alphabet()
        self.alphabet.letters = self._alphabet_from_input()

    def _freq_from_count(self):
        """Fill the table with count / total for every counted letter."""
        total = float(sum(self.count.values()))
        for letter in self.count:
            self[letter] = self.count[letter] / total

    def _alphabet_from_input(self):
        """Return the table's keys, sorted, joined into a single string."""
        return "".join(sorted(self))
82 83
def read_count(f):
    """Read a letter count file from stream f and return a FreqTable.

    Each data line holds a letter and an integer count separated by
    whitespace, for example ``A  50``. Blank lines are ignored, so a
    trailing empty line no longer aborts the read.

    Arguments:
     - f: an iterable of text lines, typically an open file handle.

    Returns a FreqTable built with dict_type COUNT, so the counts are
    kept and the frequencies are derived from them.

    Raises ValueError if a non-blank line does not contain exactly two
    fields, or if the second field is not an integer.
    """
    count = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Whitespace-only line: nothing to parse.
            continue
        key, value = fields
        count[key] = int(value)
    return FreqTable(count, COUNT)
91 92
def read_freq(f):
    """Read a letter frequency file from stream f and return a FreqTable.

    Each data line holds a letter and a floating point frequency
    separated by whitespace, for example ``A 0.2976``. Blank lines are
    ignored, so a trailing empty line no longer aborts the read.

    Arguments:
     - f: an iterable of text lines, typically an open file handle.

    Returns a FreqTable built with dict_type FREQ; its count dictionary
    is empty because no counts are available.

    Raises ValueError if a non-blank line does not contain exactly two
    fields, or if the second field is not a number.
    """
    freq_dict = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Whitespace-only line: nothing to parse.
            continue
        key, value = fields
        freq_dict[key] = float(value)
    return FreqTable(freq_dict, FREQ)
99