Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bio.Wise contains modules for running and processing the output of 
  7  # some of the models in the Wise2 package by Ewan Birney available from: 
  8  # ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
  9  # http://www.ebi.ac.uk/Wise2/ 
 10  # 
 11  # Bio.Wise.psw is for protein Smith-Waterman alignments 
 12  # Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 13   
 14  from __future__ import print_function 
 15   
 16  import re 
 17   
 18  #Importing with leading underscore as not intended to be exposed 
 19  from Bio._py3k import getoutput as _getoutput 
 20  from Bio._py3k import zip 
 21  from Bio._py3k import map 
 22   
 23  from Bio import Wise 
 24   
 25  _SCORE_MATCH = 4 
 26  _SCORE_MISMATCH = -1 
 27  _SCORE_GAP_START = -5 
 28  _SCORE_GAP_EXTENSION = -1 
 29   
 30  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 31   
 32   
33 -def _build_dnal_cmdline(match, mismatch, gap, extension):
34 res = _CMDLINE_DNAL[:] 35 res.extend(["-match", str(match)]) 36 res.extend(["-mis", str(mismatch)]) 37 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 38 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 39 40 return res
41 42 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s" 43 44
45 -def _fgrep_count(pattern, file):
46 return int(_getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
47 48 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):") 49 50
51 -def _alb_line2coords(line):
52 return tuple([int(coord)+1 # one-based -> zero-based 53 for coord 54 in _re_alb_line2coords.match(line).groups()])
55 56
57 -def _get_coords(filename):
58 alb = file(filename) 59 60 start_line = None 61 end_line = None 62 63 for line in alb: 64 if line.startswith("["): 65 if not start_line: 66 start_line = line # rstrip not needed 67 else: 68 end_line = line 69 70 if end_line is None: # sequence is too short 71 return [(0, 0), (0, 0)] 72 73 return list(zip(*map(_alb_line2coords, [start_line, end_line]))) # returns [(start0, end0), (start1, end1)]
74 75
76 -class Statistics(object):
77 """ 78 Calculate statistics from an ALB report 79 """
80 - def __init__(self, filename, match, mismatch, gap, extension):
81 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 82 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 83 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 84 85 if gap == extension: 86 self.extensions = 0 87 else: 88 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 89 90 self.score = (match*self.matches + 91 mismatch*self.mismatches + 92 gap*self.gaps + 93 extension*self.extensions) 94 95 if self.matches or self.mismatches or self.gaps or self.extensions: 96 self.coords = _get_coords(filename) 97 else: 98 self.coords = [(0, 0), (0, 0)]
99
100 - def identity_fraction(self):
101 return self.matches/(self.matches+self.mismatches)
102 103 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 104
105 - def __str__(self):
106 return "\t".join(str(x) for x in (self.identity_fraction(), 107 self.matches, self.mismatches, 108 self.gaps, self.extensions))
109 110
111 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
112 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 113 temp_file = Wise.align(cmdline, pair, **keywds) 114 try: 115 return Statistics(temp_file.name, match, mismatch, gap, extension) 116 except AttributeError: 117 try: 118 keywds['dry_run'] 119 return None 120 except KeyError: 121 raise
122 123
124 -def main():
125 import sys 126 stats = align(sys.argv[1:3]) 127 print("\n".join("%s: %s" % (attr, getattr(stats, attr)) 128 for attr in ("matches", "mismatches", "gaps", "extensions"))) 129 print("identity_fraction: %s" % stats.identity_fraction()) 130 print("coords: %s" % stats.coords)
131 132
133 -def _test(*args, **keywds):
134 import doctest 135 import sys 136 doctest.testmod(sys.modules[__name__], *args, **keywds)
137 138 if __name__ == "__main__": 139 if __debug__: 140 _test() 141 main() 142