Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bio.Wise contains modules for running and processing the output of 
  7  # some of the models in the Wise2 package by Ewan Birney available from: 
  8  # ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
  9  # http://www.ebi.ac.uk/Wise2/ 
 10  # 
 11  # Bio.Wise.psw is for protein Smith-Waterman alignments 
 12  # Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 13   
 14  __version__ = "$Revision: 1.12 $" 
 15   
 16  import commands 
 17  import itertools 
 18  import re 
 19   
 20  from Bio import Wise 
 21   
 22  _SCORE_MATCH = 4 
 23  _SCORE_MISMATCH = -1 
 24  _SCORE_GAP_START = -5 
 25  _SCORE_GAP_EXTENSION = -1 
 26   
 27  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 28   
 29   
30 -def _build_dnal_cmdline(match, mismatch, gap, extension):
31 res = _CMDLINE_DNAL[:] 32 res.extend(["-match", str(match)]) 33 res.extend(["-mis", str(mismatch)]) 34 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 35 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 36 37 return res
38 39 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s" 40 41
42 -def _fgrep_count(pattern, file):
43 return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
44 45 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):") 46 47
48 -def _alb_line2coords(line):
49 return tuple([int(coord)+1 # one-based -> zero-based 50 for coord 51 in _re_alb_line2coords.match(line).groups()])
52 53
54 -def _get_coords(filename):
55 alb = file(filename) 56 57 start_line = None 58 end_line = None 59 60 for line in alb: 61 if line.startswith("["): 62 if not start_line: 63 start_line = line # rstrip not needed 64 else: 65 end_line = line 66 67 if end_line is None: # sequence is too short 68 return [(0, 0), (0, 0)] 69 70 return zip(*map(_alb_line2coords, [start_line, end_line])) # returns [(start0, end0), (start1, end1)]
71 72
73 -def _any(seq, pred=bool):
74 "Returns True if pred(x) is True at least one element in the iterable" 75 return True in itertools.imap(pred, seq)
76 77
78 -class Statistics(object):
79 """ 80 Calculate statistics from an ALB report 81 """
82 - def __init__(self, filename, match, mismatch, gap, extension):
83 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 84 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 85 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 86 87 if gap == extension: 88 self.extensions = 0 89 else: 90 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 91 92 self.score = (match*self.matches + 93 mismatch*self.mismatches + 94 gap*self.gaps + 95 extension*self.extensions) 96 97 if _any([self.matches, self.mismatches, self.gaps, self.extensions]): 98 self.coords = _get_coords(filename) 99 else: 100 self.coords = [(0, 0), (0,0)]
101
102 - def identity_fraction(self):
103 return self.matches/(self.matches+self.mismatches)
104 105 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 106
107 - def __str__(self):
108 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
109 110
111 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
112 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 113 temp_file = Wise.align(cmdline, pair, **keywds) 114 try: 115 return Statistics(temp_file.name, match, mismatch, gap, extension) 116 except AttributeError: 117 try: 118 keywds['dry_run'] 119 return None 120 except KeyError: 121 raise
122 123
124 -def main():
125 import sys 126 stats = align(sys.argv[1:3]) 127 print "\n".join(["%s: %s" % (attr, getattr(stats, attr)) 128 for attr in 129 ("matches", "mismatches", "gaps", "extensions")]) 130 print "identity_fraction: %s" % stats.identity_fraction() 131 print "coords: %s" % stats.coords
132 133
134 -def _test(*args, **keywds):
135 import doctest 136 import sys 137 doctest.testmod(sys.modules[__name__], *args, **keywds)
138 139 if __name__ == "__main__": 140 if __debug__: 141 _test() 142 main() 143