Package Bio :: Package SearchIO :: Package ExonerateIO :: Module exonerate_cigar
[hide private]
[frames | no frames]

Source Code for Module Bio.SearchIO.ExonerateIO.exonerate_cigar

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO parser for Exonerate cigar output format.""" 
  7   
  8  import re 
  9   
 10  from Bio._py3k import _as_bytes, _bytes_to_string 
 11   
 12  from ._base import _BaseExonerateParser, _STRAND_MAP 
 13  from .exonerate_vulgar import ExonerateVulgarIndexer 
 14   
 15   
 16  __all__ = ['ExonerateCigarParser', 'ExonerateCigarIndexer'] 
 17   
 18   
 19  # precompile regex 
 20  _RE_CIGAR = re.compile(r"""^cigar:\s+ 
 21          (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+  # query: ID, start, end, strand 
 22          (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+  # hit: ID, start, end, strand 
 23          (\d+)(\s+.*)$                         # score, vulgar components 
 24          """, re.VERBOSE) 
 25   
 26   
class ExonerateCigarParser(_BaseExonerateParser):

    """Parser for Exonerate cigar strings."""

    _ALN_MARK = 'cigar'

    def parse_alignment_block(self, header):
        """Parse one cigar line and merge its values into ``header``.

        ``header`` must provide the mappings ``header['qresult']``,
        ``header['hit']`` and ``header['hsp']``.  The parser is advanced
        with ``self.read_until`` until ``self.line`` starts with ``cigar``,
        and that line is matched against ``_RE_CIGAR``.

        If ``self.has_c4_alignment`` is true, the values already present in
        ``header`` are checked against the cigar line.  Otherwise the cigar
        values are stored into ``header``.  In both cases the strands are
        then mapped through ``_STRAND_MAP``, the coordinates and score are
        cast to ``int`` (with start <= end), and the cigar components are
        stored under ``hsp['cigar_comp']``.

        Returns a dict with the keys ``'qresult'``, ``'hit'`` and ``'hsp'``.

        Raises ``ValueError`` if the current line is not a well-formed cigar
        line, and ``AssertionError`` if the c4 header values disagree with
        the cigar line.
        """
        qresult = header['qresult']
        hit = header['hit']
        hsp = header['hsp']
        self.read_until(lambda line: line.startswith('cigar'))
        cigars = _RE_CIGAR.search(self.line)
        if cigars is None:
            # fail with a readable message instead of an AttributeError on
            # None when the cigar line is missing or malformed
            raise ValueError("Malformed cigar line: %r" % (self.line,))

        # (target mapping, key) pairs for regex groups 1-9, in group order
        fields = (
            (qresult, 'id'),
            (hsp, 'query_start'),
            (hsp, 'query_end'),
            (hsp, 'query_strand'),
            (hit, 'id'),
            (hsp, 'hit_start'),
            (hsp, 'hit_end'),
            (hsp, 'hit_strand'),
            (hsp, 'score'),
        )
        has_c4 = self.has_c4_alignment
        for group_no, (target, key) in enumerate(fields, 1):
            value = cigars.group(group_no)
            if has_c4:
                # if the file has c4 alignments, check that the cigar value
                # matches the previously parsed header value
                assert target[key] == value, \
                    "%s mismatch between header (%r) and cigar line (%r)" \
                    % (key, target[key], value)
            else:
                target[key] = value

        # adjust strands
        hsp['query_strand'] = _STRAND_MAP[hsp['query_strand']]
        hsp['hit_strand'] = _STRAND_MAP[hsp['hit_strand']]
        # cast coords into ints
        qstart = int(hsp['query_start'])
        qend = int(hsp['query_end'])
        hstart = int(hsp['hit_start'])
        hend = int(hsp['hit_end'])
        # set coords (start <= end)
        hsp['query_start'] = min(qstart, qend)
        hsp['query_end'] = max(qstart, qend)
        hsp['hit_start'] = min(hstart, hend)
        hsp['hit_end'] = max(hstart, hend)
        # cast score into int
        hsp['score'] = int(hsp['score'])
        # store cigar components
        hsp['cigar_comp'] = cigars.group(10)
        # HACK: since we can't really figure out exactly when a
        # HSP starts or ends, we set the entire alignment as one HSP
        hsp['query_ranges'] = [(hsp['query_start'], hsp['query_end'])]
        hsp['hit_ranges'] = [(hsp['hit_start'], hsp['hit_end'])]

        return {'qresult': qresult, 'hit': hit, 'hsp': hsp}
85 86
class ExonerateCigarIndexer(ExonerateVulgarIndexer):

    """Indexer class for exonerate cigar lines."""

    _parser = ExonerateCigarParser
    _query_mark = _as_bytes('cigar')

    def get_qresult_id(self, pos):
        """Return the query ID of the cigar line at offset ``pos``.

        Seeks ``self._handle`` to ``pos``, reads one line, and returns the
        first field captured by ``_RE_CIGAR`` (the query ID).

        Raises ``AssertionError`` if the line does not start with the cigar
        mark, and ``ValueError`` if it does but is not a well-formed cigar
        line.
        """
        handle = self._handle
        handle.seek(pos)
        # get line, check if it's a cigar line, and get query ID
        line = handle.readline()
        assert line.startswith(self._query_mark), line
        match = _RE_CIGAR.search(_bytes_to_string(line))
        if match is None:
            # fail with a readable message instead of an AttributeError
            # on None
            raise ValueError("Malformed cigar line: %r" % (line,))
        return match.group(1)
# When this file is executed directly rather than imported, run its doctests.
if __name__ == "__main__":
    from Bio._utils import run_doctest
    run_doctest()