| Trees | Indices | Help |
|
|---|
|
|
1 # Copyright 2012 by Wibowo Arindrarto. All rights reserved.
2 # This code is part of the Biopython distribution and governed by its
3 # license. Please see the LICENSE file that should have been included
4 # as part of this package.
5
6 """Bio.SearchIO parser for BLAST+ tab output format, with or without comments."""
7
8 import re
9
10 from Bio._py3k import _as_bytes, _bytes_to_string
11 from Bio.SearchIO._index import SearchIndexer
12 from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment
13
14
15 __all__ = ['BlastTabIndexer', 'BlastTabParser', 'BlastTabWriter']
16
17
18 # longname-shortname map
19 # maps the column names shown in a commented output to its short name
20 # (the one used in the command line)
21 _LONG_SHORT_MAP = {
22 'query id': 'qseqid',
23 'query acc.': 'qacc',
24 'query acc.ver': 'qaccver',
25 'query length': 'qlen',
26 'subject id': 'sseqid',
27 'subject acc.': 'sacc',
28 'subject acc.ver': 'saccver',
29 'subject length': 'slen',
30 'alignment length': 'length',
31 'bit score': 'bitscore',
32 'score': 'score',
33 'evalue': 'evalue',
34 'identical': 'nident',
35 '% identity': 'pident',
36 'positives': 'positive',
37 '% positives': 'ppos',
38 'mismatches': 'mismatch',
39 'gaps': 'gaps',
40 'q. start': 'qstart',
41 'q. end': 'qend',
42 's. start': 'sstart',
43 's. end': 'send',
44 'query frame': 'qframe',
45 'sbjct frame': 'sframe',
46 'query/sbjct frames': 'frames',
47 'query seq': 'qseq',
48 'subject seq': 'sseq',
49 'gap opens': 'gapopen',
50 'query gi': 'qgi',
51 'subject ids': 'sallseqid',
52 'subject gi': 'sgi',
53 'subject gis': 'sallgi',
54 'BTOP': 'btop',
55 }
56
57 # column to class attribute map
58 _COLUMN_QRESULT = {
59 'qseqid': ('id', str),
60 'qacc': ('accession', str),
61 'qaccver': ('accession_version', str),
62 'qlen': ('seq_len', int),
63 'qgi': ('gi', str),
64 }
65 _COLUMN_HIT = {
66 'sseqid': ('id', str),
67 'sallseqid': ('id_all', str),
68 'sacc': ('accession', str),
69 'saccver': ('accession_version', str),
70 'sgi': ('gi', str),
71 'sallgi': ('gi_all', str),
72 'slen': ('seq_len', int),
73 }
74 _COLUMN_HSP = {
75 'bitscore': ('bitscore', float),
76 'score': ('bitscore_raw', int),
77 'evalue': ('evalue', float),
78 'nident': ('ident_num', int),
79 'pident': ('ident_pct', float),
80 'positive': ('pos_num', int),
81 'ppos': ('pos_pct', float),
82 'mismatch': ('mismatch_num', int),
83 'gaps': ('gap_num', int),
84 'gapopen': ('gapopen_num', int),
85 'btop': ('btop', str),
86 }
87 _COLUMN_FRAG = {
88 'length': ('aln_span', int),
89 'qstart': ('query_start', int),
90 'qend': ('query_end', int),
91 'sstart': ('hit_start', int),
92 'send': ('hit_end', int),
93 'qframe': ('query_frame', int),
94 'sframe': ('hit_frame', int),
95 'frames': ('frames', str),
96 'qseq': ('query', str),
97 'sseq': ('hit', str),
98 }
99 _SUPPORTED_FIELDS = set(_COLUMN_QRESULT.keys() + _COLUMN_HIT.keys() +
100 _COLUMN_HSP.keys() + _COLUMN_FRAG.keys())
101
102 # column order in the non-commented tabular output variant
103 # values must be keys inside the column-attribute maps above
104 _DEFAULT_FIELDS = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch',
105 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']
106 # one field from each of the following sets must exist in order for the
107 # parser to work
108 _MIN_QUERY_FIELDS = set(['qseqid', 'qacc', 'qaccver'])
109 _MIN_HIT_FIELDS = set(['sseqid', 'sacc', 'saccver'])
110
111 # simple function to create BLAST HSP attributes that may be computed if
112 # other certain attributes are present
113 # This was previously implemented in the HSP objects in the old model
114
115 _RE_GAPOPEN = re.compile(r'\w-')
116
117
119 """Returns the number of gap openings in the given HSP."""
120 gapopen = 0
121 for seq_type in ('query', 'hit'):
122 seq = str(getattr(hsp, seq_type).seq)
123 gapopen += len(re.findall(_RE_GAPOPEN, seq))
124 return gapopen
125
126
128 """Calculates the given HSP attribute, for writing."""
129 if attr == 'aln_span':
130 # aln_span is number of identical matches + mismatches + gaps
131 func = lambda hsp: hsp.ident_num + hsp.mismatch_num + hsp.gap_num
132 # ident and gap will require the num values to be computed first
133 elif attr.startswith('ident'):
134 func = lambda hsp: hsp.aln_span - hsp.mismatch_num - hsp.gap_num
135 elif attr.startswith('gap'):
136 func = lambda hsp: hsp.aln_span - hsp.ident_num - hsp.mismatch_num
137 elif attr == 'mismatch_num':
138 func = lambda hsp: hsp.aln_span - hsp.ident_num - hsp.gap_num
139 elif attr == 'gapopen_num':
140 if not hasattr(hsp, 'query') or not hasattr(hsp, 'hit'):
141 # mock function so that the except clause below is triggered
142 # as both the query and hit are required to compute gapopen
143 def mock(hsp):
144 raise AttributeError
145 func = mock
146 else:
147 func = _compute_gapopen_num
148
149 # set the num values
150 # requires the endswith check, since we only want to set 'num' or 'span'
151 # attributes here
152 if not hasattr(hsp, attr) and not attr.endswith('_pct'):
153 value = func(hsp)
154 setattr(hsp, attr, value)
155
156 # if the attr is a percent value, calculate it
157 if attr == 'ident_pct':
158 func2 = lambda hsp: hsp.ident_num / float(hsp.aln_span) * 100
159 elif attr == 'pos_pct':
160 func = lambda hsp: hsp.pos_num / float(hsp.aln_span) * 100
161 elif attr == 'gap_pct':
162 func2 = lambda hsp: hsp.gap_num / float(hsp.aln_span) * 100
163 else:
164 func2 = None
165
166 # set the pct values
167 if func2 is not None:
168 value = func2(hsp)
169 setattr(hsp, attr, value)
170
171
173
174 """Parser for the BLAST tabular format."""
175
177 self.handle = handle
178 self.has_comments = comments
179 self.fields = self._prep_fields(fields)
180 self.line = self.handle.readline().strip()
181
183 # stop iteration if file has no lines
184 if not self.line:
185 raise StopIteration
186 # determine which iterator to use
187 elif self.has_comments:
188 iterfunc = self._parse_commented_qresult
189 else:
190 iterfunc = self._parse_qresult
191
192 for qresult in iterfunc():
193 yield qresult
194
196 """Validates and formats the given fields for use by the parser."""
197 # cast into list if fields is a space-separated string
198 if isinstance(fields, basestring):
199 fields = fields.strip().split(' ')
200 # blast allows 'std' as a proxy for the standard default lists
201 # we want to transform 'std' to its proper column names
202 if 'std' in fields:
203 idx = fields.index('std')
204 fields = fields[:idx] + _DEFAULT_FIELDS + fields[idx+1:]
205 # if set(fields) has a null intersection with minimum required
206 # fields for hit and query, raise an exception
207 if not set(fields).intersection(_MIN_QUERY_FIELDS) or \
208 not set(fields).intersection(_MIN_HIT_FIELDS):
209 raise ValueError("Required query and/or hit ID field not found.")
210
211 return fields
212
214 """Iterator returning `QueryResult` objects from a commented file."""
215 while True:
216 comments = self._parse_comments()
217 if comments:
218 try:
219 self.fields = comments['fields']
220 # iterator for the query results
221 qres_iter = self._parse_qresult()
222 except KeyError:
223 # no fields means the query has no results
224 assert 'fields' not in comments
225 # create an iterator returning one empty qresult
226 # if the query has no results
227 qres_iter = iter([QueryResult('')])
228
229 for qresult in qres_iter:
230 for key, value in comments.items():
231 setattr(qresult, key, value)
232 yield qresult
233
234 else:
235 break
236
238 """Returns a dictionary containing tab file comments."""
239 comments = {}
240 while True:
241 # parse program and version
242 # example: # BLASTX 2.2.26+
243 if 'BLAST' in self.line and 'processed' not in self.line:
244 program_line = self.line[len(' #'):].split(' ')
245 comments['program'] = program_line[0].lower()
246 comments['version'] = program_line[1]
247 # parse query id and description (if available)
248 # example: # Query: gi|356995852 Mus musculus POU domain
249 elif 'Query' in self.line:
250 query_line = self.line[len('# Query: '):].split(' ', 1)
251 comments['id'] = query_line[0]
252 if len(query_line) == 2:
253 comments['description'] = query_line[1]
254 # parse target database
255 # example: # Database: db/minirefseq_protein
256 elif 'Database' in self.line:
257 comments['target'] = self.line[len('# Database: '):]
258 # parse RID (from remote searches)
259 elif 'RID' in self.line:
260 comments['rid'] = self.line[len('# RID: '):]
261 # parse column order, required for parsing the result lines
262 # example: # Fields: query id, query gi, query acc., query length
263 elif 'Fields' in self.line:
264 comments['fields'] = self._parse_fields_line()
265 # if the line has these strings, it's either the end of a comment
266 # or the end of a file, so we return all the comments we've parsed
267 elif ' hits found' in self.line or 'processed' in self.line:
268 self.line = self.handle.readline().strip()
269 return comments
270
271 self.line = self.handle.readline()
272
273 if not self.line:
274 return comments
275 else:
276 self.line = self.line.strip()
277
279 """Returns a list of column short names from the 'Fields'
280 comment line."""
281 raw_field_str = self.line[len('# Fields: '):]
282 long_fields = raw_field_str.split(', ')
283 fields = [_LONG_SHORT_MAP[long_name] for long_name in long_fields]
284 return self._prep_fields(fields)
285
287 """Returns a dictionary of parsed row values."""
288 fields = self.fields
289 columns = self.line.strip().split('\t')
290 assert len(fields) == len(columns), "Expected %i columns, found: " \
291 "%i" % (len(fields), len(columns))
292
293 qresult, hit, hsp, frag = {}, {}, {}, {}
294 for idx, value in enumerate(columns):
295 sname = fields[idx]
296 # flag to check if any of the _COLUMNs contain sname
297 in_mapping = False
298 # iterate over each dict, mapping pair to determine
299 # attribute name and value of each column
300 for parsed_dict, mapping in (
301 (qresult, _COLUMN_QRESULT),
302 (hit, _COLUMN_HIT),
303 (hsp, _COLUMN_HSP),
304 (frag, _COLUMN_FRAG)):
305 # process parsed value according to mapping
306 if sname in mapping:
307 attr_name, caster = mapping[sname]
308 if caster is not str:
309 value = caster(value)
310 parsed_dict[attr_name] = value
311 in_mapping = True
312 # make sure that any unhandled field is not supported
313 if not in_mapping:
314 assert sname not in _SUPPORTED_FIELDS
315
316 return {'qresult': qresult, 'hit': hit, 'hsp': hsp, 'frag': frag}
317
319 """Returns the value used for a QueryResult or Hit ID from a parsed row."""
320 # use 'id', with 'accession' and 'accession_version' fallbacks
321 # one of these must have a value since we've checked whether
322 # they exist or not when parsing the comments
323 id_cache = parsed.get('id')
324 if id_cache is None:
325 id_cache = parsed.get('accession')
326 if id_cache is None:
327 id_cache = parsed.get('accession_version')
328
329 return id_cache
330
332 """Generator function that returns QueryResult objects."""
333 # state values, used to determine what to do with each line
334 state_EOF = 0
335 state_QRES_NEW = 1
336 state_QRES_SAME = 3
337 state_HIT_NEW = 2
338 state_HIT_SAME = 4
339 # dummies for initial states
340 qres_state = None
341 hit_state = None
342 file_state = None
343 # dummies for initial id caches
344 prev_qid = None
345 prev_hid = None
346 # dummies for initial parsed value containers
347 cur, prev = None, None
348 hit_list, hsp_list = [], []
349
350 while True:
351 # store previous line's parsed values if we've past the first line
352 if cur is not None:
353 prev = cur
354 prev_qid = cur_qid
355 prev_hid = cur_hid
356 # only parse the line if it's not EOF or not a comment line
357 if self.line and not self.line.startswith('#'):
358 cur = self._parse_result_row()
359 cur_qid = self._get_id(cur['qresult'])
360 cur_hid = self._get_id(cur['hit'])
361 else:
362 file_state = state_EOF
363 # mock values for cur_qid and cur_hid since the line is empty
364 cur_qid, cur_hid = None, None
365
366 # get the state of hit and qresult
367 if prev_qid != cur_qid:
368 qres_state = state_QRES_NEW
369 else:
370 qres_state = state_QRES_SAME
371 # new hits are hits with different id or hits in a new qresult
372 if prev_hid != cur_hid or qres_state == state_QRES_NEW:
373 hit_state = state_HIT_NEW
374 else:
375 hit_state = state_HIT_SAME
376
377 # we're creating objects for the previously parsed line(s),
378 # so nothing is done in the first parsed line (prev == None)
379 if prev is not None:
380 # every line is essentially an HSP with one fragment, so we
381 # create both of these for every line
382 frag = HSPFragment(prev_hid, prev_qid)
383 for attr, value in prev['frag'].items():
384 # adjust coordinates to Python range
385 # NOTE: this requires both start and end coords to be
386 # present, otherwise a KeyError will be raised.
387 # Without this limitation, we might misleadingly set the
388 # start / end coords
389 for seq_type in ('query', 'hit'):
390 if attr == seq_type + '_start':
391 value = min(value,
392 prev['frag'][seq_type + '_end']) - 1
393 elif attr == seq_type + '_end':
394 value = max(value,
395 prev['frag'][seq_type + '_start'])
396 setattr(frag, attr, value)
397 # strand and frame setattr require the full parsed values
398 # to be set first
399 for seq_type in ('hit', 'query'):
400 # try to set hit and query frame
401 frame = self._get_frag_frame(frag, seq_type,
402 prev['frag'])
403 setattr(frag, '%s_frame' % seq_type, frame)
404 # try to set hit and query strand
405 strand = self._get_frag_strand(frag, seq_type,
406 prev['frag'])
407 setattr(frag, '%s_strand' % seq_type, strand)
408
409 hsp = HSP([frag])
410 for attr, value in prev['hsp'].items():
411 setattr(hsp, attr, value)
412 hsp_list.append(hsp)
413
414 # create hit and append to temp hit container if hit_state
415 # says we're not at the same hit or at a new query
416 if hit_state == state_HIT_NEW:
417 hit = Hit(hsp_list)
418 for attr, value in prev['hit'].items():
419 setattr(hit, attr, value)
420 hit_list.append(hit)
421 hsp_list = []
422 # create qresult and yield if we're at a new qresult or EOF
423 if qres_state == state_QRES_NEW or file_state == state_EOF:
424 qresult = QueryResult(prev_qid, hits=hit_list)
425 for attr, value in prev['qresult'].items():
426 setattr(qresult, attr, value)
427 yield qresult
428 # if current line is EOF, break
429 if file_state == state_EOF:
430 break
431 hit_list = []
432
433 self.line = self.handle.readline().strip()
434
436 """Returns `HSPFragment` frame given the object, its sequence type,
437 and its parsed dictionary values."""
438 assert seq_type in ('query', 'hit')
439 frame = getattr(frag, '%s_frame' % seq_type, None)
440 if frame is not None:
441 return frame
442 else:
443 if 'frames' in parsedict:
444 # frames is 'x1/x2' string, x1 is query frame, x2 is hit frame
445 idx = 0 if seq_type == 'query' else 1
446 return int(parsedict['frames'].split('/')[idx])
447 # else implicit None return
448
450 """Returns `HSPFragment` strand given the object, its sequence type,
451 and its parsed dictionary values."""
452 # NOTE: this will never set the strands as 0 for protein
453 # queries / hits, since we can't detect the blast flavors
454 # from the columns alone.
455 assert seq_type in ('query', 'hit')
456 strand = getattr(frag, '%s_strand' % seq_type, None)
457 if strand is not None:
458 return strand
459 else:
460 # using parsedict instead of the fragment object since
461 # we need the unadjusted coordinated values
462 start = parsedict.get('%s_start' % seq_type)
463 end = parsedict.get('%s_end' % seq_type)
464 if start is not None and end is not None:
465 return 1 if start <= end else -1
466 # else implicit None return
467
468
470
471 """Indexer class for BLAST+ tab output."""
472
473 _parser = BlastTabParser
474
476 SearchIndexer.__init__(self, filename, comments=comments, fields=fields)
477
478 # if the file doesn't have comments,
479 # get index of column used as the key (qseqid / qacc / qaccver)
480 if not self._kwargs['comments']:
481 if 'qseqid' in fields:
482 self._key_idx = fields.index('qseqid')
483 elif 'qacc' in fields:
484 self._key_idx = fields.index('qacc')
485 elif 'qaccver' in fields:
486 self._key_idx = fields.index('qaccver')
487 else:
488 raise ValueError("Custom fields is missing an ID column. "
489 "One of these must be present: 'qseqid', 'qacc', or 'qaccver'.")
490
492 """Iterates over the file handle; yields key, start offset, and length."""
493 handle = self._handle
494 handle.seek(0)
495
496 if not self._kwargs['comments']:
497 iterfunc = self._qresult_index
498 else:
499 iterfunc = self._qresult_index_commented
500
501 for key, offset, length in iterfunc():
502 yield _bytes_to_string(key), offset, length
503
505 """Indexer for commented BLAST tabular files."""
506 handle = self._handle
507 handle.seek(0)
508 start_offset = 0
509 # mark of a new query
510 query_mark = None
511 # mark of the query's ID
512 qid_mark = _as_bytes('# Query: ')
513 # mark of the last line
514 end_mark = _as_bytes('# BLAST processed')
515
516 while True:
517 end_offset = handle.tell()
518 line = handle.readline()
519
520 if query_mark is None:
521 query_mark = line
522 start_offset = end_offset
523 elif line.startswith(qid_mark):
524 qresult_key = line[len(qid_mark):].split()[0]
525 elif line == query_mark or line.startswith(end_mark):
526 yield qresult_key, start_offset, end_offset - start_offset
527 start_offset = end_offset
528 elif not line:
529 break
530
532 """Indexer for noncommented BLAST tabular files."""
533 handle = self._handle
534 handle.seek(0)
535 start_offset = 0
536 qresult_key = None
537 key_idx = self._key_idx
538 tab_char = _as_bytes('\t')
539
540 while True:
541 # get end offset here since we only know a qresult ends after
542 # encountering the next one
543 end_offset = handle.tell()
544 #line = handle.readline()
545 line = handle.readline()
546
547 if qresult_key is None:
548 qresult_key = line.split(tab_char)[key_idx]
549 else:
550 try:
551 curr_key = line.split(tab_char)[key_idx]
552 except IndexError:
553 curr_key = _as_bytes('')
554
555 if curr_key != qresult_key:
556 yield qresult_key, start_offset, end_offset - start_offset
557 qresult_key = curr_key
558 start_offset = end_offset
559
560 # break if we've reached EOF
561 if not line:
562 break
563
565 """Returns the raw string of a QueryResult object from the given offset."""
566 if self._kwargs['comments']:
567 getfunc = self._get_raw_qresult_commented
568 else:
569 getfunc = self._get_raw_qresult
570
571 return getfunc(offset)
572
574 """Returns the raw string of a single QueryResult from a noncommented file."""
575 handle = self._handle
576 handle.seek(offset)
577 qresult_raw = _as_bytes('')
578 tab_char = _as_bytes('\t')
579 key_idx = self._key_idx
580 qresult_key = None
581
582 while True:
583 line = handle.readline()
584 # get the key if the first line (qresult key)
585 if qresult_key is None:
586 qresult_key = line.split(tab_char)[key_idx]
587 else:
588 try:
589 curr_key = line.split(tab_char)[key_idx]
590 except IndexError:
591 curr_key = _as_bytes('')
592 # only break when qresult is finished (key is different)
593 if curr_key != qresult_key:
594 break
595 # append to the raw string as long as qresult is the same
596 qresult_raw += line
597
598 return qresult_raw
599
601 """Returns the raw string of a single QueryResult from a commented file."""
602 handle = self._handle
603 handle.seek(offset)
604 qresult_raw = _as_bytes('')
605 end_mark = _as_bytes('# BLAST processed')
606
607 # query mark is the line marking a new query
608 # something like '# TBLASTN 2.2.25+'
609 query_mark = None
610 line = handle.readline()
611 while line:
612 # since query_mark depends on the BLAST search, we need to obtain it
613 # first
614 if query_mark is None:
615 query_mark = line
616 # break when we've reached the next qresult or the search ends
617 elif line == query_mark or line.startswith(end_mark):
618 break
619
620 qresult_raw += line
621 line = handle.readline()
622
623 return qresult_raw
624
625
627
628 """Writer for blast-tab output format."""
629
634
636 """Writes to the handle, returns how many QueryResult objects are written."""
637 handle = self.handle
638 qresult_counter, hit_counter, hsp_counter, frag_counter = 0, 0, 0, 0
639
640 for qresult in qresults:
641 if self.has_comments:
642 handle.write(self._build_comments(qresult))
643 if qresult:
644 handle.write(self._build_rows(qresult))
645 if not self.has_comments:
646 qresult_counter += 1
647 hit_counter += len(qresult)
648 hsp_counter += sum([len(hit) for hit in qresult])
649 frag_counter += sum([len(hit.fragments) for hit in qresult])
650 # if it's commented and there are no hits in the qresult, we still
651 # increment the counter
652 if self.has_comments:
653 qresult_counter += 1
654
655 # commented files have a line saying how many queries were processed
656 if self.has_comments:
657 handle.write('# BLAST processed %i queries' % qresult_counter)
658
659 return qresult_counter, hit_counter, hsp_counter, frag_counter
660
662 """Returns a string containing tabular rows of the QueryResult object."""
663 coordinates = set(['qstart', 'qend', 'sstart', 'send'])
664 qresult_lines = ''
665 for hit in qresult:
666 for hsp in hit:
667 line = []
668 for field in self.fields:
669 # get the column value ~ could either be an attribute
670 # of qresult, hit, or hsp
671 if field in _COLUMN_QRESULT:
672 value = getattr(qresult, _COLUMN_QRESULT[field][0])
673 elif field in _COLUMN_HIT:
674 value = getattr(hit, _COLUMN_HIT[field][0])
675 # special case, since 'frames' can be determined from
676 # query frame and hit frame
677 elif field == 'frames':
678 value = '%i/%i' % (hsp.query_frame, hsp.hit_frame)
679 elif field in _COLUMN_HSP:
680 try:
681 value = getattr(hsp, _COLUMN_HSP[field][0])
682 except AttributeError:
683 attr = _COLUMN_HSP[field][0]
684 _augment_blast_hsp(hsp, attr)
685 value = getattr(hsp, attr)
686 elif field in _COLUMN_FRAG:
687 value = getattr(hsp, _COLUMN_FRAG[field][0])
688 else:
689 assert field not in _SUPPORTED_FIELDS
690 continue
691
692 # adjust from and to according to strand, if from and to
693 # is included in the output field
694 if field in coordinates:
695 value = self._adjust_coords(field, value, hsp)
696 # adjust output formatting
697 value = self._adjust_output(field, value)
698
699 line.append(value)
700
701 hsp_line = '\t'.join(line)
702 qresult_lines += hsp_line + '\n'
703
704 return qresult_lines
705
707 """Adjusts start and end coordinates according to strand."""
708 assert field in ('qstart', 'qend', 'sstart', 'send')
709 # determine sequence type to operate on based on field's first letter
710 seq_type = 'query' if field.startswith('q') else 'hit'
711
712 strand = getattr(hsp, '%s_strand' % seq_type, None)
713 if strand is None:
714 raise ValueError("Required attribute %r not found." %
715 ('%s_strand' % (seq_type)))
716 # switch start <--> end coordinates if strand is -1
717 if strand < 0:
718 if field.endswith('start'):
719 value = getattr(hsp, '%s_end' % seq_type)
720 elif field.endswith('end'):
721 value = getattr(hsp, '%s_start' % seq_type) + 1
722 elif field.endswith('start'):
723 # adjust start coordinate for positive strand
724 value += 1
725
726 return value
727
729 """Adjusts formatting of the given field and value to mimic native tab output."""
730
731 # evalue formatting, adapted from BLAST+ source:
732 # src/objtools/align_format/align_format_util.cpp#L668
733 if field == 'evalue':
734 if value < 1.0e-180:
735 value = '0.0'
736 elif value < 1.0e-99:
737 value = '%2.0e' % value
738 elif value < 0.0009:
739 value = '%3.0e' % value
740 elif value < 0.1:
741 value = '%4.3f' % value
742 elif value < 1.0:
743 value = '%3.2f' % value
744 elif value < 10.0:
745 value = '%2.1f' % value
746 else:
747 value = '%5.0f' % value
748
749 # pident and ppos formatting
750 elif field in ('pident', 'ppos'):
751 value = '%.2f' % value
752
753 # evalue formatting, adapted from BLAST+ source:
754 # src/objtools/align_format/align_format_util.cpp#L723
755 elif field == 'bitscore':
756 if value > 9999:
757 value = '%4.3e' % value
758 elif value > 99.9:
759 value = '%4.0d' % value
760 else:
761 value = '%4.1f' % value
762
763 # everything else
764 else:
765 value = str(value)
766
767 return value
768
770 """Returns a string of a QueryResult tabular comment."""
771 comments = []
772 # inverse mapping of the long-short name map, required
773 # for writing comments
774 inv_field_map = dict((value, key) for key, value in
775 _LONG_SHORT_MAP.items())
776
777 # try to anticipate qress without version
778 if not hasattr(qres, 'version'):
779 program_line = '# %s' % qres.program.upper()
780 else:
781 program_line = '# %s %s' % (qres.program.upper(), qres.version)
782 comments.append(program_line)
783 # description may or may not be present, so we'll do a try here
784 try:
785 comments.append('# Query: %s %s' % (qres.id, qres.description))
786 except AttributeError:
787 comments.append('# Query: %s' % qres.id)
788 # try appending RID line, if present
789 try:
790 comments.append('# RID: %s' % qres.rid)
791 except AttributeError:
792 pass
793 comments.append('# Database: %s' % qres.target)
794 # qresults without hits don't show the Fields comment
795 if qres:
796 comments.append('# Fields: %s' %
797 ', '.join([inv_field_map[field] for field in self.fields]))
798 comments.append('# %i hits found' % len(qres))
799
800 return '\n'.join(comments) + '\n'
801
802
803 # if not used as a module, run the doctest
804 if __name__ == "__main__":
805 from Bio._utils import run_doctest
806 run_doctest()
807
| Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Feb 5 18:03:11 2013 | http://epydoc.sourceforge.net |