1
2
3
4
5
6 """Bio.SearchIO object to model a single database hit."""
7
8 from itertools import chain
9
10 from Bio._utils import getattr_str, trim_str
11 from Bio.SearchIO._utils import allitems, optionalcascade
12
13 from _base import _BaseSearchObject
14 from hsp import HSP
15
16
17 -class Hit(_BaseSearchObject):
18
19 """Class representing a single database hit of a search result.
20
21 Hit objects are the second-level container in the SearchIO module. They
22 are the objects contained within a QueryResult (see QueryResult). They
23 are themselves containers for HSP objects and will contain at least one
24 HSP.
25
26 To have a quick look at a Hit and its contents, invoke `print` on it:
27
28 >>> from Bio import SearchIO
29 >>> qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
30 >>> hit = qresult[3]
31 >>> print hit
32 Query: 33211
33 mir_1
34 Hit: gi|301171322|ref|NR_035857.1| (86)
35 Pan troglodytes microRNA mir-520c (MIR520C), microRNA
36 HSPs: ---- -------- --------- ------ --------------- ---------------------
37 # E-value Bit score Span Query range Hit range
38 ---- -------- --------- ------ --------------- ---------------------
39 0 8.9e-20 100.47 60 [1:61] [13:73]
40 1 3.3e-06 55.39 60 [0:60] [13:73]
41
42 You can invoke `len` on a Hit object to see how many HSP objects it contains:
43
44 >>> len(hit)
45 2
46
47 Hit objects behave very similarly to Python lists. You can retrieve the HSP
48 object inside a Hit using the HSP's integer index. Hit objects can also be
49 sliced, which will return a new Hit object containing only the sliced HSPs:
50
51 # HSP items inside the Hit can be retrieved using its integer index
52 >>> hit[0]
53 HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)
54
55 # slicing returns a new Hit
56 >>> hit
57 Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
58 >>> hit[:1]
59 Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
60 >>> print hit[1:]
61 Query: 33211
62 mir_1
63 Hit: gi|301171322|ref|NR_035857.1| (86)
64 Pan troglodytes microRNA mir-520c (MIR520C), microRNA
65 HSPs: ---- -------- --------- ------ --------------- ---------------------
66 # E-value Bit score Span Query range Hit range
67 ---- -------- --------- ------ --------------- ---------------------
68 0 3.3e-06 55.39 60 [0:60] [13:73]
69
70 Hit objects provide `filter` and `map` methods, which are analogous to
71 Python's built-in `filter` and `map` except that they return a new Hit
72 object instead of a list.
73
74 Here is an example of using `filter` to select for HSPs whose e-value is
75 less than 1e-10:
76
77 >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
78 >>> filtered_hit = hit.filter(evalue_filter)
79 >>> len(hit)
80 2
81 >>> len(filtered_hit)
82 1
83 >>> print filtered_hit
84 Query: 33211
85 mir_1
86 Hit: gi|301171322|ref|NR_035857.1| (86)
87 Pan troglodytes microRNA mir-520c (MIR520C), microRNA
88 HSPs: ---- -------- --------- ------ --------------- ---------------------
89 # E-value Bit score Span Query range Hit range
90 ---- -------- --------- ------ --------------- ---------------------
91 0 8.9e-20 100.47 60 [1:61] [13:73]
92
93 There are also other methods which are counterparts of Python lists' methods
94 with the same names: `append`, `index`, `pop`, and `sort`. Consult their
95 respective documentations for more details and examples of their usage.
96
97 """
98
99
100
101 _NON_STICKY_ATTRS = ('_items', )
102
104 """Initializes a Hit object.
105
106 Arguments:
107 hsps -- List containing HSP objects.
108
109 Hit objects must be initialized with a list containing at least one HSP
110 object. If multiple HSP objects are used for initialization, they must
111 all have the same `query_id`, `query_description`, `hit_id`, and
112 `hit_description` properties.
113
114 """
115 for attr in ('query_id', 'query_description', 'hit_id',
116 'hit_description'):
117
118
119
120
121 if len(set([getattr(hsp, attr) for hsp in hsps])) > 1:
122 raise ValueError("Hit object can not contain HSPs with "
123 "more than one %s." % attr)
124
125 self._items = []
126 for hsp in hsps:
127
128 self._validate_hsp(hsp)
129
130 self.append(hsp)
131
133 return "Hit(id=%r, query_id=%r, %r hsps)" % (self.id, self.query_id,
134 len(self))
135
137 return iter(self.hsps)
138
140 return len(self.hsps)
141
143 return bool(self.hsps)
144
146 return hsp in self._items
147
149 lines = []
150
151
152 qid_line = 'Query: %s' % self.query_id
153 if self.query_description:
154 qid_line += trim_str('\n %s' %
155 self.query_description, 80, '...')
156 lines.append(qid_line)
157
158
159 hid_line = ' Hit: %s' % self.id
160 if hasattr(self, 'seq_len'):
161 hid_line += ' (%i)' % self.seq_len
162 if self.description:
163 hid_line += trim_str('\n %s' % self.description,
164 80, '...')
165 lines.append(hid_line)
166
167
168 if not self.hsps:
169 lines.append(' HSPs: ?')
170 else:
171 lines.append(' HSPs: %s %s %s %s %s %s' %
172 ('-'*4, '-'*8, '-'*9, '-'*6, '-'*15, '-'*21))
173 pattern = '%11s %8s %9s %6s %15s %21s'
174 lines.append(pattern % ('#', 'E-value', 'Bit score', 'Span',
175 'Query range', 'Hit range'))
176 lines.append(pattern % ('-'*4, '-'*8, '-'*9, '-'*6, '-'*15, '-'*21))
177 for idx, hsp in enumerate(self.hsps):
178
179 evalue = getattr_str(hsp, 'evalue', fmt='%.2g')
180
181 bitscore = getattr_str(hsp, 'bitscore', fmt='%.2f')
182
183 aln_span = getattr_str(hsp, 'aln_span')
184
185 query_start = getattr_str(hsp, 'query_start')
186 query_end = getattr_str(hsp, 'query_end')
187 query_range = '[%s:%s]' % (query_start, query_end)
188
189 query_range = trim_str(query_range, 15, '~]')
190
191 hit_start = getattr_str(hsp, 'hit_start')
192 hit_end = getattr_str(hsp, 'hit_end')
193 hit_range = '[%s:%s]' % (hit_start, hit_end)
194 hit_range = trim_str(hit_range, 21, '~]')
195
196 lines.append(pattern % (str(idx), evalue, bitscore, aln_span,
197 query_range, hit_range))
198
199 return '\n'.join(lines)
200
202
203 if isinstance(idx, slice):
204 obj = self.__class__(self.hsps[idx])
205 self._transfer_attrs(obj)
206 return obj
207 return self._items[idx]
208
218
221
222
224 """Validates an HSP object.
225
226 Valid HSP objects have the same hit_id as the Hit object ID and the
227 same query_id as the Hit object's query_id.
228
229 """
230 if not isinstance(hsp, HSP):
231 raise TypeError("Hit objects can only contain HSP objects.")
232
233 if self._items:
234 if hsp.hit_id != self.id:
235 raise ValueError("Expected HSP with hit ID %r, "
236 "found %r instead." % (self.id, hsp.hit_id))
237 if hsp.query_id != self.query_id:
238 raise ValueError("Expected HSP with query ID %r, "
239 "found %r instead." % (self.query_id, hsp.query_id))
240
241
# Hit-level properties built with the `optionalcascade` helper imported from
# Bio.SearchIO._utils. The first argument is an attribute name and the second
# the property's docstring.
# NOTE(review): the first argument looks like the name of the matching HSP
# attribute (compare `hsp.hit_id` / `hsp.query_id` in the validation code) --
# confirm against `optionalcascade` itself.
description = optionalcascade('hit_description', "Hit description")
query_description = optionalcascade(
    'query_description',
    "Description of the query that produced the hit")
id = optionalcascade('hit_id', "Hit ID string.")
query_id = optionalcascade(
    'query_id',
    "ID string of the query that produced the hit")

# `hsps` is the public handle on the contained HSP objects, built with the
# `allitems` helper from Bio.SearchIO._utils.
hsps = allitems(doc="HSP objects contained in the Hit")
250
251 @property
253 """HSPFragment objects contained in the Hit"""
254 return [frag for frag in chain(*self._items)]
255
256
258 """Adds a HSP object to the end of Hit.
259
260 Parameters
261 hsp -- HSP object to append.
262
263 Any HSP object appended must have the same `hit_id` property as the
264 Hit object's `id` property and the same `query_id` property as the
265 Hit object's `query_id` property.
266
267 """
268 self._validate_hsp(hsp)
269 self._items.append(hsp)
270
272 """Creates a new Hit object whose HSP objects pass the filter
273 function.
274
275 Arguments:
276 func -- Callback function that accepts a HSP object as its parameter,
277 does a boolean check, and returns True or False.
278
279 `filter` is analogous to Python's built-in `filter` function, except
280 that instead of returning a list it returns a `Hit` object. Here is an
281 example of using `filter` to select for HSPs having bitscores bigger
282 than 60:
283
284 >>> from Bio import SearchIO
285 >>> qresult = SearchIO.parse('Blast/mirna.xml', 'blast-xml').next()
286 >>> hit = qresult[3]
287 >>> evalue_filter = lambda hsp: hsp.bitscore > 60
288 >>> filtered_hit = hit.filter(evalue_filter)
289 >>> len(hit)
290 2
291 >>> len(filtered_hit)
292 1
293 >>> print filtered_hit
294 Query: 33211
295 mir_1
296 Hit: gi|301171322|ref|NR_035857.1| (86)
297 Pan troglodytes microRNA mir-520c (MIR520C), microRNA
298 HSPs: ---- -------- --------- ------ --------------- ---------------------
299 # E-value Bit score Span Query range Hit range
300 ---- -------- --------- ------ --------------- ---------------------
301 0 8.9e-20 100.47 60 [1:61] [13:73]
302
303 """
304 hsps = filter(func, self.hsps)
305 if hsps:
306 obj = self.__class__(hsps)
307 self._transfer_attrs(obj)
308 return obj
309
311 """Returns the index of a given HSP object, zero-based.
312
313 Arguments:
314 hsp -- HSP object to be looked up.
315
316 """
317 return self._items.index(hsp)
318
def map(self, func=None):
    """Creates a new Hit object, mapping the given function to its HSPs.

    Arguments:
    func -- Callback function that accepts a HSP object as its parameter and
            also returns a HSP object. If None (the default), the HSPs are
            carried over as they are.

    `map` is analogous to Python's built-in `map` function. It is applied to
    all HSPs contained in the Hit object and returns a new Hit object. The
    list of HSPs held by the Hit it is called on is not modified by `map`
    itself.

    Returns the new Hit object, or None if there are no HSPs to put in it.

    """
    # Work on a shallow copy, so that a callback touching this Hit's own HSP
    # list cannot disturb the iteration.
    hsps = self.hsps[:]
    if func is not None:
        # A list comprehension always produces a real list, whereas the
        # built-in `map` is lazy on Python 3 and its result would always be
        # truthy, defeating the emptiness check below.
        hsps = [func(hsp) for hsp in hsps]
    if not hsps:
        # No HSPs to build a Hit from; this was the implicit result before.
        return None
    obj = self.__class__(hsps)
    # Inherited helper; presumably copies this Hit's attributes onto the new
    # object -- see `_transfer_attrs` in the base class.
    self._transfer_attrs(obj)
    return obj
338
def pop(self, index=-1):
    """Removes and returns the HSP object at the specified index.

    Arguments:
    index -- Integer position of the HSP object to take out. Defaults to -1,
             i.e. the last HSP.

    The removal is delegated to the `pop` method of the internal list of
    HSPs, so an out-of-range index raises IndexError.

    """
    removed_hsp = self._items.pop(index)
    return removed_hsp
347
def sort(self, key=None, reverse=False, in_place=True):
    """Sorts the HSP objects.

    Arguments:
    key -- Function used to sort the HSP objects.
    reverse -- Boolean, whether to reverse the sorting or not.
    in_place -- Boolean, whether to perform sorting in place (in the same
                object) or not (creating a new object).

    `sort` defaults to sorting in place, to mimic Python's `list.sort`
    method; in that case nothing is returned. If you set the `in_place`
    argument to False, it will return a new, sorted Hit object and keep the
    initial one unsorted.

    """
    if not in_place:
        # Sort a fresh list so the HSP order of this Hit stays as it is.
        ordered_hsps = sorted(self.hsps, key=key, reverse=reverse)
        sorted_hit = self.__class__(ordered_hsps)
        # Inherited helper; presumably copies this Hit's attributes onto the
        # new object -- see `_transfer_attrs` in the base class.
        self._transfer_attrs(sorted_hit)
        return sorted_hit
    # In-place variant: reorder the internal list directly.
    self._items.sort(key=key, reverse=reverse)
370
371
372
# Script entry point: when this module is run directly, hand over to the
# `run_doctest` helper from Bio._utils. The import is kept local so that
# nothing extra is loaded when the module is merely imported.
if __name__ == "__main__":
    from Bio._utils import run_doctest
    run_doctest()
376