1
2
3
4
5
6 """
7 Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools.
8
9 You are expected to use this module via the Bio.AlignIO functions (or the
10 Bio.SeqIO functions if you want to work directly with the gapped sequences).
11
12 This module contains a parser for the pairwise alignments produced by Bill
13 Pearson's FASTA tools, for use from the Bio.AlignIO interface where it is
14 refered to as the "fasta-m10" file format (as we only support the machine
15 readable output format selected with the -m 10 command line option).
16
17 This module does NOT cover the generic "fasta" file format originally
18 developed as an input format to the FASTA tools. The Bio.AlignIO and
19 Bio.SeqIO both use the Bio.SeqIO.FastaIO module to deal with these files,
20 which can also be used to store a multiple sequence alignments.
21 """
22
23 from Bio.Seq import Seq
24 from Bio.SeqRecord import SeqRecord
25 from Bio.Align import MultipleSeqAlignment
26 from Bio.Alphabet import single_letter_alphabet, generic_dna, generic_protein
27 from Bio.Alphabet import Gapped
28
29
31 """Helper function for the main parsing code (PRIVATE).
32
33 To get the actual pairwise alignment sequences, we must first
34 translate the un-gapped sequence based coordinates into positions
35 in the gapped sequence (which may have a flanking region shown
36 using leading - characters). To date, I have never seen any
37 trailing flanking region shown in the m10 file, but the
38 following code should also cope with that.
39
40 Note that this code seems to work fine even when the "sq_offset"
41 entries are prsent as a result of using the -X command line option.
42 """
43 align_stripped = alignment_seq_with_flanking.strip("-")
44 display_start = int(annotation['al_display_start'])
45 if int(annotation['al_start']) <= int(annotation['al_stop']):
46 start = int(annotation['al_start']) \
47 - display_start
48 end = int(annotation['al_stop']) \
49 - display_start + 1
50 else:
51
52 start = display_start \
53 - int(annotation['al_start'])
54 end = display_start \
55 - int(annotation['al_stop']) + 1
56 end += align_stripped.count("-")
57 assert 0 <= start and start < end and end <= len(align_stripped), \
58 "Problem with sequence start/stop,\n%s[%i:%i]\n%s" \
59 % (alignment_seq_with_flanking, start, end, annotation)
60 return align_stripped[start:end]
61
62
64 """Alignment iterator for the FASTA tool's pairwise alignment output.
65
66 This is for reading the pairwise alignments output by Bill Pearson's
67 FASTA program when called with the -m 10 command line option for machine
68 readable output. For more details about the FASTA tools, see the website
69 http://fasta.bioch.virginia.edu/ and the paper:
70
71 W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
72
73 This class is intended to be used via the Bio.AlignIO.parse() function
74 by specifying the format as "fasta-m10" as shown in the following code:
75
76 from Bio import AlignIO
77 handle = ...
78 for a in AlignIO.parse(handle, "fasta-m10"):
79 assert len(a) == 2, "Should be pairwise!"
80 print "Alignment length %i" % a.get_alignment_length()
81 for record in a:
82 print record.seq, record.name, record.id
83
84 Note that this is not a full blown parser for all the information
85 in the FASTA output - for example, most of the header and all of the
86 footer is ignored. Also, the alignments are not batched according to
87 the input queries.
88
89 Also note that there can be up to about 30 letters of flanking region
90 included in the raw FASTA output as contextual information. This is NOT
91 part of the alignment itself, and is not included in the resulting
92 MultipleSeqAlignment objects returned.
93 """
94 if alphabet is None:
95 alphabet = single_letter_alphabet
96
97 state_PREAMBLE = -1
98 state_NONE = 0
99 state_QUERY_HEADER = 1
100 state_ALIGN_HEADER = 2
101 state_ALIGN_QUERY = 3
102 state_ALIGN_MATCH = 4
103 state_ALIGN_CONS = 5
104
105 def build_hsp():
106 if not query_tags and not match_tags:
107 raise ValueError("No data for query %r, match %r"
108 % (query_id, match_id))
109 assert query_tags, query_tags
110 assert match_tags, match_tags
111 evalue = align_tags.get("fa_expect", None)
112 q = "?"
113 m = "?"
114 tool = global_tags.get("tool", "").upper()
115 try:
116 q = _extract_alignment_region(query_seq, query_tags)
117 if tool in ["TFASTX"] and len(match_seq) == len(q):
118 m = match_seq
119
120
121 else:
122 m = _extract_alignment_region(match_seq, match_tags)
123 assert len(q) == len(m)
124 except AssertionError, err:
125 print "Darn... amino acids vs nucleotide coordinates?"
126 print tool
127 print query_seq
128 print query_tags
129 print q, len(q)
130 print match_seq
131 print match_tags
132 print m, len(m)
133 print handle.name
134 raise err
135
136 assert alphabet is not None
137 alignment = MultipleSeqAlignment([], alphabet)
138
139
140
141 alignment._annotations = {}
142
143
144 for key, value in header_tags.iteritems():
145 alignment._annotations[key] = value
146 for key, value in align_tags.iteritems():
147 alignment._annotations[key] = value
148
149
150
151 record = SeqRecord(Seq(q, alphabet),
152 id=query_id,
153 name="query",
154 description=query_descr,
155 annotations={"original_length": int(query_tags["sq_len"])})
156
157 record._al_start = int(query_tags["al_start"])
158 record._al_stop = int(query_tags["al_stop"])
159 alignment.append(record)
160
161
162
163
164 if alphabet == single_letter_alphabet and "sq_type" in query_tags:
165 if query_tags["sq_type"] == "D":
166 record.seq.alphabet = generic_dna
167 elif query_tags["sq_type"] == "p":
168 record.seq.alphabet = generic_protein
169 if "-" in q:
170 if not hasattr(record.seq.alphabet, "gap_char"):
171 record.seq.alphabet = Gapped(record.seq.alphabet, "-")
172
173
174
175 record = SeqRecord(Seq(m, alphabet),
176 id=match_id,
177 name="match",
178 description=match_descr,
179 annotations={"original_length": int(match_tags["sq_len"])})
180
181 record._al_start = int(match_tags["al_start"])
182 record._al_stop = int(match_tags["al_stop"])
183 alignment.append(record)
184
185
186 if alphabet == single_letter_alphabet and "sq_type" in match_tags:
187 if match_tags["sq_type"] == "D":
188 record.seq.alphabet = generic_dna
189 elif match_tags["sq_type"] == "p":
190 record.seq.alphabet = generic_protein
191 if "-" in m:
192 if not hasattr(record.seq.alphabet, "gap_char"):
193 record.seq.alphabet = Gapped(record.seq.alphabet, "-")
194
195 return alignment
196
197 state = state_PREAMBLE
198 query_id = None
199 match_id = None
200 query_descr = ""
201 match_descr = ""
202 global_tags = {}
203 header_tags = {}
204 align_tags = {}
205 query_tags = {}
206 match_tags = {}
207 query_seq = ""
208 match_seq = ""
209 cons_seq = ""
210 for line in handle:
211 if ">>>" in line and not line.startswith(">>>"):
212 if query_id and match_id:
213
214
215 yield build_hsp()
216 state = state_NONE
217 query_descr = line[line.find(">>>")+3:].strip()
218 query_id = query_descr.split(None, 1)[0]
219 match_id = None
220 header_tags = {}
221 align_tags = {}
222 query_tags = {}
223 match_tags = {}
224 query_seq = ""
225 match_seq = ""
226 cons_seq = ""
227 elif line.startswith("!! No "):
228
229
230
231
232 assert state == state_NONE
233 assert not header_tags
234 assert not align_tags
235 assert not match_tags
236 assert not query_tags
237 assert match_id is None
238 assert not query_seq
239 assert not match_seq
240 assert not cons_seq
241 query_id = None
242 elif line.strip() in [">>><<<", ">>>///"]:
243
244 if query_id and match_id:
245 yield build_hsp()
246 state = state_NONE
247 query_id = None
248 match_id = None
249 header_tags = {}
250 align_tags = {}
251 query_tags = {}
252 match_tags = {}
253 query_seq = ""
254 match_seq = ""
255 cons_seq = ""
256 elif line.startswith(">>>"):
257
258 assert query_id is not None
259 assert line[3:].split(", ", 1)[0] == query_id, line
260 assert match_id is None
261 assert not header_tags
262 assert not align_tags
263 assert not query_tags
264 assert not match_tags
265 assert not match_seq
266 assert not query_seq
267 assert not cons_seq
268 state = state_QUERY_HEADER
269 elif line.startswith(">>"):
270
271 if query_id and match_id:
272 yield build_hsp()
273 align_tags = {}
274 query_tags = {}
275 match_tags = {}
276 query_seq = ""
277 match_seq = ""
278 cons_seq = ""
279 match_descr = line[2:].strip()
280 match_id = match_descr.split(None, 1)[0]
281 state = state_ALIGN_HEADER
282 elif line.startswith(">--"):
283
284 assert query_id and match_id, line
285 yield build_hsp()
286
287
288 align_tags = {}
289 query_tags = {}
290 match_tags = {}
291 query_seq = ""
292 match_seq = ""
293 cons_seq = ""
294 state = state_ALIGN_HEADER
295 elif line.startswith(">"):
296 if state == state_ALIGN_HEADER:
297
298 assert query_id is not None, line
299 assert match_id is not None, line
300 assert query_id.startswith(line[1:].split(None, 1)[0]), line
301 state = state_ALIGN_QUERY
302 elif state == state_ALIGN_QUERY:
303
304 assert query_id is not None, line
305 assert match_id is not None, line
306 assert match_id.startswith(line[1:].split(None, 1)[0]), line
307 state = state_ALIGN_MATCH
308 elif state == state_NONE:
309
310 pass
311 else:
312 assert False, "state %i got %r" % (state, line)
313 elif line.startswith("; al_cons"):
314 assert state == state_ALIGN_MATCH, line
315 state = state_ALIGN_CONS
316
317 elif line.startswith("; "):
318 if ": " in line:
319 key, value = [s.strip() for s in line[2:].split(": ", 1)]
320 else:
321 import warnings
322
323
324 warnings.warn("Missing colon in line: %r" % line)
325 try:
326 key, value = [s.strip() for s in line[2:].split(" ", 1)]
327 except ValueError:
328 raise ValueError("Bad line: %r" % line)
329 if state == state_QUERY_HEADER:
330 header_tags[key] = value
331 elif state == state_ALIGN_HEADER:
332 align_tags[key] = value
333 elif state == state_ALIGN_QUERY:
334 query_tags[key] = value
335 elif state == state_ALIGN_MATCH:
336 match_tags[key] = value
337 else:
338 assert False, "Unexpected state %r, %r" % (state, line)
339 elif state == state_ALIGN_QUERY:
340 query_seq += line.strip()
341 elif state == state_ALIGN_MATCH:
342 match_seq += line.strip()
343 elif state == state_ALIGN_CONS:
344 cons_seq += line.strip("\n")
345 elif state == state_PREAMBLE:
346 if line.startswith("#"):
347 global_tags["command"] = line[1:].strip()
348 elif line.startswith(" version "):
349 global_tags["version"] = line[9:].strip()
350 elif " compares a " in line:
351 global_tags["tool"] = line[:line.find(" compares a ")].strip()
352 elif " searches a " in line:
353 global_tags["tool"] = line[:line.find(" searches a ")].strip()
354 else:
355 pass
356
357
358 if __name__ == "__main__":
359 print "Running a quick self-test"
360
361
362 simple_example = \
363 """# /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
364 FASTA searches a protein or DNA sequence data bank
365 version 34.26 January 12, 2007
366 Please cite:
367 W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
368
369 Query library NC_002127.faa vs NC_009649.faa library
370 searching NC_009649.faa library
371
372 1>>>gi|10955263|ref|NP_052604.1| plasmid mobilization [Escherichia coli O157:H7 s 107 aa - 107 aa
373 vs NC_009649.faa library
374
375 45119 residues in 180 sequences
376 Expectation_n fit: rho(ln(x))= 6.9146+/-0.0249; mu= -5.7948+/- 1.273
377 mean_var=53.6859+/-13.609, 0's: 0 Z-trim: 1 B-trim: 9 in 1/25
378 Lambda= 0.175043
379
380 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
381 join: 36, opt: 24, open/ext: -10/-2, width: 16
382 Scan time: 0.000
383 The best scores are: opt bits E(180)
384 gi|152973457|ref|YP_001338508.1| ATPase with chape ( 931) 71 24.9 0.58
385 gi|152973588|ref|YP_001338639.1| F pilus assembly ( 459) 63 23.1 0.99
386
387 >>>gi|10955263|ref|NP_052604.1|, 107 aa vs NC_009649.faa library
388 ; pg_name: /opt/fasta/fasta34
389 ; pg_ver: 34.26
390 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
391 ; pg_name: FASTA
392 ; pg_ver: 3.5 Sept 2006
393 ; pg_matrix: BL50 (15:-5)
394 ; pg_open-ext: -10 -2
395 ; pg_ktup: 2
396 ; pg_optcut: 24
397 ; pg_cgap: 36
398 ; mp_extrap: 60000 180
399 ; mp_stats: Expectation_n fit: rho(ln(x))= 6.9146+/-0.0249; mu= -5.7948+/- 1.273 mean_var=53.6859+/-13.609, 0's: 0 Z-trim: 1 B-trim: 9 in 1/25 Lambda= 0.175043
400 ; mp_KS: -0.0000 (N=0) at 8159228
401 >>gi|152973457|ref|YP_001338508.1| ATPase with chaperone activity, ATP-binding subunit [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
402 ; fa_frame: f
403 ; fa_initn: 65
404 ; fa_init1: 43
405 ; fa_opt: 71
406 ; fa_z-score: 90.3
407 ; fa_bits: 24.9
408 ; fa_expect: 0.58
409 ; sw_score: 71
410 ; sw_ident: 0.250
411 ; sw_sim: 0.574
412 ; sw_overlap: 108
413 >gi|10955263| ..
414 ; sq_len: 107
415 ; sq_offset: 1
416 ; sq_type: p
417 ; al_start: 5
418 ; al_stop: 103
419 ; al_display_start: 1
420 --------------------------MTKRSGSNT-RRRAISRPVRLTAE
421 ED---QEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVM-----
422 RLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
423 >gi|152973457|ref|YP_001338508.1| ..
424 ; sq_len: 931
425 ; sq_type: p
426 ; al_start: 96
427 ; al_stop: 195
428 ; al_display_start: 66
429 SDFFRIGDDATPVAADTDDVVDASFGEPAAAGSGAPRRRGSGLASRISEQ
430 SEALLQEAAKHAAEFGRS------EVDTEHLLLALADSDVVKTILGQFKI
431 KVDDLKRQIESEAKR-GDKPF-EGEIGVSPRVKDALSRAFVASNELGHSY
432 VGPEHFLIGLAEEGEGLAANLLRRYGLTPQ
433 >>gi|152973588|ref|YP_001338639.1| F pilus assembly protein [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
434 ; fa_frame: f
435 ; fa_initn: 33
436 ; fa_init1: 33
437 ; fa_opt: 63
438 ; fa_z-score: 86.1
439 ; fa_bits: 23.1
440 ; fa_expect: 0.99
441 ; sw_score: 63
442 ; sw_ident: 0.266
443 ; sw_sim: 0.656
444 ; sw_overlap: 64
445 >gi|10955263| ..
446 ; sq_len: 107
447 ; sq_offset: 1
448 ; sq_type: p
449 ; al_start: 32
450 ; al_stop: 94
451 ; al_display_start: 2
452 TKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKK
453 VNSLTDDRVLKEV-MRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRAL
454 LSRLMAD
455 >gi|152973588|ref|YP_001338639.1| ..
456 ; sq_len: 459
457 ; sq_type: p
458 ; al_start: 191
459 ; al_stop: 248
460 ; al_display_start: 161
461 VGGLFPRTQVAQQKVCQDIAGESNIFSDWAASRQGCTVGG--KMDSVQDK
462 ASDKDKERVMKNINIMWNALSKNRLFDG----NKELKEFIMTLTGTLIFG
463 ENSEITPLPARTTDQDLIRAMMEGGTAKIYHCNDSDKCLKVVADATVTIT
464 SNKALKSQISALLSSIQNKAVADEKLTDQE
465 2>>>gi|10955264|ref|NP_052605.1| hypothetical protein pOSAK1_02 [Escherichia coli O157:H7 s 126 aa - 126 aa
466 vs NC_009649.faa library
467
468 45119 residues in 180 sequences
469 Expectation_n fit: rho(ln(x))= 7.1374+/-0.0246; mu= -7.6540+/- 1.313
470 mean_var=51.1189+/-13.171, 0's: 0 Z-trim: 1 B-trim: 8 in 1/25
471 Lambda= 0.179384
472
473 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
474 join: 36, opt: 24, open/ext: -10/-2, width: 16
475 Scan time: 0.000
476 The best scores are: opt bits E(180)
477 gi|152973462|ref|YP_001338513.1| hypothetical prot ( 101) 58 22.9 0.29
478
479 >>>gi|10955264|ref|NP_052605.1|, 126 aa vs NC_009649.faa library
480 ; pg_name: /opt/fasta/fasta34
481 ; pg_ver: 34.26
482 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
483 ; pg_name: FASTA
484 ; pg_ver: 3.5 Sept 2006
485 ; pg_matrix: BL50 (15:-5)
486 ; pg_open-ext: -10 -2
487 ; pg_ktup: 2
488 ; pg_optcut: 24
489 ; pg_cgap: 36
490 ; mp_extrap: 60000 180
491 ; mp_stats: Expectation_n fit: rho(ln(x))= 7.1374+/-0.0246; mu= -7.6540+/- 1.313 mean_var=51.1189+/-13.171, 0's: 0 Z-trim: 1 B-trim: 8 in 1/25 Lambda= 0.179384
492 ; mp_KS: -0.0000 (N=0) at 8159228
493 >>gi|152973462|ref|YP_001338513.1| hypothetical protein KPN_pKPN3p05904 [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
494 ; fa_frame: f
495 ; fa_initn: 50
496 ; fa_init1: 50
497 ; fa_opt: 58
498 ; fa_z-score: 95.8
499 ; fa_bits: 22.9
500 ; fa_expect: 0.29
501 ; sw_score: 58
502 ; sw_ident: 0.289
503 ; sw_sim: 0.632
504 ; sw_overlap: 38
505 >gi|10955264| ..
506 ; sq_len: 126
507 ; sq_offset: 1
508 ; sq_type: p
509 ; al_start: 1
510 ; al_stop: 38
511 ; al_display_start: 1
512 ------------------------------MKKDKKYQIEAIKNKDKTLF
513 IVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSF
514 NGEKFSSYTLNKVTKTDEYN
515 >gi|152973462|ref|YP_001338513.1| ..
516 ; sq_len: 101
517 ; sq_type: p
518 ; al_start: 44
519 ; al_stop: 81
520 ; al_display_start: 14
521 DALLGEIQRLRKQVHQLQLERDILTKANELIKKDLGVSFLKLKNREKTLI
522 VDALKKKYPVAELLSVLQLARSCYFYQNVCTISMRKYA
523 3>>>gi|10955265|ref|NP_052606.1| hypothetical protein pOSAK1_03 [Escherichia coli O157:H7 s 346 aa - 346 aa
524 vs NC_009649.faa library
525
526 45119 residues in 180 sequences
527 Expectation_n fit: rho(ln(x))= 6.0276+/-0.0276; mu= 3.0670+/- 1.461
528 mean_var=37.1634+/- 8.980, 0's: 0 Z-trim: 1 B-trim: 14 in 1/25
529 Lambda= 0.210386
530
531 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
532 join: 37, opt: 25, open/ext: -10/-2, width: 16
533 Scan time: 0.020
534 The best scores are: opt bits E(180)
535 gi|152973545|ref|YP_001338596.1| putative plasmid ( 242) 70 27.5 0.082
536
537 >>>gi|10955265|ref|NP_052606.1|, 346 aa vs NC_009649.faa library
538 ; pg_name: /opt/fasta/fasta34
539 ; pg_ver: 34.26
540 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
541 ; pg_name: FASTA
542 ; pg_ver: 3.5 Sept 2006
543 ; pg_matrix: BL50 (15:-5)
544 ; pg_open-ext: -10 -2
545 ; pg_ktup: 2
546 ; pg_optcut: 25
547 ; pg_cgap: 37
548 ; mp_extrap: 60000 180
549 ; mp_stats: Expectation_n fit: rho(ln(x))= 6.0276+/-0.0276; mu= 3.0670+/- 1.461 mean_var=37.1634+/- 8.980, 0's: 0 Z-trim: 1 B-trim: 14 in 1/25 Lambda= 0.210386
550 ; mp_KS: -0.0000 (N=0) at 8159228
551 >>gi|152973545|ref|YP_001338596.1| putative plasmid SOS inhibition protein A [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
552 ; fa_frame: f
553 ; fa_initn: 52
554 ; fa_init1: 52
555 ; fa_opt: 70
556 ; fa_z-score: 105.5
557 ; fa_bits: 27.5
558 ; fa_expect: 0.082
559 ; sw_score: 70
560 ; sw_ident: 0.279
561 ; sw_sim: 0.651
562 ; sw_overlap: 43
563 >gi|10955265| ..
564 ; sq_len: 346
565 ; sq_offset: 1
566 ; sq_type: p
567 ; al_start: 197
568 ; al_stop: 238
569 ; al_display_start: 167
570 DFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKK
571 QLSC-SLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNL
572 GEYFTENKPKYIIREIHQET
573 >gi|152973545|ref|YP_001338596.1| ..
574 ; sq_len: 242
575 ; sq_type: p
576 ; al_start: 52
577 ; al_stop: 94
578 ; al_display_start: 22
579 IMTVEEARQRGARLPSMPHVRTFLRLLTGCSRINSDVARRIPGIHRDPKD
580 RLSSLKQVEEALDMLISSHGEYCPLPLTMDVQAENFPEVLHTRTVRRLKR
581 QDFAFTRKMRREARQVEQSW
582 >>><<<
583
584
585 579 residues in 3 query sequences
586 45119 residues in 180 library sequences
587 Scomplib [34.26]
588 start: Tue May 20 16:38:45 2008 done: Tue May 20 16:38:45 2008
589 Total Scan time: 0.020 Total Display time: 0.010
590
591 Function used was FASTA [version 34.26 January 12, 2007]
592
593 """
594
595 from StringIO import StringIO
596
597 alignments = list(FastaM10Iterator(StringIO(simple_example)))
598 assert len(alignments) == 4, len(alignments)
599 assert len(alignments[0]) == 2
600 for a in alignments:
601 print "Alignment %i sequences of length %i" \
602 % (len(a), a.get_alignment_length())
603 for r in a:
604 print "%s %s %i" % (r.seq, r.id, r.annotations["original_length"])
605
606 print "Done"
607
608 import os
609 path = "../../Tests/Fasta/"
610 files = [f for f in os.listdir(path) if os.path.splitext(f)[-1] == ".m10"]
611 files.sort()
612 for filename in files:
613 if os.path.splitext(filename)[-1] == ".m10":
614 print
615 print filename
616 print "=" * len(filename)
617 for i, a in enumerate(FastaM10Iterator(open(os.path.join(path, filename)))):
618 print "#%i, %s" % (i+1, a)
619 for r in a:
620 if "-" in r.seq:
621 assert r.seq.alphabet.gap_char == "-"
622 else:
623 assert not hasattr(r.seq.alphabet, "gap_char")
624