1
2
3
4
5
6
7 """Bio.SeqIO support for the "genbank" and "embl" file formats.
8
9 You are expected to use this module via the Bio.SeqIO functions.
10 Note that internally this module calls Bio.GenBank to do the actual
11 parsing of GenBank, EMBL and IMGT files.
12
13 See also:
14
15 International Nucleotide Sequence Database Collaboration
16 http://www.insdc.org/
17
18 GenBank
19 http://www.ncbi.nlm.nih.gov/Genbank/
20
21 EMBL Nucleotide Sequence Database
22 http://www.ebi.ac.uk/embl/
23
24 DDBJ (DNA Data Bank of Japan)
25 http://www.ddbj.nig.ac.jp/
26
27 IMGT (use a variant of EMBL format with longer feature indents)
28 http://imgt.cines.fr/download/LIGM-DB/userman_doc.html
29 http://imgt.cines.fr/download/LIGM-DB/ftable_doc.html
30 http://www.ebi.ac.uk/imgt/hla/docs/manual.html
31
32 """
33
34 from Bio.Seq import UnknownSeq
35 from Bio.GenBank.Scanner import GenBankScanner, EmblScanner, _ImgtScanner
36 from Bio import Alphabet
37 from Interfaces import SequentialSequenceWriter
38 from Bio import SeqFeature
39
40 from Bio._py3k import _is_int_or_long
41
42
43
44
45
46
47
48
49
51 """Breaks up a Genbank file into SeqRecord objects.
52
53 Every section from the LOCUS line to the terminating // becomes
54 a single SeqRecord with associated annotation and features.
55
56 Note that for genomes or chromosomes, there is typically only
57 one record."""
58
59 return GenBankScanner(debug=0).parse_records(handle)
60
61
63 """Breaks up an EMBL file into SeqRecord objects.
64
65 Every section from the LOCUS line to the terminating // becomes
66 a single SeqRecord with associated annotation and features.
67
68 Note that for genomes or chromosomes, there is typically only
69 one record."""
70
71 return EmblScanner(debug=0).parse_records(handle)
72
73
75 """Breaks up an IMGT file into SeqRecord objects.
76
77 Every section from the LOCUS line to the terminating // becomes
78 a single SeqRecord with associated annotation and features.
79
80 Note that for genomes or chromosomes, there is typically only
81 one record."""
82
83 return _ImgtScanner(debug=0).parse_records(handle)
84
85
87 """Breaks up a Genbank file into SeqRecord objects for each CDS feature.
88
89 Every section from the LOCUS line to the terminating // can contain
90 many CDS features. These are returned as with the stated amino acid
91 translation sequence (if given).
92 """
93
94 return GenBankScanner(debug=0).parse_cds_features(handle, alphabet)
95
96
98 """Breaks up a EMBL file into SeqRecord objects for each CDS feature.
99
100 Every section from the LOCUS line to the terminating // can contain
101 many CDS features. These are returned as with the stated amino acid
102 translation sequence (if given).
103 """
104
105 return EmblScanner(debug=0).parse_cds_features(handle, alphabet)
106
107
109 """Build a GenBank/EMBL position string (PRIVATE).
110
111 Use offset=1 to add one to convert a start position from python counting.
112 """
113 if isinstance(pos, SeqFeature.ExactPosition):
114 return "%i" % (pos.position + offset)
115 elif isinstance(pos, SeqFeature.WithinPosition):
116 return "(%i.%i)" % (pos.position + offset,
117 pos.position + pos.extension + offset)
118 elif isinstance(pos, SeqFeature.BetweenPosition):
119 return "(%i^%i)" % (pos.position + offset,
120 pos.position + pos.extension + offset)
121 elif isinstance(pos, SeqFeature.BeforePosition):
122 return "<%i" % (pos.position + offset)
123 elif isinstance(pos, SeqFeature.AfterPosition):
124 return ">%i" % (pos.position + offset)
125 elif isinstance(pos, SeqFeature.OneOfPosition):
126 return "one-of(%s)" \
127 % ",".join([_insdc_feature_position_string(p, offset)
128 for p in pos.position_choices])
129 elif isinstance(pos, SeqFeature.AbstractPosition):
130 raise NotImplementedError("Please report this as a bug in Biopython.")
131 else:
132 raise ValueError("Expected a SeqFeature position object.")
133
134
136 if location.ref:
137 ref = "%s:" % location.ref
138 else:
139 ref = ""
140 assert not location.ref_db
141 if isinstance(location.start, SeqFeature.ExactPosition) \
142 and isinstance(location.end, SeqFeature.ExactPosition) \
143 and location.start.position == location.end.position:
144
145
146 if location.end.position == rec_length:
147
148
149
150 return "%s%i^1" % (ref, rec_length)
151 else:
152 return "%s%i^%i" % (ref, location.end.position,
153 location.end.position + 1)
154 if isinstance(location.start, SeqFeature.ExactPosition) \
155 and isinstance(location.end, SeqFeature.ExactPosition) \
156 and location.start.position + 1 == location.end.position:
157
158
159 return "%s%i" % (ref, location.end.position)
160 elif isinstance(location.start, SeqFeature.UnknownPosition) \
161 or isinstance(location.end, SeqFeature.UnknownPosition):
162
163 if isinstance(location.start, SeqFeature.UnknownPosition) \
164 and isinstance(location.end, SeqFeature.UnknownPosition):
165
166
167
168 raise ValueError("Feature with unknown location")
169 elif isinstance(location.start, SeqFeature.UnknownPosition):
170
171 return "%s<%i..%s" \
172 % (ref,
173 location.nofuzzy_end,
174 _insdc_feature_position_string(location.end))
175 else:
176
177 return "%s%s..>%i" \
178 % (ref,
179 _insdc_feature_position_string(location.start),
180 location.nofuzzy_start)
181 else:
182
183 return ref \
184 + _insdc_feature_position_string(location.start, +1) \
185 + ".." + \
186 _insdc_feature_position_string(location.end)
187
188
190 """Build a GenBank/EMBL location string from a SeqFeature (PRIVATE).
191
192 There is a choice of how to show joins on the reverse complement strand,
193 GenBank used "complement(join(1,10),(20,100))" while EMBL used to use
194 "join(complement(20,100),complement(1,10))" instead (but appears to have
195 now adopted the GenBank convention). Notice that the order of the entries
196 is reversed! This function therefore uses the first form. In this situation
197 we expect the parent feature and the two children to all be marked as
198 strand == -1, and in the order 0:10 then 19:100.
199
200 Also need to consider dual-strand examples like these from the Arabidopsis
201 thaliana chloroplast NC_000932: join(complement(69611..69724),139856..140650)
202 gene ArthCp047, GeneID:844801 or its CDS (protein NP_051038.1 GI:7525057)
203 which is further complicated by a splice:
204 join(complement(69611..69724),139856..140087,140625..140650)
205
206 For mixed this mixed strand feature, the parent SeqFeature should have
207 no strand (either 0 or None) while the child features should have either
208 strand +1 or -1 as appropriate, and be listed in the order given here.
209 """
210
211 if not feature.sub_features:
212
213
214
215
216 location = _insdc_location_string_ignoring_strand_and_subfeatures(
217 feature.location, rec_length)
218 if feature.strand == -1:
219 location = "complement(%s)" % location
220 return location
221
222 if feature.strand == -1:
223 for f in feature.sub_features:
224 if f.strand != -1:
225 raise ValueError("Inconsistent strands: %r for parent, %r for child"
226 % (feature.strand, f.strand))
227 return "complement(%s(%s))" \
228 % (feature.location_operator,
229 ",".join(_insdc_location_string_ignoring_strand_and_subfeatures(f.location, rec_length)
230 for f in feature.sub_features))
231
232
233
234
235 assert feature.location_operator != ""
236 return "%s(%s)" % (feature.location_operator,
237 ",".join([_insdc_feature_location_string(f, rec_length)
238 for f in feature.sub_features]))
239
240
242 """Base class for GenBank and EMBL writers (PRIVATE)."""
243 MAX_WIDTH = 80
244 QUALIFIER_INDENT = 21
245 QUALIFIER_INDENT_STR = " " * QUALIFIER_INDENT
246 QUALIFIER_INDENT_TMP = " %s "
247 FTQUAL_NO_QUOTE = ("anticodon", "citation", "codon_start", "compare",
248 "direction", "estimated_length", "mod_base", "number",
249 "rpt_type", "rpt_unit_range", "tag_peptide",
250 "transl_except", "transl_table")
251
286
302
322
324 """Get an annotation dictionary entry (as a string).
325
326 Some entries are lists, in which case if just_first=True the first entry
327 is returned. If just_first=False (default) this verifies there is only
328 one entry before returning it."""
329 try:
330 answer = record.annotations[key]
331 except KeyError:
332 return default
333 if isinstance(answer, list):
334 if not just_first:
335 assert len(answer) == 1
336 return str(answer[0])
337 else:
338 return str(answer)
339
341 """Returns a list of strings.
342
343 Any single words which are too long get returned as a whole line
344 (e.g. URLs) without an exception or warning.
345 """
346
347 text = text.strip()
348 if len(text) <= max_len:
349 return [text]
350
351 words = text.split()
352 text = ""
353 while words and len(text) + 1 + len(words[0]) <= max_len:
354 text += " " + words.pop(0)
355 text = text.strip()
356
357 answer = [text]
358 while words:
359 text = words.pop(0)
360 while words and len(text) + 1 + len(words[0]) <= max_len:
361 text += " " + words.pop(0)
362 text = text.strip()
363
364 answer.append(text)
365 assert not words
366 return answer
367
369 """Returns a list of strings, splits on commas."""
370
371
372
373 contig = record.annotations.get("contig", "")
374 if isinstance(contig, list) or isinstance(contig, tuple):
375 contig = "".join(contig)
376 contig = self.clean(contig)
377 answer = []
378 while contig:
379 if len(contig) > max_len:
380
381 pos = contig[:max_len - 1].rfind(",")
382 if pos == -1:
383 raise ValueError("Could not break up CONTIG")
384 text, contig = contig[:pos + 1], contig[pos + 1:]
385 else:
386 text, contig = contig, ""
387 answer.append(text)
388 return answer
389
390
392 HEADER_WIDTH = 12
393 QUALIFIER_INDENT = 21
394
405
414
423
425 default = "01-JAN-1980"
426 try:
427 date = record.annotations["date"]
428 except KeyError:
429 return default
430
431 if isinstance(date, list) and len(date) == 1:
432 date = date[0]
433
434 if not isinstance(date, basestring) or len(date) != 11 \
435 or date[2] != "-" or date[6] != "-" \
436 or not date[:2].isdigit() or not date[7:].isdigit() \
437 or int(date[:2]) > 31 \
438 or date[3:6] not in ["JAN", "FEB", "MAR", "APR", "MAY", "JUN",
439 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]:
440
441 return default
442 return date
443
445 try:
446 division = record.annotations["data_file_division"]
447 except KeyError:
448 division = "UNK"
449 if division in ["PRI", "ROD", "MAM", "VRT", "INV", "PLN", "BCT",
450 "VRL", "PHG", "SYN", "UNA", "EST", "PAT", "STS",
451 "GSS", "HTG", "HTC", "ENV", "CON"]:
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474 pass
475 else:
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496 embl_to_gbk = {"FUN": "PLN",
497 "HUM": "PRI",
498 "MUS": "ROD",
499 "PRO": "BCT",
500 "UNC": "UNK",
501 "XXX": "UNK",
502 }
503 try:
504 division = embl_to_gbk[division]
505 except KeyError:
506 division = "UNK"
507 assert len(division) == 3
508 return division
509
511 """Write the LOCUS line."""
512
513 locus = record.name
514 if not locus or locus == "<unknown name>":
515 locus = record.id
516 if not locus or locus == "<unknown id>":
517 locus = self._get_annotation_str(
518 record, "accession", just_first=True)
519 if len(locus) > 16:
520 raise ValueError("Locus identifier %r is too long" % str(locus))
521
522 if len(record) > 99999999999:
523
524
525 raise ValueError("Sequence too long!")
526
527
528 a = Alphabet._get_base_alphabet(record.seq.alphabet)
529 if not isinstance(a, Alphabet.Alphabet):
530 raise TypeError("Invalid alphabet")
531 elif isinstance(a, Alphabet.ProteinAlphabet):
532 units = "aa"
533 elif isinstance(a, Alphabet.NucleotideAlphabet):
534 units = "bp"
535 else:
536
537
538 raise ValueError("Need a Nucleotide or Protein alphabet")
539
540
541
542 if isinstance(a, Alphabet.ProteinAlphabet):
543 mol_type = ""
544 elif isinstance(a, Alphabet.DNAAlphabet):
545 mol_type = "DNA"
546 elif isinstance(a, Alphabet.RNAAlphabet):
547 mol_type = "RNA"
548 else:
549
550
551 raise ValueError("Need a DNA, RNA or Protein alphabet")
552
553 division = self._get_data_division(record)
554
555 assert len(units) == 2
556 assert len(division) == 3
557
558
559 line = "LOCUS %s %s %s %s %s %s\n" \
560 % (locus.ljust(16),
561 str(len(record)).rjust(11),
562 units,
563 mol_type.ljust(6),
564 division,
565 self._get_date(record))
566 assert len(line) == 79 + 1, repr(line)
567
568 assert line[12:28].rstrip() == locus, \
569 'LOCUS line does not contain the locus at the expected position:\n' + line
570 assert line[28:29] == " "
571 assert line[29:40].lstrip() == str(len(record)), \
572 'LOCUS line does not contain the length at the expected position:\n' + line
573
574
575 assert line[40:44] in [' bp ', ' aa '], \
576 'LOCUS line does not contain size units at expected position:\n' + \
577 line
578 assert line[44:47] in [' ', 'ss-', 'ds-', 'ms-'], \
579 'LOCUS line does not have valid strand type (Single stranded, ...):\n' + line
580 assert line[47:54].strip() == "" \
581 or 'DNA' in line[47:54].strip() \
582 or 'RNA' in line[47:54].strip(), \
583 'LOCUS line does not contain valid sequence type (DNA, RNA, ...):\n' + line
584 assert line[54:55] == ' ', \
585 'LOCUS line does not contain space at position 55:\n' + line
586 assert line[55:63].strip() in ['', 'linear', 'circular'], \
587 'LOCUS line does not contain valid entry (linear, circular, ...):\n' + line
588 assert line[63:64] == ' ', \
589 'LOCUS line does not contain space at position 64:\n' + line
590 assert line[67:68] == ' ', \
591 'LOCUS line does not contain space at position 68:\n' + line
592 assert line[70:71] == '-', \
593 'LOCUS line does not contain - at position 71 in date:\n' + line
594 assert line[74:75] == '-', \
595 'LOCUS line does not contain - at position 75 in date:\n' + line
596
597 self.handle.write(line)
598
600 number = 0
601 for ref in record.annotations["references"]:
602 if not isinstance(ref, SeqFeature.Reference):
603 continue
604 number += 1
605 data = str(number)
606
607 if ref.location and len(ref.location) == 1:
608 a = Alphabet._get_base_alphabet(record.seq.alphabet)
609 if isinstance(a, Alphabet.ProteinAlphabet):
610 units = "residues"
611 else:
612 units = "bases"
613 data += " (%s %i to %i)" % (units,
614 ref.location[0].nofuzzy_start + 1,
615 ref.location[0].nofuzzy_end)
616 self._write_single_line("REFERENCE", data)
617 if ref.authors:
618
619 self._write_multi_line(" AUTHORS", ref.authors)
620 if ref.consrtm:
621
622 self._write_multi_line(" CONSRTM", ref.consrtm)
623 if ref.title:
624
625 self._write_multi_line(" TITLE", ref.title)
626 if ref.journal:
627
628
629 self._write_multi_line(" JOURNAL", ref.journal)
630 if ref.medline_id:
631
632
633
634 self._write_multi_line(" MEDLINE", ref.medline_id)
635 if ref.pubmed_id:
636
637 self._write_multi_line(" PUBMED", ref.pubmed_id)
638 if ref.comment:
639 self._write_multi_line(" REMARK", ref.comment)
640
658
665
691
693 """Write a single record to the output file."""
694 handle = self.handle
695 self._write_the_first_line(record)
696
697 accession = self._get_annotation_str(record, "accession",
698 record.id.split(".", 1)[0],
699 just_first=True)
700 acc_with_version = accession
701 if record.id.startswith(accession + "."):
702 try:
703 acc_with_version = "%s.%i" \
704 % (accession,
705 int(record.id.split(".", 1)[1]))
706 except ValueError:
707 pass
708 gi = self._get_annotation_str(record, "gi", just_first=True)
709
710 descr = record.description
711 if descr == "<unknown description>":
712 descr = "."
713 self._write_multi_line("DEFINITION", descr)
714
715 self._write_single_line("ACCESSION", accession)
716 if gi != ".":
717 self._write_single_line("VERSION", "%s GI:%s"
718 % (acc_with_version, gi))
719 else:
720 self._write_single_line("VERSION", "%s" % (acc_with_version))
721
722
723
724
725 self._write_multi_entries("DBLINK", record.dbxrefs)
726
727 try:
728
729
730 keywords = "; ".join(record.annotations["keywords"])
731
732 if not keywords.endswith("."):
733 keywords += "."
734 except KeyError:
735
736 keywords = "."
737 self._write_multi_line("KEYWORDS", keywords)
738
739 if "segment" in record.annotations:
740
741
742 segment = record.annotations["segment"]
743 if isinstance(segment, list):
744 assert len(segment) == 1, segment
745 segment = segment[0]
746 self._write_single_line("SEGMENT", segment)
747
748 self._write_multi_line("SOURCE",
749 self._get_annotation_str(record, "source"))
750
751 org = self._get_annotation_str(record, "organism")
752 if len(org) > self.MAX_WIDTH - self.HEADER_WIDTH:
753 org = org[:self.MAX_WIDTH - self.HEADER_WIDTH - 4] + "..."
754 self._write_single_line(" ORGANISM", org)
755 try:
756
757
758 taxonomy = "; ".join(record.annotations["taxonomy"])
759
760 if not taxonomy.endswith("."):
761 taxonomy += "."
762 except KeyError:
763 taxonomy = "."
764 self._write_multi_line("", taxonomy)
765
766 if "references" in record.annotations:
767 self._write_references(record)
768
769 if "comment" in record.annotations:
770 self._write_comment(record)
771
772 handle.write("FEATURES Location/Qualifiers\n")
773 rec_length = len(record)
774 for feature in record.features:
775 self._write_feature(feature, rec_length)
776 self._write_sequence(record)
777 handle.write("//\n")
778
779
781 HEADER_WIDTH = 5
782 QUALIFIER_INDENT = 21
783 QUALIFIER_INDENT_STR = "FT" + " " * (QUALIFIER_INDENT - 2)
784 QUALIFIER_INDENT_TMP = "FT %s "
785 FEATURE_HEADER = "FH Key Location/Qualifiers\n"
786
792
848
850 assert len(tag) == 2
851 line = tag + " " + text
852 if len(text) > self.MAX_WIDTH:
853 import warnings
854 warnings.warn("Line %r too long" % line)
855 self.handle.write(line + "\n")
856
862
864 """Write the ID and AC lines."""
865 if "." in record.id and record.id.rsplit(".", 1)[1].isdigit():
866 version = "SV " + record.id.rsplit(".", 1)[1]
867 accession = self._get_annotation_str(record, "accession",
868 record.id.rsplit(".", 1)[0],
869 just_first=True)
870 else:
871 version = ""
872 accession = self._get_annotation_str(record, "accession",
873 record.id,
874 just_first=True)
875
876 if ";" in accession:
877 raise ValueError("Cannot have semi-colon in EMBL accession, %s"
878 % repr(str(accession)))
879 if " " in accession:
880
881 raise ValueError("Cannot have spaces in EMBL accession, %s"
882 % repr(str(accession)))
883
884
885
886
887 a = Alphabet._get_base_alphabet(record.seq.alphabet)
888 if not isinstance(a, Alphabet.Alphabet):
889 raise TypeError("Invalid alphabet")
890 elif isinstance(a, Alphabet.DNAAlphabet):
891 mol_type = "DNA"
892 units = "BP"
893 elif isinstance(a, Alphabet.RNAAlphabet):
894 mol_type = "RNA"
895 units = "BP"
896 elif isinstance(a, Alphabet.ProteinAlphabet):
897 mol_type = "PROTEIN"
898 units = "AA"
899 else:
900
901 raise ValueError("Need a DNA, RNA or Protein alphabet")
902
903
904 division = self._get_data_division(record)
905
906
907 handle = self.handle
908
909
910
911
912
913
914
915
916 self._write_single_line("ID", "%s; %s; ; %s; ; %s; %i %s."
917 % (accession, version, mol_type,
918 division, len(record), units))
919 handle.write("XX\n")
920 self._write_single_line("AC", accession + ";")
921 handle.write("XX\n")
922
924 try:
925 division = record.annotations["data_file_division"]
926 except KeyError:
927 division = "UNC"
928 if division in ["PHG", "ENV", "FUN", "HUM", "INV", "MAM", "VRT",
929 "MUS", "PLN", "PRO", "ROD", "SYN", "TGN", "UNC",
930 "VRL", "XXX"]:
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951 pass
952 else:
953
954
955
956
957
958 gbk_to_embl = {"BCT": "PRO",
959 "UNK": "UNC",
960 }
961 try:
962 division = gbk_to_embl[division]
963 except KeyError:
964 division = "UNC"
965 assert len(division) == 3
966 return division
967
998
1019
1065
1066
1073
1074 if __name__ == "__main__":
1075 print "Quick self test"
1076 import os
1077 from StringIO import StringIO
1078
1103
1105 """Check two lists of SeqRecords agree, raises a ValueError if mismatch."""
1106 if len(old_list) != len(new_list):
1107 raise ValueError(
1108 "%i vs %i records" % (len(old_list), len(new_list)))
1109 for old, new in zip(old_list, new_list):
1110 if not compare_record(old, new):
1111 return False
1112 return True
1113
1115 """Check two SeqFeatures agree."""
1116 if old.type != new.type:
1117 raise ValueError("Type %s versus %s" % (old.type, new.type))
1118 if old.location.nofuzzy_start != new.location.nofuzzy_start \
1119 or old.location.nofuzzy_end != new.location.nofuzzy_end:
1120 raise ValueError("%s versus %s:\n%s\nvs:\n%s"
1121 % (old.location, new.location, str(old), str(new)))
1122 if old.strand != new.strand:
1123 raise ValueError(
1124 "Different strand:\n%s\nvs:\n%s" % (str(old), str(new)))
1125 if old.location.start != new.location.start:
1126 raise ValueError("Start %s versus %s:\n%s\nvs:\n%s"
1127 % (old.location.start, new.location.start, str(old), str(new)))
1128 if old.location.end != new.location.end:
1129 raise ValueError("End %s versus %s:\n%s\nvs:\n%s"
1130 % (old.location.end, new.location.end, str(old), str(new)))
1131 if not ignore_sub_features:
1132 if len(old.sub_features) != len(new.sub_features):
1133 raise ValueError("Different sub features")
1134 for a, b in zip(old.sub_features, new.sub_features):
1135 if not compare_feature(a, b):
1136 return False
1137
1138
1139
1140 for key in set(old.qualifiers).intersection(new.qualifiers):
1141 if key in ["db_xref", "protein_id", "product", "note"]:
1142
1143 continue
1144 if old.qualifiers[key] != new.qualifiers[key]:
1145 raise ValueError("Qualifier mis-match for %s:\n%s\n%s"
1146 % (key, old.qualifiers[key], new.qualifiers[key]))
1147 return True
1148
1150 """Check two lists of SeqFeatures agree, raises a ValueError if mismatch."""
1151 if len(old_list) != len(new_list):
1152 raise ValueError(
1153 "%i vs %i features" % (len(old_list), len(new_list)))
1154 for old, new in zip(old_list, new_list):
1155
1156 if not compare_feature(old, new, ignore_sub_features):
1157 return False
1158 return True
1159
1167
1179
1180 for filename in os.listdir("../../Tests/GenBank"):
1181 if not filename.endswith(".gbk") and not filename.endswith(".gb"):
1182 continue
1183 print filename
1184
1185 handle = open("../../Tests/GenBank/%s" % filename)
1186 records = list(GenBankIterator(handle))
1187 handle.close()
1188
1189 check_genbank_writer(records)
1190 check_embl_writer(records)
1191
1192 for filename in os.listdir("../../Tests/EMBL"):
1193 if not filename.endswith(".embl"):
1194 continue
1195 print filename
1196
1197 handle = open("../../Tests/EMBL/%s" % filename)
1198 records = list(EmblIterator(handle))
1199 handle.close()
1200
1201 check_genbank_writer(records)
1202 check_embl_writer(records)
1203
1204 from Bio import SeqIO
1205 for filename in os.listdir("../../Tests/SwissProt"):
1206 if not filename.startswith("sp"):
1207 continue
1208 print filename
1209
1210 handle = open("../../Tests/SwissProt/%s" % filename)
1211 records = list(SeqIO.parse(handle, "swiss"))
1212 handle.close()
1213
1214 check_genbank_writer(records)
1215