1
2
3
4
5
6
7 """Bio.SeqIO support for the "genbank" and "embl" file formats.
8
9 You are expected to use this module via the Bio.SeqIO functions.
10 Note that internally this module calls Bio.GenBank to do the actual
11 parsing of GenBank, EMBL and IMGT files.
12
13 See also:
14
15 International Nucleotide Sequence Database Collaboration
16 http://www.insdc.org/
17
18 GenBank
19 http://www.ncbi.nlm.nih.gov/Genbank/
20
21 EMBL Nucleotide Sequence Database
22 http://www.ebi.ac.uk/embl/
23
24 DDBJ (DNA Data Bank of Japan)
25 http://www.ddbj.nig.ac.jp/
26
27 IMGT (use a variant of EMBL format with longer feature indents)
28 http://imgt.cines.fr/download/LIGM-DB/userman_doc.html
29 http://imgt.cines.fr/download/LIGM-DB/ftable_doc.html
30 http://www.ebi.ac.uk/imgt/hla/docs/manual.html
31
32 """
33
34 from Bio.Seq import UnknownSeq
35 from Bio.GenBank.Scanner import GenBankScanner, EmblScanner, _ImgtScanner
36 from Bio import Alphabet
37 from Interfaces import SequentialSequenceWriter
38 from Bio import SeqFeature
39
40 from Bio._py3k import _is_int_or_long
41
42
43
44
45
46
47
48
49
51 """Breaks up a Genbank file into SeqRecord objects.
52
53 Every section from the LOCUS line to the terminating // becomes
54 a single SeqRecord with associated annotation and features.
55
56 Note that for genomes or chromosomes, there is typically only
57 one record."""
58
59 return GenBankScanner(debug=0).parse_records(handle)
60
62 """Breaks up an EMBL file into SeqRecord objects.
63
64 Every section from the LOCUS line to the terminating // becomes
65 a single SeqRecord with associated annotation and features.
66
67 Note that for genomes or chromosomes, there is typically only
68 one record."""
69
70 return EmblScanner(debug=0).parse_records(handle)
71
73 """Breaks up an IMGT file into SeqRecord objects.
74
75 Every section from the LOCUS line to the terminating // becomes
76 a single SeqRecord with associated annotation and features.
77
78 Note that for genomes or chromosomes, there is typically only
79 one record."""
80
81 return _ImgtScanner(debug=0).parse_records(handle)
82
84 """Breaks up a Genbank file into SeqRecord objects for each CDS feature.
85
86 Every section from the LOCUS line to the terminating // can contain
87 many CDS features. These are returned as with the stated amino acid
88 translation sequence (if given).
89 """
90
91 return GenBankScanner(debug=0).parse_cds_features(handle, alphabet)
92
94 """Breaks up a EMBL file into SeqRecord objects for each CDS feature.
95
96 Every section from the LOCUS line to the terminating // can contain
97 many CDS features. These are returned as with the stated amino acid
98 translation sequence (if given).
99 """
100
101 return EmblScanner(debug=0).parse_cds_features(handle, alphabet)
102
104 """Build a GenBank/EMBL position string (PRIVATE).
105
106 Use offset=1 to add one to convert a start position from python counting.
107 """
108 if isinstance(pos, SeqFeature.ExactPosition):
109 return "%i" % (pos.position+offset)
110 elif isinstance(pos, SeqFeature.WithinPosition):
111 return "(%i.%i)" % (pos.position + offset,
112 pos.position + pos.extension + offset)
113 elif isinstance(pos, SeqFeature.BetweenPosition):
114 return "(%i^%i)" % (pos.position + offset,
115 pos.position + pos.extension + offset)
116 elif isinstance(pos, SeqFeature.BeforePosition):
117 return "<%i" % (pos.position + offset)
118 elif isinstance(pos, SeqFeature.AfterPosition):
119 return ">%i" % (pos.position + offset)
120 elif isinstance(pos, SeqFeature.OneOfPosition):
121 return "one-of(%s)" \
122 % ",".join([_insdc_feature_position_string(p,offset) \
123 for p in pos.position_choices])
124 elif isinstance(pos, SeqFeature.AbstractPosition):
125 raise NotImplementedError("Please report this as a bug in Biopython.")
126 else:
127 raise ValueError("Expected a SeqFeature position object.")
128
129
131 if location.ref:
132 ref = "%s:" % location.ref
133 else:
134 ref = ""
135 assert not location.ref_db
136 if isinstance(location.start, SeqFeature.ExactPosition) \
137 and isinstance(location.end, SeqFeature.ExactPosition) \
138 and location.start.position == location.end.position:
139
140
141 if location.end.position == rec_length:
142
143
144
145 return "%s%i^1" % (ref, rec_length)
146 else:
147 return "%s%i^%i" % (ref, location.end.position,
148 location.end.position+1)
149 if isinstance(location.start, SeqFeature.ExactPosition) \
150 and isinstance(location.end, SeqFeature.ExactPosition) \
151 and location.start.position+1 == location.end.position:
152
153
154 return "%s%i" % (ref, location.end.position)
155 elif isinstance(location.start, SeqFeature.UnknownPosition) \
156 or isinstance(location.end, SeqFeature.UnknownPosition):
157
158 if isinstance(location.start, SeqFeature.UnknownPosition) \
159 and isinstance(location.end, SeqFeature.UnknownPosition):
160
161
162
163 raise ValueError("Feature with unknown location")
164 elif isinstance(location.start, SeqFeature.UnknownPosition):
165
166 return "%s<%i..%s" \
167 % (ref,
168 location.nofuzzy_end,
169 _insdc_feature_position_string(location.end))
170 else:
171
172 return "%s%s..>%i" \
173 % (ref,
174 _insdc_feature_position_string(location.start),
175 location.nofuzzy_start)
176 else:
177
178 return ref \
179 + _insdc_feature_position_string(location.start, +1) \
180 + ".." + \
181 _insdc_feature_position_string(location.end)
182
184 """Build a GenBank/EMBL location string from a SeqFeature (PRIVATE).
185
186 There is a choice of how to show joins on the reverse complement strand,
187 GenBank used "complement(join(1,10),(20,100))" while EMBL used to use
188 "join(complement(20,100),complement(1,10))" instead (but appears to have
189 now adopted the GenBank convention). Notice that the order of the entries
190 is reversed! This function therefore uses the first form. In this situation
191 we expect the parent feature and the two children to all be marked as
192 strand == -1, and in the order 0:10 then 19:100.
193
194 Also need to consider dual-strand examples like these from the Arabidopsis
195 thaliana chloroplast NC_000932: join(complement(69611..69724),139856..140650)
196 gene ArthCp047, GeneID:844801 or its CDS (protein NP_051038.1 GI:7525057)
197 which is further complicated by a splice:
198 join(complement(69611..69724),139856..140087,140625..140650)
199
200 For mixed this mixed strand feature, the parent SeqFeature should have
201 no strand (either 0 or None) while the child features should have either
202 strand +1 or -1 as appropriate, and be listed in the order given here.
203 """
204
205 if not feature.sub_features:
206
207
208
209
210 location = _insdc_location_string_ignoring_strand_and_subfeatures(feature.location, rec_length)
211 if feature.strand == -1:
212 location = "complement(%s)" % location
213 return location
214
215 if feature.strand == -1:
216 for f in feature.sub_features:
217 if f.strand != -1:
218 raise ValueError("Inconsistent strands: %r for parent, %r for child" \
219 % (feature.strand, f.strand))
220 return "complement(%s(%s))" \
221 % (feature.location_operator,
222 ",".join(_insdc_location_string_ignoring_strand_and_subfeatures(f.location, rec_length) \
223 for f in feature.sub_features))
224
225
226
227
228 assert feature.location_operator != ""
229 return "%s(%s)" % (feature.location_operator,
230 ",".join([_insdc_feature_location_string(f, rec_length) \
231 for f in feature.sub_features]))
232
233
235 """Base class for GenBank and EMBL writers (PRIVATE)."""
236 MAX_WIDTH = 80
237 QUALIFIER_INDENT = 21
238 QUALIFIER_INDENT_STR = " "*QUALIFIER_INDENT
239 QUALIFIER_INDENT_TMP = " %s "
240
274
289
309
311 """Get an annotation dictionary entry (as a string).
312
313 Some entries are lists, in which case if just_first=True the first entry
314 is returned. If just_first=False (default) this verifies there is only
315 one entry before returning it."""
316 try:
317 answer = record.annotations[key]
318 except KeyError:
319 return default
320 if isinstance(answer, list):
321 if not just_first : assert len(answer) == 1
322 return str(answer[0])
323 else:
324 return str(answer)
325
327 """Returns a list of strings.
328
329 Any single words which are too long get returned as a whole line
330 (e.g. URLs) without an exception or warning.
331 """
332
333 text = text.strip()
334 if len(text) <= max_len:
335 return [text]
336
337 words = text.split()
338 text = ""
339 while words and len(text) + 1 + len(words[0]) <= max_len:
340 text += " " + words.pop(0)
341 text = text.strip()
342
343 answer = [text]
344 while words:
345 text = words.pop(0)
346 while words and len(text) + 1 + len(words[0]) <= max_len:
347 text += " " + words.pop(0)
348 text = text.strip()
349
350 answer.append(text)
351 assert not words
352 return answer
353
355 "Returns a list of strings, splits on commas."""
356
357
358
359 contig = record.annotations.get("contig", "")
360 if isinstance(contig, list) or isinstance(contig, tuple):
361 contig = "".join(contig)
362 contig = self.clean(contig)
363 i = 0
364 answer = []
365 while contig:
366 if len(contig) > max_len:
367
368 pos = contig[:max_len-1].rfind(",")
369 if pos == -1:
370 raise ValueError("Could not break up CONTIG")
371 text, contig = contig[:pos+1], contig[pos+1:]
372 else:
373 text, contig = contig, ""
374 answer.append(text)
375 return answer
376
378 HEADER_WIDTH = 12
379 QUALIFIER_INDENT = 21
380
382 "Used in the the 'header' of each GenBank record."""
383 assert len(tag) < self.HEADER_WIDTH
384 if len(text) > self.MAX_WIDTH - self.HEADER_WIDTH:
385 import warnings
386 warnings.warn("Annotation %r too long for %s line" % (text, tag))
387 self.handle.write("%s%s\n" % (tag.ljust(self.HEADER_WIDTH),
388 text.replace("\n", " ")))
389
398
407
409 default = "01-JAN-1980"
410 try :
411 date = record.annotations["date"]
412 except KeyError :
413 return default
414
415 if isinstance(date, list) and len(date)==1 :
416 date = date[0]
417
418 if not isinstance(date, basestring) or len(date) != 11 \
419 or date[2] != "-" or date[6] != "-" \
420 or not date[:2].isdigit() or not date[7:].isdigit() \
421 or int(date[:2]) > 31 \
422 or date[3:6] not in ["JAN", "FEB", "MAR", "APR", "MAY", "JUN",
423 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"] :
424
425 return default
426 return date
427
429 try:
430 division = record.annotations["data_file_division"]
431 except KeyError:
432 division = "UNK"
433 if division in ["PRI", "ROD", "MAM", "VRT", "INV", "PLN", "BCT",
434 "VRL", "PHG", "SYN", "UNA", "EST", "PAT", "STS",
435 "GSS", "HTG", "HTC", "ENV", "CON"]:
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458 pass
459 else:
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480 embl_to_gbk = {"FUN":"PLN",
481 "HUM":"PRI",
482 "MUS":"ROD",
483 "PRO":"BCT",
484 "UNC":"UNK",
485 "XXX":"UNK",
486 }
487 try:
488 division = embl_to_gbk[division]
489 except KeyError:
490 division = "UNK"
491 assert len(division)==3
492 return division
493
495 """Write the LOCUS line."""
496
497 locus = record.name
498 if not locus or locus == "<unknown name>":
499 locus = record.id
500 if not locus or locus == "<unknown id>":
501 locus = self._get_annotation_str(record, "accession", just_first=True)
502 if len(locus) > 16:
503 raise ValueError("Locus identifier %r is too long" % str(locus))
504
505 if len(record) > 99999999999:
506
507
508 raise ValueError("Sequence too long!")
509
510
511 a = Alphabet._get_base_alphabet(record.seq.alphabet)
512 if not isinstance(a, Alphabet.Alphabet):
513 raise TypeError("Invalid alphabet")
514 elif isinstance(a, Alphabet.ProteinAlphabet):
515 units = "aa"
516 elif isinstance(a, Alphabet.NucleotideAlphabet):
517 units = "bp"
518 else:
519
520
521 raise ValueError("Need a Nucleotide or Protein alphabet")
522
523
524
525 if isinstance(a, Alphabet.ProteinAlphabet):
526 mol_type = ""
527 elif isinstance(a, Alphabet.DNAAlphabet):
528 mol_type = "DNA"
529 elif isinstance(a, Alphabet.RNAAlphabet):
530 mol_type = "RNA"
531 else:
532
533
534 raise ValueError("Need a DNA, RNA or Protein alphabet")
535
536 division = self._get_data_division(record)
537
538 assert len(units) == 2
539 assert len(division) == 3
540
541
542 line = "LOCUS %s %s %s %s %s %s\n" \
543 % (locus.ljust(16),
544 str(len(record)).rjust(11),
545 units,
546 mol_type.ljust(6),
547 division,
548 self._get_date(record))
549 assert len(line) == 79+1, repr(line)
550
551 assert line[12:28].rstrip() == locus, \
552 'LOCUS line does not contain the locus at the expected position:\n' + line
553 assert line[28:29] == " "
554 assert line[29:40].lstrip() == str(len(record)), \
555 'LOCUS line does not contain the length at the expected position:\n' + line
556
557
558 assert line[40:44] in [' bp ', ' aa '] , \
559 'LOCUS line does not contain size units at expected position:\n' + line
560 assert line[44:47] in [' ', 'ss-', 'ds-', 'ms-'], \
561 'LOCUS line does not have valid strand type (Single stranded, ...):\n' + line
562 assert line[47:54].strip() == "" \
563 or line[47:54].strip().find('DNA') != -1 \
564 or line[47:54].strip().find('RNA') != -1, \
565 'LOCUS line does not contain valid sequence type (DNA, RNA, ...):\n' + line
566 assert line[54:55] == ' ', \
567 'LOCUS line does not contain space at position 55:\n' + line
568 assert line[55:63].strip() in ['', 'linear', 'circular'], \
569 'LOCUS line does not contain valid entry (linear, circular, ...):\n' + line
570 assert line[63:64] == ' ', \
571 'LOCUS line does not contain space at position 64:\n' + line
572 assert line[67:68] == ' ', \
573 'LOCUS line does not contain space at position 68:\n' + line
574 assert line[70:71] == '-', \
575 'LOCUS line does not contain - at position 71 in date:\n' + line
576 assert line[74:75] == '-', \
577 'LOCUS line does not contain - at position 75 in date:\n' + line
578
579 self.handle.write(line)
580
582 number = 0
583 for ref in record.annotations["references"]:
584 if not isinstance(ref, SeqFeature.Reference):
585 continue
586 number += 1
587 data = str(number)
588
589 if ref.location and len(ref.location)==1:
590 a = Alphabet._get_base_alphabet(record.seq.alphabet)
591 if isinstance(a, Alphabet.ProteinAlphabet):
592 units = "residues"
593 else:
594 units = "bases"
595 data += " (%s %i to %i)" % (units,
596 ref.location[0].nofuzzy_start+1,
597 ref.location[0].nofuzzy_end)
598 self._write_single_line("REFERENCE", data)
599 if ref.authors:
600
601 self._write_multi_line(" AUTHORS", ref.authors)
602 if ref.consrtm:
603
604 self._write_multi_line(" CONSRTM", ref.consrtm)
605 if ref.title:
606
607 self._write_multi_line(" TITLE", ref.title)
608 if ref.journal:
609
610
611 self._write_multi_line(" JOURNAL", ref.journal)
612 if ref.medline_id:
613
614
615
616 self._write_multi_line(" MEDLINE", ref.medline_id)
617 if ref.pubmed_id:
618
619 self._write_multi_line(" PUBMED", ref.pubmed_id)
620 if ref.comment:
621 self._write_multi_line(" REMARK", ref.comment)
622
623
641
648
674
676 """Write a single record to the output file."""
677 handle = self.handle
678 self._write_the_first_line(record)
679
680 accession = self._get_annotation_str(record, "accession",
681 record.id.split(".", 1)[0],
682 just_first=True)
683 acc_with_version = accession
684 if record.id.startswith(accession+"."):
685 try:
686 acc_with_version = "%s.%i" \
687 % (accession,
688 int(record.id.split(".", 1)[1]))
689 except ValueError:
690 pass
691 gi = self._get_annotation_str(record, "gi", just_first=True)
692
693 descr = record.description
694 if descr == "<unknown description>" : descr = "."
695 self._write_multi_line("DEFINITION", descr)
696
697 self._write_single_line("ACCESSION", accession)
698 if gi != ".":
699 self._write_single_line("VERSION", "%s GI:%s" \
700 % (acc_with_version, gi))
701 else:
702 self._write_single_line("VERSION", "%s" % (acc_with_version))
703
704
705
706
707 self._write_multi_entries("DBLINK", record.dbxrefs)
708
709 try:
710
711
712 keywords = "; ".join(record.annotations["keywords"])
713
714 if not keywords.endswith(".") :
715 keywords += "."
716 except KeyError:
717
718 keywords = "."
719 self._write_multi_line("KEYWORDS", keywords)
720
721 if "segment" in record.annotations:
722
723
724 segment = record.annotations["segment"]
725 if isinstance(segment, list):
726 assert len(segment)==1, segment
727 segment = segment[0]
728 self._write_single_line("SEGMENT", segment)
729
730 self._write_multi_line("SOURCE", \
731 self._get_annotation_str(record, "source"))
732
733 org = self._get_annotation_str(record, "organism")
734 if len(org) > self.MAX_WIDTH - self.HEADER_WIDTH:
735 org = org[:self.MAX_WIDTH - self.HEADER_WIDTH-4]+"..."
736 self._write_single_line(" ORGANISM", org)
737 try:
738
739
740 taxonomy = "; ".join(record.annotations["taxonomy"])
741
742 if not taxonomy.endswith(".") :
743 taxonomy += "."
744 except KeyError:
745 taxonomy = "."
746 self._write_multi_line("", taxonomy)
747
748 if "references" in record.annotations:
749 self._write_references(record)
750
751 if "comment" in record.annotations:
752 self._write_comment(record)
753
754 handle.write("FEATURES Location/Qualifiers\n")
755 rec_length = len(record)
756 for feature in record.features:
757 self._write_feature(feature, rec_length)
758 self._write_sequence(record)
759 handle.write("//\n")
760
762 HEADER_WIDTH = 5
763 QUALIFIER_INDENT = 21
764 QUALIFIER_INDENT_STR = "FT" + " "*(QUALIFIER_INDENT-2)
765 QUALIFIER_INDENT_TMP = "FT %s "
766 FEATURE_HEADER = "FH Key Location/Qualifiers\n"
767
773
826
828 assert len(tag)==2
829 line = tag+" "+text
830 if len(text) > self.MAX_WIDTH:
831 import warnings
832 warnings.warn("Line %r too long" % line)
833 self.handle.write(line+"\n")
834
840
842 """Write the ID and AC lines."""
843 if "." in record.id and record.id.rsplit(".", 1)[1].isdigit():
844 version = "SV " + record.id.rsplit(".", 1)[1]
845 accession = self._get_annotation_str(record, "accession",
846 record.id.rsplit(".", 1)[0],
847 just_first=True)
848 else :
849 version = ""
850 accession = self._get_annotation_str(record, "accession",
851 record.id,
852 just_first=True)
853
854 if ";" in accession :
855 raise ValueError("Cannot have semi-colon in EMBL accession, %s" \
856 % repr(str(accession)))
857 if " " in accession :
858
859 raise ValueError("Cannot have spaces in EMBL accession, %s" \
860 % repr(str(accession)))
861
862
863
864
865 a = Alphabet._get_base_alphabet(record.seq.alphabet)
866 if not isinstance(a, Alphabet.Alphabet):
867 raise TypeError("Invalid alphabet")
868 elif isinstance(a, Alphabet.DNAAlphabet):
869 mol_type = "DNA"
870 units = "BP"
871 elif isinstance(a, Alphabet.RNAAlphabet):
872 mol_type = "RNA"
873 units = "BP"
874 elif isinstance(a, Alphabet.ProteinAlphabet):
875 mol_type = "PROTEIN"
876 units = "AA"
877 else:
878
879 raise ValueError("Need a DNA, RNA or Protein alphabet")
880
881
882 division = self._get_data_division(record)
883
884
885 handle = self.handle
886
887
888
889
890
891
892
893
894 self._write_single_line("ID", "%s; %s; ; %s; ; %s; %i %s." \
895 % (accession, version, mol_type,
896 division, len(record), units))
897 handle.write("XX\n")
898 self._write_single_line("AC", accession+";")
899 handle.write("XX\n")
900
902 try:
903 division = record.annotations["data_file_division"]
904 except KeyError:
905 division = "UNC"
906 if division in ["PHG", "ENV", "FUN", "HUM", "INV", "MAM", "VRT",
907 "MUS", "PLN", "PRO", "ROD", "SYN", "TGN", "UNC",
908 "VRL", "XXX"]:
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929 pass
930 else:
931
932
933
934
935
936 gbk_to_embl = {"BCT":"PRO",
937 "UNK":"UNC",
938 }
939 try:
940 division = gbk_to_embl[division]
941 except KeyError:
942 division = "UNC"
943 assert len(division)==3
944 return division
945
975
995
1039
1046
1047 if __name__ == "__main__":
1048 print "Quick self test"
1049 import os
1050 from StringIO import StringIO
1051
1075
1077 """Check two lists of SeqRecords agree, raises a ValueError if mismatch."""
1078 if len(old_list) != len(new_list):
1079 raise ValueError("%i vs %i records" % (len(old_list), len(new_list)))
1080 for old, new in zip(old_list, new_list):
1081 if not compare_record(old, new):
1082 return False
1083 return True
1084
1086 """Check two SeqFeatures agree."""
1087 if old.type != new.type:
1088 raise ValueError("Type %s versus %s" % (old.type, new.type))
1089 if old.location.nofuzzy_start != new.location.nofuzzy_start \
1090 or old.location.nofuzzy_end != new.location.nofuzzy_end:
1091 raise ValueError("%s versus %s:\n%s\nvs:\n%s" \
1092 % (old.location, new.location, str(old), str(new)))
1093 if old.strand != new.strand:
1094 raise ValueError("Different strand:\n%s\nvs:\n%s" % (str(old), str(new)))
1095 if old.location.start != new.location.start:
1096 raise ValueError("Start %s versus %s:\n%s\nvs:\n%s" \
1097 % (old.location.start, new.location.start, str(old), str(new)))
1098 if old.location.end != new.location.end:
1099 raise ValueError("End %s versus %s:\n%s\nvs:\n%s" \
1100 % (old.location.end, new.location.end, str(old), str(new)))
1101 if not ignore_sub_features:
1102 if len(old.sub_features) != len(new.sub_features):
1103 raise ValueError("Different sub features")
1104 for a, b in zip(old.sub_features, new.sub_features):
1105 if not compare_feature(a, b):
1106 return False
1107
1108
1109
1110 for key in set(old.qualifiers).intersection(new.qualifiers):
1111 if key in ["db_xref", "protein_id", "product", "note"]:
1112
1113 continue
1114 if old.qualifiers[key] != new.qualifiers[key]:
1115 raise ValueError("Qualifier mis-match for %s:\n%s\n%s" \
1116 % (key, old.qualifiers[key], new.qualifiers[key]))
1117 return True
1118
1120 """Check two lists of SeqFeatures agree, raises a ValueError if mismatch."""
1121 if len(old_list) != len(new_list):
1122 raise ValueError("%i vs %i features" % (len(old_list), len(new_list)))
1123 for old, new in zip(old_list, new_list):
1124
1125 if not compare_feature(old, new, ignore_sub_features):
1126 return False
1127 return True
1128
1136
1148
1149 for filename in os.listdir("../../Tests/GenBank"):
1150 if not filename.endswith(".gbk") and not filename.endswith(".gb"):
1151 continue
1152 print filename
1153
1154 handle = open("../../Tests/GenBank/%s" % filename)
1155 records = list(GenBankIterator(handle))
1156 handle.close()
1157
1158 check_genbank_writer(records)
1159 check_embl_writer(records)
1160
1161 for filename in os.listdir("../../Tests/EMBL"):
1162 if not filename.endswith(".embl"):
1163 continue
1164 print filename
1165
1166 handle = open("../../Tests/EMBL/%s" % filename)
1167 records = list(EmblIterator(handle))
1168 handle.close()
1169
1170 check_genbank_writer(records)
1171 check_embl_writer(records)
1172
1173 from Bio import SeqIO
1174 for filename in os.listdir("../../Tests/SwissProt"):
1175 if not filename.startswith("sp"):
1176 continue
1177 print filename
1178
1179 handle = open("../../Tests/SwissProt/%s" % filename)
1180 records = list(SeqIO.parse(handle, "swiss"))
1181 handle.close()
1182
1183 check_genbank_writer(records)
1184