1
2
3
4
5
6
7
8
9
10 """Bio.SeqIO support for the "ig" (IntelliGenetics or MASE) file format.
11
12 You are expected to use this module via the Bio.SeqIO functions."""
13
14 from Bio.Alphabet import single_letter_alphabet
15 from Bio.Seq import Seq
16 from Bio.SeqRecord import SeqRecord
17
18
def IgIterator(handle, alphabet=single_letter_alphabet):
    """Iterate over IntelliGenetics records (as SeqRecord objects).

    handle - input file
    alphabet - optional alphabet

    The optional free format file header lines (which start with two
    semi-colons) are ignored.

    The free format commentary lines at the start of each record (which
    start with a semi-colon) are recorded as a single string with embedded
    new line characters in the SeqRecord's annotations dictionary under the
    key 'comment'.

    Raises ValueError if a record does not start with a semi-colon, if the
    file ends before a record's title line is reached, or if a digit one
    appears anywhere in a sequence other than as its final character.
    """
    # Skip any file header lines (those starting with ";;") before the
    # first record.  An empty line means end of file.
    while True:
        line = handle.readline()
        if not line:
            break
        if not line.startswith(";;"):
            break

    # Each pass of this loop consumes one record; ``line`` always holds the
    # first unread line of the next record, or "" once the file is exhausted.
    while line:
        if not line.startswith(";"):
            raise ValueError(
                "Records should start with ';' and not:\n%s" % repr(line))

        # Gather the record's comment lines, dropping the leading ';' and
        # any surrounding whitespace.
        comment_lines = []
        while line.startswith(";"):
            comment_lines.append(line[1:].strip())
            line = handle.readline()

        # The first line after the comments is the title.  Hitting end of
        # file here means the record is truncated; raise rather than yield
        # a record with an empty identifier and an empty sequence.
        if not line:
            raise ValueError(
                "Premature end of file, record has no title line.")
        title = line.rstrip()

        # Sequence lines run until the next record's ';' line or end of file.
        seq_lines = []
        while True:
            line = handle.readline()
            if not line:
                break
            if line[0] == ";":
                break
            # Remove trailing whitespace, and any internal spaces
            seq_lines.append(line.rstrip().replace(" ", ""))
        seq_str = "".join(seq_lines)
        if seq_str.endswith("1"):
            # Remove the optional terminator (digit one)
            seq_str = seq_str[:-1]
        if "1" in seq_str:
            raise ValueError(
                "Potential terminator digit one found within sequence.")

        # The title is used as both the record's id and its name.
        record = SeqRecord(Seq(seq_str, alphabet),
                           id=title, name=title)
        record.annotations['comment'] = "\n".join(comment_lines)
        yield record
84
if __name__ == "__main__":
    # Quick self test: parse every ".txt" file found in the example
    # directory and print each record's identifier and length.
    # Single-argument print(...) calls give the same output whether print
    # is a statement or a function.
    print("Running quick self test")

    import os
    path = "../../Tests/IntelliGenetics/"
    if os.path.isdir(path):
        for filename in os.listdir(path):
            if os.path.splitext(filename)[-1] == ".txt":
                print("")
                print(filename)
                print("-" * len(filename))
                handle = open(os.path.join(path, filename))
                try:
                    for record in IgIterator(handle):
                        print("%s %i" % (record.id, len(record)))
                finally:
                    # Close the file even if parsing raises part-way through.
                    handle.close()
        print("Done")
    else:
        print("Could not find input files")
103