1
2
3
4
5
6 """
7 Bio.AlignIO support for the "nexus" file format.
8
9 You are expected to use this module via the Bio.AlignIO functions (or the
10 Bio.SeqIO functions if you want to work directly with the gapped sequences).
11
12 See also the Bio.Nexus module (which this code calls internally),
13 as this offers more than just accessing the alignment or its
14 sequences as SeqRecord objects.
15 """
16
17 from Bio.SeqRecord import SeqRecord
18 from Bio.Nexus import Nexus
19 from Bio.Align import MultipleSeqAlignment
20 from Interfaces import AlignmentWriter
21 from Bio import Alphabet
22
23
24
25
26
27
29 """Returns SeqRecord objects from a Nexus file.
30
31 Thus uses the Bio.Nexus module to do the hard work.
32
33 You are expected to call this function via Bio.SeqIO or Bio.AlignIO
34 (and not use it directly).
35
36 NOTE - We only expect ONE alignment matrix per Nexus file,
37 meaning this iterator will only yield one MultipleSeqAlignment.
38 """
39 n = Nexus.Nexus(handle)
40 if not n.matrix:
41
42 raise StopIteration
43
44
45
46 assert len(n.unaltered_taxlabels) == len(n.taxlabels)
47
48 if seq_count and seq_count != len(n.unaltered_taxlabels):
49 raise ValueError("Found %i sequences, but seq_count=%i"
50 % (len(n.unaltered_taxlabels), seq_count))
51
52
53 records = (SeqRecord(n.matrix[new_name], id=new_name,
54 name=old_name, description="")
55 for old_name, new_name
56 in zip(n.unaltered_taxlabels, n.taxlabels))
57
58 yield MultipleSeqAlignment(records, n.alphabet)
59
60
62 """Nexus alignment writer.
63
64 Note that Nexus files are only expected to hold ONE alignment
65 matrix.
66
67 You are expected to call this class via the Bio.AlignIO.write() or
68 Bio.SeqIO.write() functions.
69 """
71 """Use this to write an entire file containing the given alignments.
72
73 alignments - A list or iterator returning MultipleSeqAlignment objects.
74 This should hold ONE and only one alignment.
75 """
76 align_iter = iter(alignments)
77 try:
78 first_alignment = align_iter.next()
79 except StopIteration:
80 first_alignment = None
81 if first_alignment is None:
82
83 return 0
84
85
86 try:
87 second_alignment = align_iter.next()
88 except StopIteration:
89 second_alignment = None
90 if second_alignment is not None:
91 raise ValueError("We can only write one Alignment to a Nexus file.")
92
93
94 self.write_alignment(first_alignment)
95 return 1
96
98
99
100 if len(alignment) == 0:
101 raise ValueError("Must have at least one sequence")
102 if alignment.get_alignment_length() == 0:
103 raise ValueError("Non-empty sequences are required")
104 minimal_record = "#NEXUS\nbegin data; dimensions ntax=0 nchar=0; " \
105 + "format datatype=%s; end;" \
106 % self._classify_alphabet_for_nexus(alignment._alphabet)
107 n = Nexus.Nexus(minimal_record)
108 n.alphabet = alignment._alphabet
109 for record in alignment:
110 n.add_sequence(record.id, str(record.seq))
111 n.write_nexus_data(self.handle)
112
132
133 if __name__ == "__main__":
134 from StringIO import StringIO
135 print "Quick self test"
136 print
137 print "Repeated names without a TAXA block"
138 handle = StringIO("""#NEXUS
139 [TITLE: NoName]
140
141 begin data;
142 dimensions ntax=4 nchar=50;
143 format interleave datatype=protein gap=- symbols="FSTNKEYVQMCLAWPHDRIG";
144
145 matrix
146 CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ----
147 ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG
148 CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
149 CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
150 ;
151 end;
152 """)
153 for a in NexusIterator(handle):
154 print a
155 for r in a:
156 print repr(r.seq), r.name, r.id
157 print "Done"
158
159 print
160 print "Repeated names with a TAXA block"
161 handle = StringIO("""#NEXUS
162 [TITLE: NoName]
163
164 begin taxa
165 CYS1_DICDI
166 ALEU_HORVU
167 CATH_HUMAN
168 CYS1_DICDI;
169 end;
170
171 begin data;
172 dimensions ntax=4 nchar=50;
173 format interleave datatype=protein gap=- symbols="FSTNKEYVQMCLAWPHDRIG";
174
175 matrix
176 CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ----
177 ALEU_HORVU MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG
178 CATH_HUMAN ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
179 CYS1_DICDI -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
180 ;
181 end;
182 """)
183 for a in NexusIterator(handle):
184 print a
185 for r in a:
186 print repr(r.seq), r.name, r.id
187 print "Done"
188 print
189 print "Reading an empty file"
190 assert 0 == len(list(NexusIterator(StringIO())))
191 print "Done"
192 print
193 print "Writing..."
194
195 handle = StringIO()
196 NexusWriter(handle).write_file([a])
197 handle.seek(0)
198 print handle.read()
199
200 handle = StringIO()
201 try:
202 NexusWriter(handle).write_file([a, a])
203 assert False, "Should have rejected more than one alignment!"
204 except ValueError:
205 pass
206