1
2
3
4
5
6
7 from Bio.Alphabet import IUPAC
8 from Bio import Seq
9 from Bio import motifs
10
11
def read(handle):
    """Parse the text output of the MEME program into a Record object.

    Arguments:
     - handle - an iterator over the lines of a MEME text output file,
       for example an open file handle.

    Returns the Record, to which one Motif has been appended for every
    motif section found before the "SUMMARY OF MOTIFS" line.

    Raises ValueError if the stream ends early or if a line does not have
    the expected form.

    Example:

    >>> from Bio import motifs
    >>> f = open("meme.output.txt")
    >>> record = motifs.parse(f, 'MEME')
    >>> for motif in record:
    ...     for instance in motif.instances:
    ...         print("%s %s %s %s" % (instance.motif_name,
    ...                                instance.sequence_name,
    ...                                instance.strand,
    ...                                instance.pvalue))

    """
    record = Record()
    # The header sections are consumed strictly in the order they are read
    # here; each helper leaves the handle positioned after its own section.
    __read_version(record, handle)
    __read_datafile(record, handle)
    __read_alphabet(record, handle)
    __read_sequences(record, handle)
    __read_command(record, handle)
    # Skip forward to the statistics line of the first motif.
    for line in handle:
        if line.startswith('MOTIF 1'):
            break
    else:
        raise ValueError('Unexpected end of stream')
    alphabet = record.alphabet
    # The site tables are parsed differently when the recorded command line
    # mentions 'revcomp' (an extra strand column is expected).
    revcomp = 'revcomp' in record.command
    while True:
        # 'line' is always the current motif's statistics line here.
        length, num_occurrences, evalue = __read_motif_statistics(line)
        name = __read_motif_name(handle)
        instances = __read_motif_sequences(handle, name, alphabet, length, revcomp)
        motif = Motif(alphabet, instances)
        motif.length = length
        motif.num_occurrences = num_occurrences
        motif.evalue = evalue
        motif.name = name
        record.append(motif)
        __skip_unused_lines(handle)
        try:
            # next() works on both Python 2 and Python 3 iterators, unlike
            # the Python 2 only handle.next() method.
            line = next(handle)
        except StopIteration:
            raise ValueError('Unexpected end of stream: Expected to find new motif, or the summary of motifs')
        if line.startswith("SUMMARY OF MOTIFS"):
            break
        if not line.startswith('MOTIF'):
            raise ValueError("Line does not start with 'MOTIF':\n%s" % line)
    return record
58
59
60 -class Motif(motifs.Motif):
61 """A subclass of Motif used in parsing MEME (and MAST) output.
62
63 This subclass defines functions and data specific to MEME motifs.
64 This includes the motif name, the evalue for a motif, and its number
65 of occurrences.
66 """
67 - def __init__(self, alphabet=None, instances=None):
72
73
class Instance(Seq.Seq):
    """A single occurrence of a MEME motif, stored as a sequence.

    On top of the sequence data handled by Seq.Seq, an instance carries
    the MEME-specific fields initialised in __init__.
    """

    def __init__(self, *args, **kwds):
        """Build the underlying Seq, then reset every MEME field to its default.

        All positional and keyword arguments are forwarded unchanged to
        Seq.Seq.__init__.
        """
        Seq.Seq.__init__(self, *args, **kwds)
        # Names identifying the motif and the sequence the site belongs to.
        self.motif_name = ""
        self.sequence_name = ""
        # Location of the site; the parser overwrites these defaults.
        self.strand = 0
        self.start = 0
        self.length = 0
        # p-value of the site; 1.0 until the parser fills it in.
        self.pvalue = 1.0
85
86
class Record(list):
    """A class for holding the results of a MEME run.

    A Record is a list of motifs that additionally stores the metadata
    of the MEME run (version, datafile, command, alphabet and sequences).

    Because the class inherits from list, you can access individual
    motifs in the record by their index. Alternatively, you can find a motif
    by its name:

    >>> f = open("meme.output.txt")
    >>> from Bio import motifs
    >>> record = motifs.parse(f, 'MEME')
    >>> motif = record[0]
    >>> print(motif.name)
    Motif 1
    >>> motif = record['Motif 1']
    >>> print(motif.name)
    Motif 1
    """

    def __init__(self):
        """Create an empty record with blank run metadata."""
        list.__init__(self)
        self.version = ""
        self.datafile = ""
        self.command = ""
        self.alphabet = None
        self.sequences = []

    def __getitem__(self, key):
        """Return a motif by name (str key) or by list index / slice.

        Raises KeyError if a string key matches no motif name, rather
        than silently returning None. Non-string keys are delegated to
        list.__getitem__, which raises IndexError or TypeError as usual.
        """
        if isinstance(key, str):
            for motif in self:
                if motif.name == key:
                    return motif
            raise KeyError(key)
        return list.__getitem__(self, key)
123
124
125
126
127
def __read_version(record, handle):
    """Find the 'MEME version' line and store the version on the record.

    Lines are consumed from handle up to and including the first one that
    starts with 'MEME version'; its third whitespace-separated field is
    stored in record.version.

    Raises ValueError if no such line exists, or if the line has no third
    field to use as the version.
    """
    for line in handle:
        if line.startswith('MEME version'):
            break
    else:
        raise ValueError("Improper input file. File should contain a line starting MEME version.")
    fields = line.split()
    # Report a truncated version line as a parse error, like every other
    # malformed-input case in this parser, instead of an IndexError.
    if len(fields) < 3:
        raise ValueError("Line does not contain a MEME version number:\n%s" % line)
    record.version = fields[2]
137
138
def __read_datafile(record, handle):
    """Read the 'TRAINING SET' header and store the datafile on the record.

    Skips forward to the line starting with 'TRAINING SET', then expects
    a line starting with '****' followed by a line starting with
    'DATAFILE'. The latter, stripped and with 'DATAFILE= ' removed, is
    stored in record.datafile.

    Raises ValueError if the stream ends early or a line is not of the
    expected form.
    """
    for line in handle:
        if line.startswith('TRAINING SET'):
            break
    else:
        raise ValueError("Unexpected end of stream: 'TRAINING SET' not found.")
    # next() is used instead of handle.next() so that the parser also runs
    # on Python 3, where iterators have no next() method.
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '****'")
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'DATAFILE'")
    if not line.startswith('DATAFILE'):
        raise ValueError("Line does not start with 'DATAFILE':\n%s" % line)
    line = line.strip()
    line = line.replace('DATAFILE= ', '')
    record.datafile = line
160
161
def __read_alphabet(record, handle):
    """Read the 'ALPHABET' line and store the matching alphabet on the record.

    The next line of handle must start with 'ALPHABET'. If, after
    stripping and removing 'ALPHABET= ', the remainder equals 'ACGT',
    record.alphabet is set to IUPAC.unambiguous_dna; any other value
    selects IUPAC.protein.

    Raises ValueError if the stream is exhausted or the line does not
    start with 'ALPHABET'.
    """
    # next() is used instead of handle.next() so that the parser also runs
    # on Python 3, where iterators have no next() method.
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'ALPHABET'")
    if not line.startswith('ALPHABET'):
        raise ValueError("Line does not start with 'ALPHABET':\n%s" % line)
    line = line.strip()
    line = line.replace('ALPHABET= ', '')
    if line == 'ACGT':
        al = IUPAC.unambiguous_dna
    else:
        al = IUPAC.protein
    record.alphabet = al
176
177
def __read_sequences(record, handle):
    """Read the table of sequence names and append them to record.sequences.

    The next two lines of handle must start with 'Sequence name' and
    '----' respectively. Every following line, up to a line starting with
    '***', contributes its first field to record.sequences; a line with
    exactly six fields also contributes its fourth field (a second
    table entry on the same row).

    Raises ValueError if the stream ends before the closing '***' line or
    a header line is not of the expected form.
    """
    # next() is used instead of handle.next() so that the parser also runs
    # on Python 3, where iterators have no next() method.
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'Sequence name'")
    if not line.startswith('Sequence name'):
        raise ValueError("Line does not start with 'Sequence name':\n%s" % line)
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '----'")
    if not line.startswith('----'):
        raise ValueError("Line does not start with '----':\n%s" % line)
    for line in handle:
        if line.startswith('***'):
            break
        line = line.strip()
        ls = line.split()
        record.sequences.append(ls[0])
        if len(ls) == 6:
            record.sequences.append(ls[3])
    else:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
201
202
def __read_command(record, handle):
    """Locate the 'command:' line and store the command text on the record.

    Lines are consumed from handle up to and including the first one that
    starts with 'command:'. That line, stripped and with every occurrence
    of 'command: ' removed, is stored in record.command.

    Raises ValueError if no such line is found.
    """
    # The generator pulls lines lazily, so the handle is left positioned
    # directly after the matching line.
    matching = (text for text in handle if text.startswith('command:'))
    command_line = next(matching, None)
    if command_line is None:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'command'")
    record.command = command_line.strip().replace('command: ', '')
212
213
def __read_motif_statistics(line):
    """Extract the length, occurrence count and E-value from a motif line.

    The first five characters of line are dropped and the remainder is
    split on whitespace. Field 3 is returned as an int (length), field 6
    as an int (number of occurrences) and field 12 as a float (E-value),
    counting fields from zero.
    """
    fields = line[5:].split()
    return int(fields[3]), int(fields[6]), float(fields[12])
221
222
def __read_motif_name(handle):
    """Return the motif name taken from the 'sorted by position p-value' line.

    Lines are consumed from handle up to and including the first one that
    contains 'sorted by position p-value'. The name is the first two
    whitespace-separated words of that line joined by a single space.

    Raises ValueError if no such line is found.
    """
    # The generator pulls lines lazily, so the handle is left positioned
    # directly after the matching line.
    candidates = (text for text in handle if 'sorted by position p-value' in text)
    header = next(candidates, None)
    if header is None:
        raise ValueError('Unexpected end of stream: Failed to find motif name')
    return " ".join(header.split()[:2])
233
234
def __read_motif_sequences(handle, motif_name, alphabet, length, revcomp):
    """Read the table of sites of one motif and return them as Instances.

    Arguments:
     - handle - iterator over the lines of the MEME output, positioned at
       the '---' line that precedes the 'Sequence name' table header.
     - motif_name - stored on every instance as instance.motif_name.
     - alphabet - passed to each Instance and to motifs.Instances.
     - length - expected length of every site sequence.
     - revcomp - if true, the second column of each row is taken as the
       strand; otherwise the strand is set to '+'.

    Returns motifs.Instances built from the parsed Instance objects.

    Raises ValueError if the stream ends early, a header line is not of
    the expected form, or a site sequence does not have the given length.
    """
    # The table header is three lines: a rule, the column titles, a rule.
    header_checks = (
        ('---', 'Unexpected end of stream: Failed to find motif sequences'),
        ('Sequence name', "Unexpected end of stream: Expected to find line starting with 'Sequence name'"),
        ('---', 'Unexpected end of stream: Failed to find motif sequences'),
    )
    for prefix, eof_message in header_checks:
        # next() is used instead of handle.next() so that the parser also
        # runs on Python 3, where iterators have no next() method.
        try:
            line = next(handle)
        except StopIteration:
            raise ValueError(eof_message)
        if not line.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, line))
    instances = []
    for line in handle:
        if line.startswith('---'):
            break
        words = line.split()
        if revcomp:
            # Removing the strand column makes the remaining columns line
            # up with the layout used when there is no strand column.
            strand = words.pop(1)
        else:
            strand = '+'
        sequence = words[4]
        # Explicit check rather than an assert, so that the validation is
        # not stripped when Python runs with -O.
        if len(sequence) != length:
            raise ValueError("Motif instance has length %d, expected %d:\n%s"
                             % (len(sequence), length, line))
        instance = Instance(sequence, alphabet)
        instance.motif_name = motif_name
        instance.sequence_name = words[0]
        instance.start = int(words[1])
        instance.pvalue = float(words[2])
        instance.strand = strand
        instance.length = length
        instances.append(instance)
    else:
        raise ValueError('Unexpected end of stream')
    return motifs.Instances(instances, alphabet)
277
278
def __skip_unused_lines(handle):
    """Skip the parts of a motif section that the parser does not use.

    Consumes lines from handle through, in order: a line starting with
    'log-odds matrix', a '---' line, a line starting with
    'letter-probability matrix', a '---' line, a line starting with
    'Time', one blank line, a '***' line, any number of blank lines and a
    final '***' line.

    Raises ValueError if the stream ends early or a line is not of the
    expected form.
    """
    def skip_to(prefix):
        # Consume lines up to and including the first one starting with prefix.
        for text in handle:
            if text.startswith(prefix):
                return
        raise ValueError("Unexpected end of stream: Expected to find line starting with '%s'" % prefix)

    for prefix in ('log-odds matrix', '---', 'letter-probability matrix', '---', 'Time'):
        skip_to(prefix)
    # next() is used instead of handle.next() so that the parser also runs
    # on Python 3, where iterators have no next() method.
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError('Unexpected end of stream: Expected to find blank line')
    if line.strip():
        raise ValueError("Expected blank line, but got:\n%s" % line)
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
    if not line.startswith('***'):
        raise ValueError("Line does not start with '***':\n%s" % line)
    # Any number of blank lines may separate the two '***' rules.
    for line in handle:
        if line.strip():
            break
    else:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
    if not line.startswith('***'):
        raise ValueError("Line does not start with '***':\n%s" % line)
324