Package Bio :: Package motifs :: Module meme
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.meme

  1  # Copyright 2008 by Bartek Wilczynski 
  2  # Adapted from  Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from Bio.Alphabet import IUPAC 
  8  from Bio import Seq 
  9  from Bio import motifs 
 10   
 11   
def read(handle):
    """Parse the text output of the MEME program into a Record object.

    Arguments:
     - handle - an iterator over the lines of a MEME text output file
       (for example an open file). It is consumed up to and including
       the line starting with 'SUMMARY OF MOTIFS'.

    Returns a Record whose header attributes have been filled in by the
    private __read_* helpers and which contains one Motif per 'MOTIF'
    section found in the stream.

    Raises ValueError if the stream ends prematurely or an expected line
    is not found.

    Example:

    >>> from Bio.motifs import meme
    >>> f = open("meme.output.txt")
    >>> record = meme.read(f)
    >>> for motif in record:
    ...     for instance in motif.instances:
    ...         print("%s %s %s %s" % (instance.motif_name,
    ...                                instance.sequence_name,
    ...                                instance.strand,
    ...                                instance.pvalue))

    """
    record = Record()
    # The header sections are read in the order the helpers expect them.
    __read_version(record, handle)
    __read_datafile(record, handle)
    __read_alphabet(record, handle)
    __read_sequences(record, handle)
    __read_command(record, handle)
    # Skip forward to the statistics line of the first motif.
    for line in handle:
        if line.startswith('MOTIF 1'):
            break
    else:
        raise ValueError('Unexpected end of stream')
    alphabet = record.alphabet
    # When 'revcomp' appears in the command line, each site line carries an
    # extra strand column that __read_motif_sequences has to remove.
    revcomp = 'revcomp' in record.command
    while True:
        # At this point `line` is always a line starting with 'MOTIF'.
        length, num_occurrences, evalue = __read_motif_statistics(line)
        name = __read_motif_name(handle)
        instances = __read_motif_sequences(handle, name, alphabet, length, revcomp)
        motif = Motif(alphabet, instances)
        motif.length = length
        motif.num_occurrences = num_occurrences
        motif.evalue = evalue
        motif.name = name
        record.append(motif)
        __skip_unused_lines(handle)
        try:
            # next() instead of handle.next(): the method form does not
            # exist on Python 3 iterators.
            line = next(handle)
        except StopIteration:
            raise ValueError('Unexpected end of stream: Expected to find new motif, or the summary of motifs')
        if line.startswith("SUMMARY OF MOTIFS"):
            break
        if not line.startswith('MOTIF'):
            raise ValueError("Line does not start with 'MOTIF':\n%s" % line)
    return record
58 59
class Motif(motifs.Motif):
    """Motif subclass used when parsing MEME (and MAST) output.

    In addition to what motifs.Motif provides, a MEME motif carries its
    name, its E-value and the number of occurrences reported for it.
    """

    def __init__(self, alphabet=None, instances=None):
        """Initialize the base motif and set the MEME fields to defaults."""
        motifs.Motif.__init__(self, alphabet, instances)
        # MEME-specific fields; the parser overwrites them after construction.
        self.name = None
        self.num_occurrences = 0
        self.evalue = 0.0
72 73
class Instance(Seq.Seq):
    """One occurrence (site) of a MEME motif together with its annotations."""

    def __init__(self, *args, **kwds):
        """Build the underlying Seq, then attach default site annotations.

        All positional and keyword arguments are forwarded unchanged to
        Seq.Seq.__init__.
        """
        Seq.Seq.__init__(self, *args, **kwds)
        # Names identifying which motif / input sequence the site belongs to.
        self.motif_name = ""
        self.sequence_name = ""
        # Placement and score of the site; placeholders until the parser
        # fills them in.
        self.strand = 0
        self.start = 0
        self.length = 0
        self.pvalue = 1.0
85 86
class Record(list):
    """Container for the results of one MEME run.

    A Record is a list of motifs that additionally stores the header
    information of the run (version, datafile, command, alphabet and
    sequences).

    Because Record inherits from list, individual motifs can be accessed
    by index. A motif can also be looked up by its name:

    >>> f = open("meme.output.txt")
    >>> from Bio import motifs
    >>> record = motifs.parse(f, 'MEME')
    >>> motif = record[0]
    >>> print(motif.name)
    Motif 1
    >>> motif = record['Motif 1']
    >>> print(motif.name)
    Motif 1
    """

    def __init__(self):
        """Create an empty record with blank header fields."""
        self.sequences = []
        self.alphabet = None
        self.command = ""
        self.datafile = ""
        self.version = ""

    def __getitem__(self, key):
        """Return a motif by list index/slice, or by name if key is a str.

        A name that matches no motif yields None rather than raising.
        """
        if not isinstance(key, str):
            # Integer indices and slices use the ordinary list behaviour.
            return list.__getitem__(self, key)
        for candidate in self:
            if candidate.name == key:
                return candidate
        return None
123 124 125 # Everything below is private 126 127
def __read_version(record, handle):
    """Find the 'MEME version' line and store the version in record.version.

    Lines are consumed from handle up to and including the first line that
    starts with 'MEME version'. The third whitespace-separated token of
    that line is stored as the version.

    Raises ValueError if no such line is found before the stream ends.
    """
    for line in handle:
        if line.startswith('MEME version'):
            tokens = line.strip().split()
            record.version = tokens[2]
            return
    raise ValueError("Improper input file. File should contain a line starting MEME version.")
137 138
def __read_datafile(record, handle):
    """Read the 'TRAINING SET' header and store the data file name.

    Lines are skipped until one starts with 'TRAINING SET'. The next line
    must start with '****' and the one after that with 'DATAFILE'. The
    stripped DATAFILE line, with the text 'DATAFILE= ' removed, is stored
    in record.datafile.

    Raises ValueError if the stream ends early or a line does not start
    with the expected text.
    """
    for line in handle:
        if line.startswith('TRAINING SET'):
            break
    else:
        raise ValueError("Unexpected end of stream: 'TRAINING SET' not found.")
    # The two lines right after 'TRAINING SET' must appear in this order.
    for prefix in ('****', 'DATAFILE'):
        try:
            # next() instead of handle.next(): the method form does not
            # exist on Python 3 iterators.
            line = next(handle)
        except StopIteration:
            raise ValueError("Unexpected end of stream: Expected to find line starting with '%s'" % prefix)
        if not line.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, line))
    # `line` is now the DATAFILE line.
    line = line.strip()
    line = line.replace('DATAFILE= ', '')
    record.datafile = line
160 161
def __read_alphabet(record, handle):
    """Read the 'ALPHABET' line and store the matching alphabet object.

    The next line of handle must start with 'ALPHABET'. If the rest of the
    line (after removing 'ALPHABET= ') is exactly 'ACGT', record.alphabet
    is set to IUPAC.unambiguous_dna; any other value selects IUPAC.protein.

    Raises ValueError if the stream has ended or the line does not start
    with 'ALPHABET'.
    """
    try:
        # next() instead of handle.next(): the method form does not exist
        # on Python 3 iterators.
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with 'ALPHABET'")
    if not line.startswith('ALPHABET'):
        raise ValueError("Line does not start with 'ALPHABET':\n%s" % line)
    line = line.strip()
    line = line.replace('ALPHABET= ', '')
    if line == 'ACGT':
        al = IUPAC.unambiguous_dna
    else:
        # Everything that is not plain ACGT is treated as protein.
        al = IUPAC.protein
    record.alphabet = al
176 177
def __read_sequences(record, handle):
    """Read the table of input sequence names into record.sequences.

    The next two lines of handle must start with 'Sequence name' and
    '----' respectively. Every following line up to (and excluding) the
    first line starting with '***' contributes its first token to
    record.sequences; when a line has exactly six tokens, its fourth token
    is appended as well (presumably a second name/weight/length triple on
    the same row - confirm against the MEME output format).

    Raises ValueError if the stream ends early or a header line does not
    start with the expected text.
    """
    for prefix in ('Sequence name', '----'):
        try:
            # next() instead of handle.next(): the method form does not
            # exist on Python 3 iterators.
            line = next(handle)
        except StopIteration:
            raise ValueError("Unexpected end of stream: Expected to find line starting with '%s'" % prefix)
        if not line.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, line))
    for line in handle:
        if line.startswith('***'):
            break
        line = line.strip()
        ls = line.split()
        record.sequences.append(ls[0])
        if len(ls) == 6:
            record.sequences.append(ls[3])
    else:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
201 202
def __read_command(record, handle):
    """Find the 'command:' line and store the command in record.command.

    Lines are consumed from handle up to and including the first line that
    starts with 'command:'. That line is stripped and the text 'command: '
    is removed before it is stored.

    Raises ValueError if no such line is found before the stream ends.
    """
    for line in handle:
        if line.startswith('command:'):
            record.command = line.strip().replace('command: ', '')
            return
    raise ValueError("Unexpected end of stream: Expected to find line starting with 'command'")
212 213
def __read_motif_statistics(line):
    """Extract (length, num_occurrences, evalue) from a motif header line.

    The first five characters of the line are dropped (the caller passes
    lines starting with 'MOTIF') and the remainder is split on whitespace.
    Tokens 3 and 6 are converted to int and token 12 to float - presumably
    matching a 'N width = W sites = S llr = L E-value = E' layout; confirm
    against the MEME output format.
    """
    fields = line[5:].strip().split()
    return int(fields[3]), int(fields[6]), float(fields[12])
221 222
def __read_motif_name(handle):
    """Return the motif name taken from the 'sorted by position p-value' line.

    Lines are consumed from handle up to and including the first line that
    contains 'sorted by position p-value'. The name is the first two
    whitespace-separated words of that line joined by a single space.

    Raises ValueError if no such line is found before the stream ends.
    """
    for line in handle:
        if 'sorted by position p-value' in line:
            leading_words = line.strip().split()[0:2]
            return " ".join(leading_words)
    raise ValueError('Unexpected end of stream: Failed to find motif name')
233 234
def __read_motif_sequences(handle, motif_name, alphabet, length, revcomp):
    """Read the table of sites for one motif and return them as Instances.

    Arguments:
     - handle - iterator over the remaining lines of the MEME output.
     - motif_name - name stored on every instance as motif_name.
     - alphabet - alphabet passed to each Instance and to motifs.Instances.
     - length - expected length of every site sequence.
     - revcomp - if true, each site line carries a strand as its second
       token, which is removed before the other columns are read;
       otherwise the strand is recorded as '+'.

    The next three lines of handle must start with '---', 'Sequence name'
    and '---'. Every following line up to (and excluding) the next line
    starting with '---' describes one site.

    Raises ValueError if the stream ends early or a header line does not
    start with the expected text.
    """
    # (required prefix, message used if the stream ends before that line)
    header = (
        ('---', 'Unexpected end of stream: Failed to find motif sequences'),
        ('Sequence name', "Unexpected end of stream: Expected to find line starting with 'Sequence name'"),
        ('---', 'Unexpected end of stream: Failed to find motif sequences'),
    )
    for prefix, eof_message in header:
        try:
            # next() instead of handle.next(): the method form does not
            # exist on Python 3 iterators.
            line = next(handle)
        except StopIteration:
            raise ValueError(eof_message)
        if not line.startswith(prefix):
            raise ValueError("Line does not start with '%s':\n%s" % (prefix, line))
    instances = []
    for line in handle:
        if line.startswith('---'):
            break
        line = line.strip()
        words = line.split()
        if revcomp:
            # Removing the strand column makes the remaining indices the
            # same with and without revcomp.
            strand = words.pop(1)
        else:
            strand = '+'
        sequence = words[4]
        # Kept as an assert so that the exception type seen by existing
        # callers does not change.
        assert len(sequence) == length
        instance = Instance(sequence, alphabet)
        instance.motif_name = motif_name
        instance.sequence_name = words[0]
        instance.start = int(words[1])
        instance.pvalue = float(words[2])
        instance.strand = strand
        instance.length = length
        instances.append(instance)
    else:
        raise ValueError('Unexpected end of stream')
    return motifs.Instances(instances, alphabet)
277 278
def __skip_unused_lines(handle):
    """Advance handle past the parts of a motif section that are not parsed.

    In order, the stream is scanned forward to lines starting with
    'log-odds matrix', '---', 'letter-probability matrix', '---' and
    'Time'. After the 'Time' line there must be one blank line and a line
    starting with '***'. Any further blank lines are skipped, and the
    first non-blank line after them must again start with '***'.

    Raises ValueError if the stream ends early or a line does not have the
    expected form.
    """
    # Each marker is searched for from wherever the previous search stopped.
    markers = ('log-odds matrix', '---', 'letter-probability matrix', '---', 'Time')
    for prefix in markers:
        for line in handle:
            if line.startswith(prefix):
                break
        else:
            raise ValueError("Unexpected end of stream: Expected to find line starting with '%s'" % prefix)
    try:
        # next() instead of handle.next(): the method form does not exist
        # on Python 3 iterators.
        line = next(handle)
    except StopIteration:
        raise ValueError('Unexpected end of stream: Expected to find blank line')
    if line.strip():
        raise ValueError("Expected blank line, but got:\n%s" % line)
    try:
        line = next(handle)
    except StopIteration:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
    if not line.startswith('***'):
        raise ValueError("Line does not start with '***':\n%s" % line)
    # Skip any blank lines before the closing '***' line.
    for line in handle:
        if line.strip():
            break
    else:
        raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
    if not line.startswith('***'):
        raise ValueError("Line does not start with '***':\n%s" % line)
324