Package Bio :: Package Motif :: Package Parsers :: Module MAST
[hide private]
[frames | no frames]

Source Code for Module Bio.Motif.Parsers.MAST

  1  # Copyright 2008 by Bartek Wilczynski. 
  2  # Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from Bio.Alphabet import IUPAC 
  8  from Bio.Motif.Parsers.MEME import MEMEMotif 
  9   
 10   
class Record(object):
    """Container for the results of a single MAST run.

    A MAST.Record stores data about matches between motifs and sequences.
    The motifs kept in the Record are MEMEMotif objects.

    Every attribute starts out empty: sequences (list), version (str),
    database (str), diagrams (dict), alphabet (None) and motifs (list).

    Methods:
    get_motif_by_name (motif_name): returns the MEMEMotif with the given
    name, or None when no stored motif has that name.
    """

    def __init__(self):
        """Create an empty record."""
        self.version = ""
        self.database = ""
        self.alphabet = None
        self.motifs = []
        self.sequences = []
        self.diagrams = {}

    def get_motif_by_name(self, name):
        """Return the first motif whose name equals name, or None."""
        matching = (motif for motif in self.motifs if motif.name == name)
        return next(matching, None)
34
def read(handle):
    """Parse MAST output from handle and return it as a Record.

    The handle is passed, together with a fresh Record, through each
    section reader in turn: version, database and motifs, section I,
    section II and section III.
    """
    record = Record()
    stages = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    for parse_stage in stages:
        parse_stage(record, handle)
    return record
44 45 46 # Everything below is private 47 48
def __read_version(record, handle):
    """Find the 'MAST version' line and store the version on the record.

    Arguments:
    record -- object whose version attribute is set to the version string.
    handle -- iterator over the lines of the MAST output; it is consumed
    up to and including the version line.

    Raises ValueError if no line contains 'MAST version', or if that line
    has nothing after it.
    """
    marker = "MAST version"
    for line in handle:
        if marker in line:
            break
    else:
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    # Take the token that follows the marker instead of a fixed word index,
    # so text before the marker cannot shift the result and a missing
    # version number is reported as a ValueError rather than an IndexError.
    tokens = line.split(marker, 1)[1].split()
    if not tokens:
        raise ValueError("Line does not contain a version number after 'MAST version':\n%s" % line)
    record.version = tokens[0]
56 57
def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' section into the record.

    Sets record.database from the second word of the DATABASE line and
    record.alphabet from its third word ('(nucleotide)' or '(peptide)';
    any other value leaves the alphabet untouched).  One MEMEMotif per row
    of the motif table is appended to record.motifs, with its alphabet,
    name and length filled in.

    Arguments:
    record -- object receiving database, alphabet and motifs.
    handle -- iterator over the lines of the MAST output; it is consumed
    up to and including the blank line that ends the motif table.

    Raises ValueError if an expected line is missing or malformed,
    including when the input ends early.
    """
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    else:
        raise ValueError("Improper input file. Does not contain a 'DATABASE AND MOTIFS' section")
    # next(handle, '') maps a premature end of input to an empty line, so
    # the check below reports it as a ValueError instead of letting a
    # StopIteration escape from the parser.
    line = next(handle, '')
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    line = next(handle, '')
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    if len(words) < 2:
        raise ValueError("Line does not contain a database name:\n%s" % line)
    record.database = words[1]
    # The sequence type is optional here; without it the alphabet is left
    # as it was, exactly as for an unrecognised type.
    sequence_type = words[2] if len(words) > 2 else None
    if sequence_type == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif sequence_type == '(peptide)':
        record.alphabet = IUPAC.protein
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    else:
        raise ValueError("Improper input file. Does not contain a line with 'MOTIF WIDTH'")
    line = next(handle, '')
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    # Motif table: one row per motif, terminated by a blank line.  Only the
    # first two columns (name and width) are used.
    for line in handle:
        if not line.strip():
            break
        words = line.strip().split()
        motif = MEMEMotif()
        motif.alphabet = record.alphabet
        motif.name = words[0]
        motif.length = int(words[1])
        record.motifs.append(motif)
90 91
def __read_section_i(record, handle):
    """Parse section I of the MAST output: the list of sequence names.

    Skips to the 'SECTION I:' heading and then to the 'SEQUENCE NAME'
    column header, checks the '---' rule under it, and appends the first
    word of every following row to record.sequences until a blank line.
    The line after that blank line must start with '****'.

    Arguments:
    record -- object whose sequences list is extended.
    handle -- iterator over the lines of the MAST output; it is consumed
    up to and including the closing '****' line.

    Raises ValueError if an expected line is missing or malformed,
    including when the input ends early.
    """
    for line in handle:
        if line.startswith('SECTION I:'):
            break
    else:
        raise ValueError("Improper input file. Does not contain 'SECTION I:'")
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    else:
        raise ValueError("Improper input file. Section I does not contain a 'SEQUENCE NAME' line")
    # next(handle, '') maps a premature end of input to an empty line, so
    # the check below reports it as a ValueError instead of letting a
    # StopIteration escape from the parser.
    line = next(handle, '')
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    for line in handle:
        if not line.strip():
            break
        # Each row needs a name followed by at least one more field; only
        # the name is kept.
        fields = line.split(None, 1)
        if len(fields) < 2:
            raise ValueError("Line does not contain a sequence name and description:\n%s" % line)
        record.sequences.append(fields[0])
    line = next(handle, '')
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
111 112
def __read_section_ii(record, handle):
    """Parse section II of the MAST output: the motif diagrams.

    Skips to the 'SECTION II:' heading and then to the 'SEQUENCE NAME'
    column header, checks the '---' rule under it, and reads rows until a
    blank line.  A row of three whitespace-separated fields stores its
    third field in record.diagrams under its first field; a row starting
    with a space is a continuation whose stripped text is appended to the
    diagram of the most recent sequence.  The line after the blank line
    must start with '****'.

    Arguments:
    record -- object whose diagrams dict is filled in.
    handle -- iterator over the lines of the MAST output; it is consumed
    up to and including the closing '****' line.

    Raises ValueError if an expected line is missing or malformed,
    including when the input ends early or a continuation row appears
    before any sequence row.
    """
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    else:
        raise ValueError("Improper input file. Does not contain 'SECTION II:'")
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    else:
        raise ValueError("Improper input file. Section II does not contain a 'SEQUENCE NAME' line")
    # next(handle, '') maps a premature end of input to an empty line, so
    # the check below reports it as a ValueError instead of letting a
    # StopIteration escape from the parser.
    line = next(handle, '')
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            # A continuation needs a preceding sequence row to attach to.
            if sequence is None:
                raise ValueError("Diagram continuation line found before any sequence line:\n%s" % line)
            record.diagrams[sequence] += line.strip()
        else:
            # The middle field (the p-value column) is not stored.
            sequence, _pvalue, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next(handle, '')
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
135 136
def __read_section_iii(record, handle):
    """Advance the handle through the start of section III.

    Lines are consumed up to and including, in order: the first line
    starting with 'SECTION III:', the next line starting with '****', and
    the next line starting with '*****'.  After that, lines are consumed
    up to and including the first one that is not blank.  Nothing is
    stored on the record, and running out of input is not an error.
    """
    for marker in ('SECTION III:', '****', '*****'):
        for line in handle:
            if line.startswith(marker):
                break
    for line in handle:
        if line.strip():
            break
150