Package Bio :: Package Motif :: Package Parsers :: Module MAST
[hide private]
[frames | no frames]

Source Code for Module Bio.Motif.Parsers.MAST

  1  # Copyright 2008 by Bartek Wilczynski. 
  2  # Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from Bio.Alphabet import IUPAC 
  8  from Bio.Motif.Parsers.MEME import MEMEMotif 
  9   
 10   
class Record(object):
    """Container for the data parsed out of one MAST run.

    A MAST.Record keeps track of the matches between motifs and sequences.
    Every entry of ``motifs`` is an object of the class MEMEMotif.

    Attributes (all empty right after construction):
     - sequences - list of sequence names
     - version   - MAST version string
     - database  - name of the database
     - diagrams  - dictionary mapping a sequence name to its motif diagram
     - alphabet  - alphabet of the database, None until it is known
     - motifs    - list of MEMEMotif objects

    Methods:
    get_motif_by_name (motif_name): returns the first MEMEMotif with the
    given name, or None if no motif has that name.
    """

    def __init__(self):
        # Plain strings first, then the alphabet, then the containers.
        self.version = ""
        self.database = ""
        self.alphabet = None
        self.sequences = []
        self.motifs = []
        self.diagrams = {}

    def get_motif_by_name(self, name):
        """Return the first stored motif called name, or None if absent."""
        matching = (motif for motif in self.motifs if motif.name == name)
        return next(matching, None)
34 35
def read(handle):
    """Parse MAST output from handle and return it as a Record.

    The handle is iterated line by line and is consumed front to back by
    the private readers: the version line, the database and motif table,
    then sections I, II and III.  Each reader continues from wherever the
    previous one stopped.
    """
    record = Record()
    parsing_stages = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    for parse_stage in parsing_stages:
        parse_stage(record, handle)
    return record
45 46 47 # Everything below is private 48 49
def __read_version(record, handle):
    """Locate the 'MAST version' line and store the version on record.

    Lines are consumed from handle up to and including the first one that
    contains 'MAST version'.  The third whitespace-separated word of that
    line is saved as record.version.

    Raises ValueError if no line contains 'MAST version'.
    """
    version_line = next(
        (text for text in handle if "MAST version" in text), None)
    if version_line is None:
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    # split() with no argument already ignores leading/trailing whitespace.
    record.version = version_line.split()[2]
57 58
def __read_database_and_motifs(record, handle):
    """Read the 'DATABASE AND MOTIFS' block into record.

    Skips ahead to the line starting with 'DATABASE AND MOTIFS', checks the
    '****' line after it, then reads the DATABASE line: its second word is
    stored as record.database and its third word selects record.alphabet
    ('(nucleotide)' or '(peptide)'; any other value leaves the alphabet
    untouched).  After the 'MOTIF WIDTH' header and its '----' underline,
    every line up to the first blank one becomes a MEMEMotif appended to
    record.motifs, with name and length taken from the first two columns.

    Raises ValueError if an expected line is malformed or if the input ends
    before an expected line is reached.
    """
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    # An exhausted handle is read as an empty line, so truncated input fails
    # the checks below with ValueError instead of leaking StopIteration.
    line = next(handle, "")
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    line = next(handle, "")
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    record.database = words[1]
    if words[2] == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif words[2] == '(peptide)':
        record.alphabet = IUPAC.protein
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    line = next(handle, "")
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    for line in handle:
        if not line.strip():
            # A blank line terminates the motif table.
            break
        words = line.strip().split()
        motif = MEMEMotif()
        motif.alphabet = record.alphabet
        motif.name = words[0]
        motif.length = int(words[1])
        # Columns after the width are not stored on the motif.
        record.motifs.append(motif)
91 92
def __read_section_i(record, handle):
    """Read the sequence names listed in SECTION I into record.sequences.

    Skips ahead to the line starting with 'SECTION I:' and then to the
    'SEQUENCE NAME' header, checks the '---' underline, and appends the
    first word of every following line to record.sequences until a blank
    line is met.  The line after that blank line must start with '****'.

    Raises ValueError if an expected line is malformed, if a table row has
    nothing after the sequence name, or if the input ends before an
    expected line is reached.
    """
    for line in handle:
        if line.startswith('SECTION I:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # An exhausted handle is read as an empty line, so truncated input fails
    # the checks below with ValueError instead of leaking StopIteration.
    line = next(handle, "")
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    for line in handle:
        if not line.strip():
            # A blank line terminates the table.
            break
        # Only the name is kept; the rest of the row is discarded.
        sequence, _ = line.split(None, 1)
        record.sequences.append(sequence)
    line = next(handle, "")
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
112 113
def __read_section_ii(record, handle):
    """Read the motif diagrams listed in SECTION II into record.diagrams.

    Skips ahead to the line starting with 'SECTION II:' and then to the
    'SEQUENCE NAME' header, checks the '---' underline, and reads rows
    until a blank line is met.  A row of three whitespace-separated words
    (name, p-value, diagram) starts a new entry in record.diagrams; a row
    starting with a space is a continuation whose stripped text is
    appended to the diagram of the most recent sequence.  The line after
    the blank line must start with '****'.

    Raises ValueError if an expected line is malformed, if a continuation
    row appears before any sequence row, or if the input ends before an
    expected line is reached.
    """
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # An exhausted handle is read as an empty line, so truncated input fails
    # the checks below with ValueError instead of leaking StopIteration.
    line = next(handle, "")
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    sequence = None
    for line in handle:
        if not line.strip():
            # A blank line terminates the table.
            break
        elif line.startswith(" "):
            if sequence is None:
                # There is no diagram yet that this row could extend.
                raise ValueError(
                    "Continuation line without a preceding sequence line:\n%s"
                    % line)
            record.diagrams[sequence] += line.strip()
        else:
            # The p-value column is not stored.
            sequence, _, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next(handle, "")
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
136 137
def __read_section_iii(record, handle):
    """Skip over the start of SECTION III without storing anything.

    Lines are consumed from handle in four stages, each ending on (and
    consuming) the first line that meets its condition: a line starting
    with 'SECTION III:', then one starting with '****', then one starting
    with '*****', and finally the first non-blank line.  The record is not
    modified.  If the handle runs out, the remaining stages do nothing.
    """
    stop_conditions = (
        lambda text: text.startswith('SECTION III:'),
        lambda text: text.startswith('****'),
        lambda text: text.startswith('*****'),
        lambda text: text.strip(),
    )
    for stage_done in stop_conditions:
        for text in handle:
            if stage_done(text):
                break
151