Package Bio :: Package motifs :: Module mast
[hide private]
[frames | no frames]

Source Code for Module Bio.motifs.mast

  1  # Copyright 2008 by Bartek Wilczynski. 
  2  # Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  from Bio.Alphabet import IUPAC 
  8  from Bio.motifs import meme 
  9   
 10   
class Record(list):
    """Container for the results of a MAST run.

    A mast.Record stores the matches between motifs and sequences that
    MAST reported. The motifs kept in the record are meme.Motif objects.

    Since mast.Record is a subclass of list, a motif can be fetched by its
    position in the record. A motif can also be fetched by its name:

    >>> from Bio import motifs
    >>> f = open("mast.output.txt")
    >>> record = motifs.parse(f, 'MAST')
    >>> motif = record[0]
    >>> print(motif.name)
    1
    >>> motif = record['1']
    >>> print(motif.name)
    1

    Looking up a name that no motif carries yields None rather than
    raising an exception.
    """

    def __init__(self):
        """Create a record with no motifs and blank metadata."""
        # Plain-text metadata, blank until a parser fills it in.
        self.version = ""
        self.database = ""
        # No alphabet is known until the database line has been parsed.
        self.alphabet = None
        # Sequence names, and the motif diagram stored under each name.
        self.sequences = []
        self.diagrams = {}

    def __getitem__(self, key):
        """Return a motif by list index/slice, or by name when key is a str.

        For a str key the first motif whose ``name`` equals the key is
        returned; None is returned when there is no such motif.
        """
        if not isinstance(key, str):
            # Integers and slices get the ordinary list behaviour.
            return list.__getitem__(self, key)
        for candidate in self:
            if candidate.name == key:
                return candidate
        return None
46 47
def read(handle):
    """Parse MAST output from ``handle`` and return it as a Record.

    The handle is consumed front to back: the version line, the database
    and motif table, then sections I, II and III, in that order. Each step
    fills in the same Record, which is returned once all steps have run.
    """
    parsed = Record()
    # Order matters: every reader continues from where the previous one
    # left the handle.
    ordered_readers = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    for reader in ordered_readers:
        reader(parsed, handle)
    return parsed
57 58 59 # Everything below is private 60 61
def __read_version(record, handle):
    """Find the 'MAST version' line and store the version on the record.

    Lines are consumed from ``handle`` up to and including the first one
    that contains the text 'MAST version'. The third whitespace-separated
    token of that line is stored in ``record.version``.

    Raises ValueError if the handle is exhausted without such a line.
    """
    version_line = None
    for text in handle:
        if "MAST version" in text:
            version_line = text
            break
    if version_line is None:
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    tokens = version_line.split()
    record.version = tokens[2]
69 70
def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' block into the record.

    Sets ``record.database`` to the second token of the DATABASE line and,
    when the third token is '(nucleotide)' or '(peptide)', sets
    ``record.alphabet`` accordingly (otherwise the alphabet is left
    untouched). Every row of the motif table that follows is turned into a
    meme.Motif carrying its name and length, and appended to the record.

    Raises ValueError when an expected marker line is malformed, and
    StopIteration when the handle ends where a marker line was expected.
    """
    # Skip ahead to the block header.
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    # The builtin next() is used instead of handle.next() so that the
    # parser also works with Python 3 file objects and plain iterators.
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    line = next(handle)
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    record.database = words[1]
    if words[2] == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif words[2] == '(peptide)':
        record.alphabet = IUPAC.protein
    # Skip ahead to the header of the motif table.
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    line = next(handle)
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    # One motif per row; the first blank line ends the table.
    for line in handle:
        if not line.strip():
            break
        words = line.strip().split()
        motif = meme.Motif(record.alphabet)
        motif.name = words[0]
        motif.length = int(words[1])
        # words[2] contains the best possible match
        record.append(motif)
102 103
def __read_section_i(record, handle):
    """Parse SECTION I and collect the sequence names it lists.

    After the 'SECTION I:' header and the 'SEQUENCE NAME' column header,
    the first whitespace-separated token of every table row is appended to
    ``record.sequences``. The table ends at the first blank line, which
    must be followed by a line starting with '****'.

    Raises ValueError when a marker line is malformed or a row holds fewer
    than two tokens, and StopIteration when the handle ends where a marker
    line was expected.
    """
    for line in handle:
        if line.startswith('SECTION I:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # The builtin next() is used instead of handle.next() so that the
    # parser also works with Python 3 file objects and plain iterators.
    line = next(handle)
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    for line in handle:
        if not line.strip():
            break
        # Only the name is kept; the remainder of the row (description,
        # E-value, length) is deliberately ignored.
        sequence, _description_evalue_length = line.split(None, 1)
        record.sequences.append(sequence)
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
123 124
def __read_section_ii(record, handle):
    """Parse SECTION II and store the motif diagram of each sequence.

    After the 'SECTION II:' header and the 'SEQUENCE NAME' column header,
    each table row must hold exactly three tokens: sequence name, p-value
    and diagram. The diagram is stored in ``record.diagrams`` under the
    sequence name. A row that starts with a space is a continuation whose
    stripped text is appended to the diagram of the preceding sequence.
    The table ends at the first blank line, which must be followed by a
    line starting with '****'.

    Raises ValueError when a marker line or table row is malformed, and
    StopIteration when the handle ends where a marker line was expected.
    """
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # The builtin next() is used instead of handle.next() so that the
    # parser also works with Python 3 file objects and plain iterators.
    line = next(handle)
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    # Name of the sequence whose diagram is currently being assembled.
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            if sequence is None:
                # A continuation needs a preceding sequence row; report it
                # as a format error instead of failing on an unbound name.
                raise ValueError("Diagram continuation line without a preceding sequence line:\n%s" % line)
            record.diagrams[sequence] += line.strip()
        else:
            # The p-value column is parsed but not stored.
            sequence, _pvalue, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
147 148
def __read_section_iii(record, handle):
    """Skip over the opening part of SECTION III without storing anything.

    Four markers are consumed from ``handle`` in turn: the 'SECTION III:'
    header, the next line starting with '****', the next line starting
    with '*****', and finally the next line that is not blank. ``record``
    is not modified. Running out of input at any stage is not an error.
    """
    def consume_through(is_marker):
        # Read lines until one satisfies is_marker or the input runs out.
        for text in handle:
            if is_marker(text):
                return

    consume_through(lambda text: text.startswith('SECTION III:'))
    consume_through(lambda text: text.startswith('****'))
    consume_through(lambda text: text.startswith('*****'))
    consume_through(lambda text: bool(text.strip()))
162