1
2
3
4
5
6
7 from Bio.Alphabet import IUPAC
8 from Bio.motifs import meme
9
10
12 """The class for holding the results from a MAST run.
13
14 A mast.Record holds data about matches between motifs and sequences.
15 The motifs held by the Record are objects of the class meme.Motif.
16
17 The mast.Record class inherits from list, so you can access individual
18 motifs in the record by their index. Alternatively, you can find a motif
19 by its name:
20
21 >>> f = open("mast.output.txt")
22 >>> from Bio import motifs
23 >>> record = motifs.parse(f, 'MAST')
24 >>> motif = record[0]
25 >>> print motif.name
26 1
27 >>> motif = record['1']
28 >>> print motif.name
29 1
30 """
31
38
# Look up a motif either by name (string key) or by list position/slice.
# NOTE(review): the enclosing "def" line and all indentation are missing from
# this listing; the "else" below presumably pairs with the isinstance() test
# rather than with the "for" loop -- confirm against the real file.
40 if isinstance(key, str):
# String key: scan the motifs held in this list for one whose name equals key.
41 for motif in self:
42 if motif.name==key:
43 return motif
# NOTE(review): if the reading above is right, a string key that matches no
# motif reaches the end without an explicit return (i.e. yields None, no error).
44 else:
# Any non-string key is passed unchanged to the plain list implementation.
45 return list.__getitem__(self, key)
46
47
57
58
59
60
61
# Locate the line containing "MAST version" and store its third
# whitespace-separated token in record.version.
63 for line in handle:
64 if "MAST version" in line:
65 break
# NOTE(review): indentation is lost in this listing. This "else" is presumably
# a for/else (raise only when the input is exhausted without a match), but it
# could also pair with the "if" above (raise on the first non-matching line)
# -- confirm against the real file before restructuring.
66 else:
67 raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
# "line" is still bound to the line that triggered the break above.
68 record.version = line.strip().split()[2]
69
70
# Parse the "DATABASE AND MOTIFS" section from ``handle`` into ``record``:
# the database name, the alphabet implied by the sequence type, and one
# meme.Motif (name and length) per row of the motif table.
# Raises ValueError when an expected marker line is malformed.

# Skip ahead to the section title.
for line in handle:
    if line.startswith('DATABASE AND MOTIFS'):
        break
# Use the built-in next() rather than handle.next(): the .next() method only
# exists on Python 2 iterators, while next() works on any iterator (2.6+ / 3).
line = next(handle)
if not line.startswith('****'):
    raise ValueError("Line does not start with '****':\n%s" % line)
line = next(handle)
if 'DATABASE' not in line:
    raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
words = line.strip().split()
record.database = words[1]
# The third token names the sequence type. Any other value leaves
# record.alphabet untouched.
if words[2] == '(nucleotide)':
    record.alphabet = IUPAC.unambiguous_dna
elif words[2] == '(peptide)':
    record.alphabet = IUPAC.protein
# Skip ahead to the motif table header, then validate its underline.
for line in handle:
    if 'MOTIF WIDTH' in line:
        break
line = next(handle)
if '----' not in line:
    raise ValueError("Line does not contain '----':\n%s" % line)
# One table row per motif; the first blank line ends the table.
for line in handle:
    if not line.strip():
        break
    words = line.strip().split()
    motif = meme.Motif(record.alphabet)
    motif.name = words[0]
    motif.length = int(words[1])
    record.append(motif)
102
103
# Parse "SECTION I" from ``handle``: collect the name of every sequence listed
# in the table into record.sequences, in file order.
# Raises ValueError when the table underline or the closing '****' line is
# not where it is expected.

# Skip ahead to the section title, then to the table header.
for line in handle:
    if line.startswith('SECTION I:'):
        break
for line in handle:
    if line.startswith('SEQUENCE NAME'):
        break
# Use the built-in next() rather than handle.next(): the .next() method only
# exists on Python 2 iterators, while next() works on any iterator (2.6+ / 3).
line = next(handle)
if not line.startswith('---'):
    raise ValueError("Line does not start with '---':\n%s" % line)
# One row per sequence; the first blank line ends the table.
for line in handle:
    if not line.strip():
        break
    # Only the first token (the sequence name) is kept. The remainder of the
    # row is split off and discarded; a row with a single token still fails
    # the two-way unpacking, exactly as before.
    sequence, _ = line.split(None, 1)
    record.sequences.append(sequence)
line = next(handle)
if not line.startswith('****'):
    raise ValueError("Line does not start with '****':\n%s" % line)
123
124
# Parse "SECTION II" from ``handle``: fill record.diagrams with one diagram
# string per sequence name.
# Raises ValueError when the table underline or the closing '****' line is
# not where it is expected.

# Skip ahead to the section title, then to the table header.
for line in handle:
    if line.startswith('SECTION II:'):
        break
for line in handle:
    if line.startswith('SEQUENCE NAME'):
        break
# Use the built-in next() rather than handle.next(): the .next() method only
# exists on Python 2 iterators, while next() works on any iterator (2.6+ / 3).
line = next(handle)
if not line.startswith('---'):
    raise ValueError("Line does not start with '---':\n%s" % line)
# The first blank line ends the table.
for line in handle:
    if not line.strip():
        break
    elif line.startswith(" "):
        # A row starting with a space is appended to the diagram of the most
        # recently named sequence.
        diagram = line.strip()
        record.diagrams[sequence] += diagram
    else:
        # A full row must split into exactly three tokens; the middle one is
        # unpacked as pvalue but not stored.
        sequence, pvalue, diagram = line.split()
        record.diagrams[sequence] = diagram
line = next(handle)
if not line.startswith('****'):
    raise ValueError("Line does not start with '****':\n%s" % line)
147
148
162