| Trees | Indices | Help |
|
|---|
|
|
1 # Copyright 2001 by Tarjei Mikkelsen. All rights reserved.
2 # Copyright 2007 by Michiel de Hoon. All rights reserved.
3 # This code is part of the Biopython distribution and governed by its
4 # license. Please see the LICENSE file that should have been included
5 # as part of this package.
6
7 """
8 This module provides code to work with the KEGG Enzyme database.
9
10 Functions:
11 parse - Returns an iterator giving Record objects.
12
13 Classes:
14 Record -- Holds the information from a KEGG Enzyme record.
15 """
16
17 from Bio.KEGG import _write_kegg
18 from Bio.KEGG import _wrap_kegg
19
20
# Set up line wrapping rules (see Bio.KEGG._wrap_kegg for the meaning of
# each field; the rules are passed to it unchanged via ``wrap_rule``).
rxn_wrap = [
    0,
    "",
    (" + ", "", 1, 1),
    (" = ", "", 1, 1),
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]
name_wrap = [
    0,
    "",
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]


def id_wrap(indent):
    """Return a fresh wrap rule for identifier lists.

    ``indent`` becomes the first element of the rule; the remaining
    elements are fixed. A new list is built on every call, so callers
    may modify the result freely.
    """
    return [indent, "", (" ", "", 1, 0)]


def struct_wrap(indent):
    """Return a fresh wrap rule for structure identifier lists.

    ``indent`` becomes the first element of the rule; the remaining
    elements are fixed. A new list is built on every call.
    """
    return [indent, "", (" ", "", 1, 1)]
34
35
37 """Holds info from a KEGG Enzyme record.
38
39 Members:
40 entry The EC number (without the 'EC ').
41 name A list of the enzyme names.
42 classname A list of the classification terms.
43 sysname The systematic name of the enzyme.
44 reaction A list of the reaction description strings.
45 substrate A list of the substrates.
46 product A list of the products.
47 inhibitor A list of the inhibitors.
48 cofactor A list of the cofactors.
49 effector A list of the effectors.
50 comment A list of the comment strings.
51 pathway A list of 3-tuples: (database, id, pathway)
52 genes A list of 2-tuples: (organism, list of gene ids)
53 disease A list of 3-tuples: (database, id, disease)
54 structures A list of 2-tuples: (database, list of struct ids)
55 dblinks A list of 2-tuples: (database, list of db ids)
56 """
def __init__(self):
    """__init__(self)

    Create a new Record.

    The entry starts as an empty string; every other field starts as
    its own empty list.
    """
    self.entry = ""
    list_fields = (
        "name",
        "classname",
        "sysname",
        "reaction",
        "substrate",
        "product",
        "inhibitor",
        "cofactor",
        "effector",
        "comment",
        "pathway",
        "genes",
        "disease",
        "structures",
        "dblinks",
    )
    for field in list_fields:
        # A new list per attribute, so no two fields share storage.
        setattr(self, field, [])
78
def __str__(self):
    """__str__(self)

    Returns a string representation of this Record.

    The text is the output of each section formatter, in the fixed
    order below, followed by the "///" terminator.
    """
    parts = [
        self._entry(),
        self._name(),
        self._classname(),
        self._sysname(),
        self._reaction(),
        self._substrate(),
        self._product(),
        self._inhibitor(),
        self._cofactor(),
        self._effector(),
        self._comment(),
        self._pathway(),
        self._genes(),
        self._disease(),
        self._structures(),
        self._dblinks(),
        "///",
    ]
    return "".join(parts)
101
105
110
114
def _sysname(self):
    """Return the SYSNAME section built from self.sysname."""
    # Each systematic name is wrapped with the name rules before writing.
    wrapped = [_wrap_kegg(item, wrap_rule=name_wrap) for item in self.sysname]
    return _write_kegg("SYSNAME", wrapped)
119
def _reaction(self):
    """Return the REACTION section built from self.reaction."""
    # Reactions use their own wrap rule (rxn_wrap) rather than name_wrap.
    wrapped = [_wrap_kegg(item, wrap_rule=rxn_wrap) for item in self.reaction]
    return _write_kegg("REACTION", wrapped)
124
def _substrate(self):
    """Return the SUBSTRATE section built from self.substrate."""
    wrapped = [_wrap_kegg(item, wrap_rule=name_wrap) for item in self.substrate]
    return _write_kegg("SUBSTRATE", wrapped)
129
def _product(self):
    """Return the PRODUCT section built from self.product."""
    wrapped = [_wrap_kegg(item, wrap_rule=name_wrap) for item in self.product]
    return _write_kegg("PRODUCT", wrapped)
134
def _inhibitor(self):
    """Return the INHIBITOR section built from self.inhibitor."""
    wrapped = [_wrap_kegg(item, wrap_rule=name_wrap) for item in self.inhibitor]
    return _write_kegg("INHIBITOR", wrapped)
139
def _cofactor(self):
    """Return the COFACTOR section built from self.cofactor."""
    wrapped = [_wrap_kegg(item, wrap_rule=name_wrap) for item in self.cofactor]
    return _write_kegg("COFACTOR", wrapped)
144
def _effector(self):
    """Return the EFFECTOR section built from self.effector."""
    wrapped = [_wrap_kegg(item, wrap_rule=name_wrap) for item in self.effector]
    return _write_kegg("EFFECTOR", wrapped)
149
def _comment(self):
    """Return the COMMENT section built from self.comment."""
    # Comments are wrapped with the identifier rule at indent 0.
    rule = id_wrap(0)
    wrapped = [_wrap_kegg(item, wrap_rule=rule) for item in self.comment]
    return _write_kegg("COMMENT", wrapped)
154
def _pathway(self):
    """Return the PATHWAY section built from self.pathway.

    Each (database, id, pathway) tuple becomes one "database: id pathway"
    item, wrapped with the identifier rule at indent 16.
    """
    items = [
        link[0] + ": " + link[1] + " " + link[2]
        for link in self.pathway
    ]
    wrapped = [_wrap_kegg(item, wrap_rule=id_wrap(16)) for item in items]
    return _write_kegg("PATHWAY", wrapped)
162
def _genes(self):
    """Return the GENES section built from self.genes.

    Each (organism, gene ids) tuple becomes one "organism: id id ..."
    item, wrapped with the identifier rule at indent 5.
    """
    items = [
        group[0] + ": " + " ".join(group[1])
        for group in self.genes
    ]
    wrapped = [_wrap_kegg(item, wrap_rule=id_wrap(5)) for item in items]
    return _write_kegg("GENES", wrapped)
170
def _disease(self):
    """Return the DISEASE section built from self.disease.

    Each (database, id, disease) tuple becomes one "database: id disease"
    item, wrapped with the identifier rule at indent 13.
    """
    items = [
        link[0] + ": " + link[1] + " " + link[2]
        for link in self.disease
    ]
    wrapped = [_wrap_kegg(item, wrap_rule=id_wrap(13)) for item in items]
    return _write_kegg("DISEASE", wrapped)
178
def _structures(self):
    """Return the STRUCTURES section built from self.structures.

    Each (database, struct ids) tuple becomes one "database: id id ... "
    item (note the trailing separator), wrapped with the structure rule
    at indent 5.
    """
    items = [
        group[0] + ": " + " ".join(group[1]) + " "
        for group in self.structures
    ]
    wrapped = [_wrap_kegg(item, wrap_rule=struct_wrap(5)) for item in items]
    return _write_kegg("STRUCTURES", wrapped)
186
def _dblinks(self):
    """Return the DBLINKS section built from self.dblinks."""
    # This is a bit of a cheat: the ids are only space-joined, never
    # wrapped, which won't work if enzyme entries have more than one
    # link id per db id. For now that's not the case - the ids are kept
    # in a list only to make this class similar to Compound.Record.
    lines = [
        link[0] + ": " + " ".join(link[1])
        for link in self.dblinks
    ]
    return _write_kegg("DBLINKS", lines)
196
197
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/enzyme.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase
    >>> handle.close()

    Arguments:
     - handle - an iterable of text lines (e.g. an open file).

    Every line is split into a 12-character keyword column and a data
    column. A keyword column consisting only of spaces marks a
    continuation of the previous keyword. A Record is yielded each time
    a line starting with "///" is reached; data after the last "///" is
    not yielded.

    A continuation line for DBLINKS, DISEASE, GENES or STRUCTURES that
    has no preceding entry in the current record raises IndexError.
    """
    width = 12
    continuation = " " * width
    # Keywords whose data is appended as-is to a list attribute:
    # keyword -> (Record attribute, strip ";" from both ends first?)
    list_fields = {
        "CLASS": ("classname", False),
        "COFACTOR": ("cofactor", False),
        "COMMENT": ("comment", False),
        "EFFECTOR": ("effector", True),
        "INHIBITOR": ("inhibitor", True),
        "NAME": ("name", True),
        "PRODUCT": ("product", True),
        "REACTION": ("reaction", True),
        "SUBSTRATE": ("substrate", True),
        "SYSNAME": ("sysname", True),
    }
    record = Record()
    # Start with no active keyword so a stray leading continuation line
    # is ignored instead of hitting an unbound name.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        column = line[:width]
        if column != continuation:
            # Only a full-width, space-padded column names a keyword;
            # short or otherwise malformed columns match nothing.
            keyword = column.rstrip(" ") if len(column) == width else ""
        data = line[width:].strip()
        if keyword in list_fields:
            attribute, strip_separator = list_fields[keyword]
            if strip_separator:
                data = data.strip(";")
            getattr(record, attribute).append(data)
        elif keyword == "ENTRY":
            # The first word is skipped (the 'EC ' prefix, per the
            # Record docstring); the second is stored as the entry.
            record.entry = data.split()[1]
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only, so ids that contain a
                # colon themselves do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                record.dblinks[-1][1].extend(data.split())
        elif keyword == "DISEASE":
            if ":" in data:
                # First colon only: disease names may contain colons.
                database, rest = data.split(":", 1)
                number, name = rest.split(None, 1)
                record.disease.append((database, number, name))
            else:
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "GENES":
            # Anything from "(" onwards in a gene id is dropped.
            if data[3:5] == ": ":
                key, values = data.split(":", 1)
                genes = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, genes))
            else:
                genes = [value.split("(")[0] for value in data.split()]
                record.genes[-1][1].extend(genes)
        elif keyword == "PATHWAY":
            if data[:5] == "PATH:":
                _, map_num, name = data.split(None, 2)
            else:
                # No "PATH:" prefix: the first word is the identifier.
                map_num, name = data.split(None, 1)
            record.pathway.append(("PATH", map_num, name))
        elif keyword == "STRUCTURES":
            if data[:4] == "PDB:":
                record.structures.append((data[:3], data[4:].split()))
            else:
                record.structures[-1][1].extend(data.split())
310
if __name__ == "__main__":
    # Executed as a script: hand over to Biopython's run_doctest helper.
    from Bio._utils import run_doctest

    run_doctest()
314
| Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Feb 5 18:03:18 2013 | http://epydoc.sourceforge.net |