1
2
3
4
5
6
"""
This module provides code to work with the enzyme.dat file from
Enzyme.
http://www.expasy.ch/enzyme/

Tested with the release of 03-Mar-2009.

Functions:
read       Reads a file containing one ENZYME entry
parse      Reads a file containing multiple ENZYME entries

Classes:
Record     Holds ENZYME data.

"""
22
23
def parse(handle):
    """Parse ENZYME records.

    This function is for parsing ENZYME files containing multiple
    records.

    handle   - handle to the file.

    This is a generator: it yields the records produced by __read one
    at a time and stops as soon as __read returns nothing.
    """
    while True:
        record = __read(handle)
        if not record:
            # __read found no further record in the handle.
            break
        yield record
37
38
def read(handle):
    """Read one ENZYME record.

    This function is for parsing ENZYME files containing
    exactly one record.

    handle   - handle to the file.

    Returns whatever __read returns for the first record in the handle.
    Raises ValueError if anything other than whitespace follows that
    record.
    """
    record = __read(handle)
    # Anything still left in the handle means the file held more than
    # the single record we were asked for.  Trailing blank lines are not
    # a record, so whitespace-only leftovers are tolerated.
    remainder = handle.read()
    if remainder.strip():
        raise ValueError("More than one ENZYME record found")
    return record
53
54
class Record(dict):
    """\
Holds information from an ExPASy ENZYME record as a Python dictionary.

Each record contains the following keys:
    ID: EC number
    DE: Recommended name
    AN: Alternative names (if any)
    CA: Catalytic activity
    CF: Cofactors (if any)
    PR: Pointers to the Prosite documentation entrie(s) that
        correspond to the enzyme (if any)
    DR: Pointers to the Swiss-Prot protein sequence entrie(s)
        that correspond to the enzyme (if any)
    CC: Comments
"""

    def __init__(self):
        """Create a record with every key present and empty."""
        dict.__init__(self)
        # Single-valued fields start as empty strings.
        self["ID"] = ''
        self["DE"] = ''
        self["CA"] = ''
        self["CF"] = ''
        # Multi-valued fields get a fresh list per instance.
        self["AN"] = []
        self["CC"] = []
        self["PR"] = []
        self["DR"] = []

    def __repr__(self):
        """Return the class name followed by the ID and DE, when set."""
        name = self.__class__.__name__
        if not self["ID"]:
            return "%s ( )" % (name)
        if not self["DE"]:
            return "%s (%s)" % (name, self["ID"])
        return "%s (%s, %s)" % (name, self["ID"], self["DE"])

    def __str__(self):
        """Return a one-line summary of all fields.

        The DR entries are only counted, not listed.
        """
        fields = [
            "ID: " + self["ID"],
            "DE: " + self["DE"],
            "AN: " + repr(self["AN"]),
            "CA: '" + self["CA"] + "'",
            "CF: " + self["CF"],
            "CC: " + repr(self["CC"]),
            "PR: " + repr(self["PR"]),
            "DR: %d Records" % len(self["DR"]),
        ]
        return " ".join(fields)
104
105
106
107
def __read(handle):
    """Read the next ENZYME record from the handle.

    handle   - handle to the file.

    Lines are consumed until a "//" terminator is met after an ID line
    has started a record; that record is then returned.  A "//" seen
    before any ID line is skipped.  Returns None if the handle runs out
    without any record having been started.

    Raises ValueError if the handle ends in the middle of a record, or
    if a PR line does not start with "PROSITE; ".
    """
    record = None
    for line in handle:
        # The two-letter line code sits in columns 1-2; data starts at
        # column 6.
        key, value = line[:2], line[5:].rstrip()
        if key == "ID":
            record = Record()
            record["ID"] = value
        elif key == "DE":
            record["DE"] += value
        elif key == "AN":
            # A name that does not yet end with "." continues on this line.
            if record["AN"] and not record["AN"][-1].endswith("."):
                record["AN"][-1] += " " + value
            else:
                record["AN"].append(value)
        elif key == "CA":
            record["CA"] += value
        elif key == "DR":
            # Each line holds ";"-separated "a, b" pairs, stored as [a, b].
            pair_data = value.rstrip(";").split(';')
            for pair in pair_data:
                t1, t2 = pair.split(',')
                row = [t1.strip(), t2.strip()]
                record["DR"].append(row)
        elif key == "CF":
            if record["CF"]:
                record["CF"] += " " + value
            else:
                record["CF"] = value
        elif key == "PR":
            # Explicit check rather than assert, so that malformed input
            # is still rejected when Python runs with -O.
            if not value.startswith("PROSITE; "):
                raise ValueError(
                    "PR line does not start with 'PROSITE; ': %r" % value)
            value = value[9:].rstrip(";")
            record["PR"].append(value)
        elif key == 'CC':
            if value.startswith("-!- "):
                # "-!- " opens a new comment.
                record["CC"].append(value[4:])
            elif value.startswith(" ") and record["CC"]:
                # Indented text continues the previous comment.
                record["CC"][-1] += value[3:]
            # Any other CC line is skipped (presumably the file's
            # header notice - TODO confirm).
        elif key == "//":
            if record:
                return record
            else:
                # Terminator with no record started yet: keep scanning.
                continue
    if record:
        raise ValueError("Unexpected end of stream")
152