1
2
3
4
5
6 """Parser for PDB files."""
7
8
9 from __future__ import with_statement
10
11 import warnings
12
13 import numpy
14
15 from Bio.File import as_handle
16
17 from Bio.PDB.PDBExceptions import PDBConstructionException
18 from Bio.PDB.PDBExceptions import PDBConstructionWarning
19
20 from Bio.PDB.StructureBuilder import StructureBuilder
21 from Bio.PDB.parse_pdb_header import _parse_pdb_header_list
22
23
24
25
26
28 """
29 Parse a PDB file and return a Structure object.
30 """
31
32 - def __init__(self, PERMISSIVE=True, get_header=False,
33 structure_builder=None, QUIET=False):
34 """
35 The PDB parser call a number of standard methods in an aggregated
36 StructureBuilder object. Normally this object is instanciated by the
37 PDBParser object itself, but if the user provides his/her own
38 StructureBuilder object, the latter is used instead.
39
40 Arguments:
41
42 o PERMISSIVE - Evaluated as a Boolean. If false, exceptions in
43 constructing the SMCRA data structure are fatal. If true (DEFAULT),
44 the exceptions are caught, but some residues or atoms will be missing.
45 THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!.
46
47 o structure_builder - an optional user implemented StructureBuilder class.
48
49 o QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
50 the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
51 These warnings might be indicative of problems in the PDB file!
52 """
53 if structure_builder is not None:
54 self.structure_builder = structure_builder
55 else:
56 self.structure_builder = StructureBuilder()
57 self.header = None
58 self.trailer = None
59 self.line_counter = 0
60 self.PERMISSIVE = bool(PERMISSIVE)
61 self.QUIET = bool(QUIET)
62
63
64
66 """Return the structure.
67
68 Arguments:
69 o id - string, the id that will be used for the structure
70 o file - name of the PDB file OR an open filehandle
71 """
72
73 if self.QUIET:
74 warning_list = warnings.filters[:]
75 warnings.filterwarnings("ignore", category=PDBConstructionWarning)
76
77 self.header = None
78 self.trailer = None
79
80 self.structure_builder.init_structure(id)
81
82 with as_handle(file) as handle:
83 self._parse(handle.readlines())
84
85 self.structure_builder.set_header(self.header)
86
87 structure = self.structure_builder.get_structure()
88
89 if self.QUIET:
90 warnings.filters = warning_list
91
92 return structure
93
95 "Return the header."
96 return self.header
97
99 "Return the trailer."
100 return self.trailer
101
102
103
104 - def _parse(self, header_coords_trailer):
110
112 "Get the header of the PDB file, return the rest."
113 structure_builder = self.structure_builder
114 i = 0
115 for i in range(0, len(header_coords_trailer)):
116 structure_builder.set_line_counter(i + 1)
117 line = header_coords_trailer[i]
118 record_type = line[0:6]
119 if record_type == "ATOM " or record_type == "HETATM" or record_type == "MODEL ":
120 break
121 header = header_coords_trailer[0:i]
122
123 self.line_counter = i
124 coords_trailer = header_coords_trailer[i:]
125 header_dict = _parse_pdb_header_list(header)
126 return header_dict, coords_trailer
127
129 "Parse the atomic data in the PDB file."
130 local_line_counter = 0
131 structure_builder = self.structure_builder
132 current_model_id = 0
133
134 model_open = 0
135 current_chain_id = None
136 current_segid = None
137 current_residue_id = None
138 current_resname = None
139 for i in range(0, len(coords_trailer)):
140 line = coords_trailer[i]
141 record_type = line[0:6]
142 global_line_counter = self.line_counter + local_line_counter + 1
143 structure_builder.set_line_counter(global_line_counter)
144 if record_type == "ATOM " or record_type == "HETATM":
145
146 if not model_open:
147 structure_builder.init_model(current_model_id)
148 current_model_id += 1
149 model_open = 1
150 fullname = line[12:16]
151
152 split_list = fullname.split()
153 if len(split_list) != 1:
154
155
156 name = fullname
157 else:
158
159 name = split_list[0]
160 altloc = line[16]
161 resname = line[17:20]
162 chainid = line[21]
163 try:
164 serial_number = int(line[6:11])
165 except:
166 serial_number = 0
167 resseq = int(line[22:26].split()[0])
168 icode = line[26]
169 if record_type == "HETATM":
170 if resname == "HOH" or resname == "WAT":
171 hetero_flag = "W"
172 else:
173 hetero_flag = "H"
174 else:
175 hetero_flag = " "
176 residue_id = (hetero_flag, resseq, icode)
177
178 try:
179 x = float(line[30:38])
180 y = float(line[38:46])
181 z = float(line[46:54])
182 except:
183
184
185 raise PDBConstructionException("Invalid or missing coordinate(s) at line %i."
186 % global_line_counter)
187 coord = numpy.array((x, y, z), "f")
188
189 try:
190 occupancy = float(line[54:60])
191 except:
192 self._handle_PDB_exception("Invalid or missing occupancy",
193 global_line_counter)
194 occupancy = 0.0
195 try:
196 bfactor = float(line[60:66])
197 except:
198 self._handle_PDB_exception("Invalid or missing B factor",
199 global_line_counter)
200 bfactor = 0.0
201 segid = line[72:76]
202 element = line[76:78].strip()
203 if current_segid != segid:
204 current_segid = segid
205 structure_builder.init_seg(current_segid)
206 if current_chain_id != chainid:
207 current_chain_id = chainid
208 structure_builder.init_chain(current_chain_id)
209 current_residue_id = residue_id
210 current_resname = resname
211 try:
212 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
213 except PDBConstructionException, message:
214 self._handle_PDB_exception(message, global_line_counter)
215 elif current_residue_id != residue_id or current_resname != resname:
216 current_residue_id = residue_id
217 current_resname = resname
218 try:
219 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
220 except PDBConstructionException, message:
221 self._handle_PDB_exception(message, global_line_counter)
222
223 try:
224 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc,
225 fullname, serial_number, element)
226 except PDBConstructionException, message:
227 self._handle_PDB_exception(message, global_line_counter)
228 elif record_type == "ANISOU":
229 anisou = map(float, (line[28:35], line[35:42], line[43:49],
230 line[49:56], line[56:63], line[63:70]))
231
232 anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f")
233 structure_builder.set_anisou(anisou_array)
234 elif record_type == "MODEL ":
235 try:
236 serial_num = int(line[10:14])
237 except:
238 self._handle_PDB_exception("Invalid or missing model serial number",
239 global_line_counter)
240 serial_num = 0
241 structure_builder.init_model(current_model_id, serial_num)
242 current_model_id += 1
243 model_open = 1
244 current_chain_id = None
245 current_residue_id = None
246 elif record_type == "END " or record_type == "CONECT":
247
248 self.line_counter += local_line_counter
249 return coords_trailer[local_line_counter:]
250 elif record_type == "ENDMDL":
251 model_open = 0
252 current_chain_id = None
253 current_residue_id = None
254 elif record_type == "SIGUIJ":
255
256 siguij = map(float, (line[28:35], line[35:42], line[42:49],
257 line[49:56], line[56:63], line[63:70]))
258
259 siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f")
260 structure_builder.set_siguij(siguij_array)
261 elif record_type == "SIGATM":
262
263 sigatm = map(float, (line[30:38], line[38:45], line[46:54],
264 line[54:60], line[60:66]))
265 sigatm_array = numpy.array(sigatm, "f")
266 structure_builder.set_sigatm(sigatm_array)
267 local_line_counter += 1
268
269 self.line_counter = self.line_counter + local_line_counter
270 return []
271
273 """
274 This method catches an exception that occurs in the StructureBuilder
275 object (if PERMISSIVE), or raises it again, this time adding the
276 PDB line number to the error message.
277 """
278 message = "%s at line %i." % (message, line_counter)
279 if self.PERMISSIVE:
280
281 warnings.warn("PDBConstructionException: %s\n"
282 "Exception ignored.\n"
283 "Some atoms or residues may be missing in the data structure."
284 % message, PDBConstructionWarning)
285 else:
286
287 raise PDBConstructionException(message)
288
289
290 if __name__ == "__main__":
291
292 import sys
293
294 p = PDBParser(PERMISSIVE=True)
295
296 filename = sys.argv[1]
297 s = p.get_structure("scr", filename)
298
299 for m in s:
300 p = m.get_parent()
301 assert(p is s)
302 for c in m:
303 p = c.get_parent()
304 assert(p is m)
305 for r in c:
306 print r
307 p = r.get_parent()
308 assert(p is c)
309 for a in r:
310 p = a.get_parent()
311 if not p is r:
312 print p, r
313