Package Bio :: Package PDB :: Module MMCIFParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.MMCIFParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """mmCIF parser""" 
  7   
  8  from __future__ import print_function 
  9   
 10  from string import ascii_letters 
 11   
 12  import numpy 
 13  import warnings 
 14   
 15  from Bio._py3k import range 
 16   
 17  from Bio.PDB.MMCIF2Dict import MMCIF2Dict 
 18  from Bio.PDB.StructureBuilder import StructureBuilder 
 19  from Bio.PDB.PDBExceptions import PDBConstructionException 
 20  from Bio.PDB.PDBExceptions import PDBConstructionWarning 
 21   
 22   
class MMCIFParser(object):
    """Parse an mmCIF file and return a Structure object."""

    # mmCIF uses "." (inapplicable) and "?" (unknown) as null markers.
    _unassigned = (".", "?")

    def __init__(self, structure_builder=None, QUIET=False):
        """Create an MMCIFParser object.

        The mmCIF parser calls a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instantiated by the
        MMCIFParser object itself, but if the user provides his/her own
        StructureBuilder object, the latter is used instead.

        Arguments:
         - structure_builder - an optional user implemented StructureBuilder
           class.
         - QUIET - Evaluated as a Boolean. If true, warnings issued in
           constructing the SMCRA data will be suppressed. If false
           (DEFAULT), they will be shown. These warnings might be indicative
           of problems in the mmCIF file!

        """
        if structure_builder is not None:
            self._structure_builder = structure_builder
        else:
            self._structure_builder = StructureBuilder()
        self.line_counter = 0
        self.build_structure = None
        self.QUIET = bool(QUIET)

    # Public methods

    def get_structure(self, structure_id, filename):
        """Return the structure.

        Arguments:
         - structure_id - string, the id that will be used for the structure
         - filename - name of the mmCIF file OR an open filehandle

        """
        with warnings.catch_warnings():
            if self.QUIET:
                warnings.filterwarnings("ignore",
                                        category=PDBConstructionWarning)
            self._mmcif_dict = MMCIF2Dict(filename)
            self._build_structure(structure_id)
        return self._structure_builder.get_structure()

    # Private methods

    def _build_structure(self, structure_id):
        """Feed the atoms in ``self._mmcif_dict`` to the structure builder.

        Walks over the ``_atom_site`` columns row by row and calls
        init_model / init_chain / init_residue / init_atom on the builder
        whenever the corresponding level changes. Afterwards the unit cell
        and space group are passed to the builder if they can be read.

        Raises PDBConstructionException on an invalid model number,
        B factor or occupancy. A missing mandatory ``_atom_site`` column
        raises KeyError.
        """
        mmcif_dict = self._mmcif_dict
        atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
        residue_id_list = mmcif_dict["_atom_site.label_comp_id"]
        try:
            element_list = mmcif_dict["_atom_site.type_symbol"]
        except KeyError:
            element_list = None
        chain_id_list = mmcif_dict["_atom_site.label_asym_id"]
        x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
        y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
        z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
        alt_list = mmcif_dict["_atom_site.label_alt_id"]
        icode_list = mmcif_dict["_atom_site.pdbx_PDB_ins_code"]
        b_factor_list = mmcif_dict["_atom_site.B_iso_or_equiv"]
        occupancy_list = mmcif_dict["_atom_site.occupancy"]
        fieldname_list = mmcif_dict["_atom_site.group_PDB"]
        try:
            serial_list = [int(n) for n in
                           mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
        except KeyError:
            # No model number column
            serial_list = None
        except ValueError:
            # Invalid model number (malformed file)
            raise PDBConstructionException("Invalid model number")
        aniso_keys = ("_atom_site.aniso_U[1][1]",
                      "_atom_site.aniso_U[1][2]",
                      "_atom_site.aniso_U[1][3]",
                      "_atom_site.aniso_U[2][2]",
                      "_atom_site.aniso_U[2][3]",
                      "_atom_site.aniso_U[3][3]")
        try:
            aniso_columns = [mmcif_dict[key] for key in aniso_keys]
        except KeyError:
            # no anisotropic B factors
            aniso_columns = None
        # if auth_seq_id is present, we use this.
        # Otherwise label_seq_id is used (and only then is it required).
        if "_atom_site.auth_seq_id" in mmcif_dict:
            seq_id_list = mmcif_dict["_atom_site.auth_seq_id"]
        else:
            seq_id_list = mmcif_dict["_atom_site.label_seq_id"]

        # Now loop over atoms and build the structure
        structure_builder = self._structure_builder
        structure_builder.init_structure(structure_id)
        structure_builder.init_seg(" ")
        # Historically, Biopython PDB parser uses model_id to mean array index
        # so serial_id means the Model ID specified in the file.
        # None (not 0) marks "no model seen yet", so that a file whose first
        # model number is 0 still gets its model initialised.
        current_model_id = -1
        current_serial_id = None
        current_chain_id = None
        current_residue_key = None
        for i, name in enumerate(atom_id_list):
            # set the line_counter for 'ATOM' lines only and not
            # as a global line counter found in the PDBParser()
            # this number should match the '_atom_site.id' index in the MMCIF
            structure_builder.set_line_counter(i)

            resname = residue_id_list[i]
            chainid = chain_id_list[i]
            altloc = alt_list[i]
            if altloc in self._unassigned:
                altloc = " "
            resseq = seq_id_list[i]
            icode = icode_list[i]
            if icode in self._unassigned:
                icode = " "
            # occupancy & B factor
            try:
                tempfactor = float(b_factor_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing B factor")
            try:
                occupancy = float(occupancy_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing occupancy")
            if fieldname_list[i] == "HETATM":
                hetatm_flag = "H"
            else:
                hetatm_flag = " "

            if serial_list is None:
                # no explicit model column; initialize a single model, once
                if current_model_id < 0:
                    current_model_id = 0
                    structure_builder.init_model(current_model_id)
            else:
                # model column exists; use it
                serial_id = serial_list[i]
                if current_serial_id != serial_id:
                    # if serial changes, update it and start new model
                    current_serial_id = serial_id
                    current_model_id += 1
                    structure_builder.init_model(current_model_id,
                                                 current_serial_id)
                    current_chain_id = None
                    current_residue_key = None

            if current_chain_id != chainid:
                current_chain_id = chainid
                structure_builder.init_chain(current_chain_id)
                # a new chain always starts a new residue, even if the
                # residue number happens to repeat
                current_residue_key = None

            # A residue is identified by everything init_residue receives,
            # so insertion codes and residue-name changes start a new one.
            residue_key = (resname, hetatm_flag, resseq, icode)
            if current_residue_key != residue_key:
                current_residue_key = residue_key
                structure_builder.init_residue(resname, hetatm_flag,
                                               int(resseq), icode)

            coord = numpy.array((x_list[i], y_list[i], z_list[i]), 'f')
            element = element_list[i] if element_list else None
            structure_builder.init_atom(name, coord, tempfactor, occupancy,
                                        altloc, name, element=element)
            if aniso_columns is not None:
                anisou_array = numpy.array(
                    [float(column[i]) for column in aniso_columns], 'f')
                structure_builder.set_anisou(anisou_array)

        # Now try to set the cell
        try:
            a = float(mmcif_dict["_cell.length_a"])
            b = float(mmcif_dict["_cell.length_b"])
            c = float(mmcif_dict["_cell.length_c"])
            alpha = float(mmcif_dict["_cell.angle_alpha"])
            beta = float(mmcif_dict["_cell.angle_beta"])
            gamma = float(mmcif_dict["_cell.angle_gamma"])
            cell = numpy.array((a, b, c, alpha, beta, gamma), 'f')
            spacegroup = mmcif_dict["_symmetry.space_group_name_H-M"]
            spacegroup = spacegroup[1:-1]  # get rid of quotes!!
            structure_builder.set_symmetry(spacegroup, cell)
        except Exception:
            # Deliberately best-effort: a missing or unparsable cell or
            # space group must not invalidate the parsed coordinates.
            pass
if __name__ == "__main__":
    # Small command-line driver: parse the mmCIF file named on the command
    # line and print every model and chain with its residue count.
    import sys

    # Exactly one argument (the file to parse) is expected.
    if len(sys.argv) != 2:
        print("Usage: python MMCIFparser.py filename")
        sys.exit()

    parser = MMCIFParser()
    parsed = parser.get_structure("test", sys.argv[1])

    for model in parsed.get_list():
        print(model)
        for chain in model.get_list():
            residue_count = len(chain.get_list())
            print(chain)
            print("Found %d residues." % residue_count)