Package Bio :: Package PDB :: Module MMCIFParser
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.MMCIFParser

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """mmCIF parser (partly implemented in C).""" 
  7   
  8  from string import ascii_letters 
  9   
 10  import numpy 
 11   
 12  from Bio.PDB.MMCIF2Dict import MMCIF2Dict 
 13  from Bio.PDB.StructureBuilder import StructureBuilder 
 14  from Bio.PDB.PDBExceptions import PDBConstructionException 
 15   
 16   
17 -class MMCIFParser(object):
18 - def get_structure(self, structure_id, filename):
19 self._mmcif_dict=MMCIF2Dict(filename) 20 self._structure_builder=StructureBuilder() 21 self._build_structure(structure_id) 22 return self._structure_builder.get_structure()
23
24 - def _build_structure(self, structure_id):
25 mmcif_dict=self._mmcif_dict 26 atom_id_list=mmcif_dict["_atom_site.label_atom_id"] 27 residue_id_list=mmcif_dict["_atom_site.label_comp_id"] 28 try: 29 element_list = mmcif_dict["_atom_site.type_symbol"] 30 except KeyError: 31 element_list = None 32 seq_id_list=mmcif_dict["_atom_site.label_seq_id"] 33 chain_id_list=mmcif_dict["_atom_site.label_asym_id"] 34 x_list=map(float, mmcif_dict["_atom_site.Cartn_x"]) 35 y_list=map(float, mmcif_dict["_atom_site.Cartn_y"]) 36 z_list=map(float, mmcif_dict["_atom_site.Cartn_z"]) 37 alt_list=mmcif_dict["_atom_site.label_alt_id"] 38 b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"] 39 occupancy_list=mmcif_dict["_atom_site.occupancy"] 40 fieldname_list=mmcif_dict["_atom_site.group_PDB"] 41 try: 42 serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]] 43 except KeyError: 44 # No model number column 45 serial_list = None 46 except ValueError: 47 # Invalid model number (malformed file) 48 raise PDBConstructionException("Invalid model number") 49 try: 50 aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"] 51 aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"] 52 aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"] 53 aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"] 54 aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"] 55 aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"] 56 aniso_flag=1 57 except KeyError: 58 # no anisotropic B factors 59 aniso_flag=0 60 # if auth_seq_id is present, we use this. 61 # Otherwise label_seq_id is used. 62 if "_atom_site.auth_seq_id" in mmcif_dict: 63 seq_id_list=mmcif_dict["_atom_site.auth_seq_id"] 64 else: 65 seq_id_list=mmcif_dict["_atom_site.label_seq_id"] 66 # Now loop over atoms and build the structure 67 current_chain_id=None 68 current_residue_id=None 69 structure_builder=self._structure_builder 70 structure_builder.init_structure(structure_id) 71 structure_builder.init_seg(" ") 72 # Historically, Biopython PDB parser uses model_id to mean array index 73 # so serial_id means the Model ID specified in the file 74 current_model_id = 0 75 current_serial_id = 0 76 for i in xrange(0, len(atom_id_list)): 77 x=x_list[i] 78 y=y_list[i] 79 z=z_list[i] 80 resname=residue_id_list[i] 81 chainid=chain_id_list[i] 82 altloc=alt_list[i] 83 if altloc==".": 84 altloc=" " 85 resseq=seq_id_list[i] 86 name=atom_id_list[i] 87 tempfactor=b_factor_list[i] 88 occupancy=occupancy_list[i] 89 fieldname=fieldname_list[i] 90 if fieldname=="HETATM": 91 hetatm_flag="H" 92 else: 93 hetatm_flag=" " 94 if serial_list is not None: 95 # model column exists; use it 96 serial_id = serial_list[i] 97 if current_serial_id != serial_id: 98 # if serial changes, update it and start new model 99 current_serial_id = serial_id 100 structure_builder.init_model(current_model_id, current_serial_id) 101 current_model_id += 1 102 else: 103 # no explicit model column; initialize single model 104 structure_builder.init_model(current_model_id) 105 if current_chain_id!=chainid: 106 current_chain_id=chainid 107 structure_builder.init_chain(current_chain_id) 108 current_residue_id=resseq 109 icode, int_resseq=self._get_icode(resseq) 110 structure_builder.init_residue(resname, hetatm_flag, int_resseq, 111 icode) 112 elif current_residue_id!=resseq: 113 current_residue_id=resseq 114 icode, int_resseq=self._get_icode(resseq) 115 structure_builder.init_residue(resname, hetatm_flag, int_resseq, 116 icode) 117 coord=numpy.array((x, y, z), 'f') 118 element = element_list[i] if element_list else None 119 structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc, 120 name, element=element) 121 if aniso_flag==1: 122 u=(aniso_u11[i], aniso_u12[i], aniso_u13[i], 123 aniso_u22[i], aniso_u23[i], aniso_u33[i]) 124 mapped_anisou=map(float, u) 125 anisou_array=numpy.array(mapped_anisou, 'f') 126 structure_builder.set_anisou(anisou_array) 127 # Now try to set the cell 128 try: 129 a=float(mmcif_dict["_cell.length_a"]) 130 b=float(mmcif_dict["_cell.length_b"]) 131 c=float(mmcif_dict["_cell.length_c"]) 132 alpha=float(mmcif_dict["_cell.angle_alpha"]) 133 beta=float(mmcif_dict["_cell.angle_beta"]) 134 gamma=float(mmcif_dict["_cell.angle_gamma"]) 135 cell=numpy.array((a, b, c, alpha, beta, gamma), 'f') 136 spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"] 137 spacegroup=spacegroup[1:-1] # get rid of quotes!! 138 if spacegroup is None: 139 raise Exception 140 structure_builder.set_symmetry(spacegroup, cell) 141 except: 142 pass # no cell found, so just ignore
143
144 - def _get_icode(self, resseq):
145 """Tries to return the icode. In MMCIF files this is just part of 146 resseq! In PDB files, it's a separate field.""" 147 last_resseq_char=resseq[-1] 148 if last_resseq_char in ascii_letters: 149 icode=last_resseq_char 150 int_resseq=int(resseq[0:-1]) 151 else: 152 icode=" " 153 int_resseq=int(resseq) 154 return icode, int_resseq
155 156 157 if __name__=="__main__": 158 import sys 159 160 if len(sys.argv) != 2: 161 print "Usage: python MMCIFparser.py filename" 162 raise SystemExit 163 filename=sys.argv[1] 164 165 p=MMCIFParser() 166 167 structure=p.get_structure("test", filename) 168 169 for model in structure.get_list(): 170 print model 171 for chain in model.get_list(): 172 print chain 173 print "Found %d residues." % len(chain.get_list()) 174