Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Classify protein backbone structure according to Kolodny et al's fragment 
  7  libraries. 
  8   
  9  It can be regarded as a form of objective secondary structure classification. 
 10  Only fragments of length 5 or 7 are supported (ie. there is a 'central' 
 11  residue). 
 12   
 13  Full reference: 
 14   
 15  Kolodny R, Koehl P, Guibas L, Levitt M. 
 16  Small libraries of protein fragments model native protein structures accurately. 
 17  J Mol Biol. 2002 323(2):297-307. 
 18   
 19  The definition files of the fragments can be obtained from: 
 20   
 21  U{http://csb.stanford.edu/~rachel/fragments/} 
 22   
 23  You need these files to use this module. 
 24   
 25  The following example uses the library with 10 fragments of length 5. 
 26  The library files can be found in directory 'fragment_data'. 
 27   
 28      >>> model = structure[0] 
 29      >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data") 
 30      >>> fragment = fm[residue] 
 31  """ 
 32   
 33  from __future__ import print_function 
 34   
 35  import numpy 
 36   
 37  from Bio.SVDSuperimposer import SVDSuperimposer 
 38   
 39  from Bio.PDB import Selection 
 40  from Bio.PDB.PDBExceptions import PDBException 
 41  from Bio.PDB.PDBParser import PDBParser 
 42  from Bio.PDB.Polypeptide import PPBuilder 
 43   
 44  __docformat__ = "restructuredtext en" 
 45   
 46  # fragment file (lib_SIZE_z_LENGTH.txt) 
 47  # SIZE=number of fragments 
 48  # LENGTH=length of fragment (4,5,6,7) 
 49  _FRAGMENT_FILE="lib_%s_z_%s.txt" 
 50   
 51   
52 -def _read_fragments(size, length, dir="."):
53 """ 54 Read a fragment spec file (available from 55 U{http://csb.stanford.edu/rachel/fragments/} 56 and return a list of Fragment objects. 57 58 @param size: number of fragments in the library 59 @type size: int 60 61 @param length: length of the fragments 62 @type length: int 63 64 @param dir: directory where the fragment spec files can be found 65 @type dir: string 66 """ 67 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) 68 with open(filename, "r") as fp: 69 flist=[] 70 # ID of fragment=rank in spec file 71 fid=0 72 for l in fp.readlines(): 73 # skip comment and blank lines 74 if l[0]=="*" or l[0]=="\n": 75 continue 76 sl=l.split() 77 if sl[1]=="------": 78 # Start of fragment definition 79 f=Fragment(length, fid) 80 flist.append(f) 81 # increase fragment id (rank) 82 fid+=1 83 continue 84 # Add CA coord to Fragment 85 coord = numpy.array([float(x) for x in sl[0:3]]) 86 # XXX= dummy residue name 87 f.add_residue("XXX", coord) 88 return flist
89 90
91 -class Fragment(object):
92 """ 93 Represent a polypeptide C-alpha fragment. 94 """
95 - def __init__(self, length, fid):
96 """ 97 @param length: length of the fragment 98 @type length: int 99 100 @param fid: id for the fragment 101 @type fid: int 102 """ 103 # nr of residues in fragment 104 self.length=length 105 # nr of residues added 106 self.counter=0 107 self.resname_list=[] 108 # CA coordinate matrix 109 self.coords_ca=numpy.zeros((length, 3), "d") 110 self.fid=fid
111
112 - def get_resname_list(self):
113 """ 114 @return: the residue names 115 @rtype: [string, string,...] 116 """ 117 return self.resname_list
118
119 - def get_id(self):
120 """ 121 @return: id for the fragment 122 @rtype: int 123 """ 124 return self.fid
125
126 - def get_coords(self):
127 """ 128 @return: the CA coords in the fragment 129 @rtype: Numeric (Nx3) array 130 """ 131 return self.coords_ca
132
133 - def add_residue(self, resname, ca_coord):
134 """ 135 @param resname: residue name (eg. GLY). 136 @type resname: string 137 138 @param ca_coord: the c-alpha coorinates of the residues 139 @type ca_coord: Numeric array with length 3 140 """ 141 if self.counter>=self.length: 142 raise PDBException("Fragment boundary exceeded.") 143 self.resname_list.append(resname) 144 self.coords_ca[self.counter]=ca_coord 145 self.counter=self.counter+1
146
147 - def __len__(self):
148 """ 149 @return: length of fragment 150 @rtype: int 151 """ 152 return self.length
153
154 - def __sub__(self, other):
155 """ 156 Return rmsd between two fragments. 157 158 Example: 159 >>> rmsd=fragment1-fragment2 160 161 @return: rmsd between fragments 162 @rtype: float 163 """ 164 sup=SVDSuperimposer() 165 sup.set(self.coords_ca, other.coords_ca) 166 sup.run() 167 return sup.get_rms()
168
169 - def __repr__(self):
170 """ 171 Returns <Fragment length=L id=ID> where L=length of fragment 172 and ID the identifier (rank in the library). 173 """ 174 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
175 176
177 -def _make_fragment_list(pp, length):
178 """ 179 Dice up a peptide in fragments of length "length". 180 181 @param pp: a list of residues (part of one peptide) 182 @type pp: [L{Residue}, L{Residue}, ...] 183 184 @param length: fragment length 185 @type length: int 186 """ 187 frag_list=[] 188 for i in range(0, len(pp)-length+1): 189 f=Fragment(length, -1) 190 for j in range(0, length): 191 residue=pp[i+j] 192 resname=residue.get_resname() 193 if residue.has_id("CA"): 194 ca=residue["CA"] 195 else: 196 raise PDBException("CHAINBREAK") 197 if ca.is_disordered(): 198 raise PDBException("CHAINBREAK") 199 ca_coord=ca.get_coord() 200 f.add_residue(resname, ca_coord) 201 frag_list.append(f) 202 return frag_list
203 204
205 -def _map_fragment_list(flist, reflist):
206 """ 207 Map all frgaments in flist to the closest 208 (in RMSD) fragment in reflist. 209 210 Returns a list of reflist indices. 211 212 @param flist: list of protein fragments 213 @type flist: [L{Fragment}, L{Fragment}, ...] 214 215 @param reflist: list of reference (ie. library) fragments 216 @type reflist: [L{Fragment}, L{Fragment}, ...] 217 """ 218 mapped=[] 219 for f in flist: 220 rank=[] 221 for i in range(0, len(reflist)): 222 rf=reflist[i] 223 rms=f-rf 224 rank.append((rms, rf)) 225 rank.sort() 226 fragment=rank[0][1] 227 mapped.append(fragment) 228 return mapped
229 230
231 -class FragmentMapper(object):
232 """ 233 Map polypeptides in a model to lists of representative fragments. 234 """
235 - def __init__(self, model, lsize=20, flength=5, fdir="."):
236 """ 237 :: 238 239 @param model: the model that will be mapped 240 @type model: L{Model} 241 242 @param lsize: number of fragments in the library 243 @type lsize: int 244 245 @param flength: length of fragments in the library 246 @type flength: int 247 248 @param fdir: directory where the definition files are 249 found (default=".") 250 @type fdir: string 251 """ 252 if flength==5: 253 self.edge=2 254 elif flength==7: 255 self.edge=3 256 else: 257 raise PDBException("Fragment length should be 5 or 7.") 258 self.flength=flength 259 self.lsize=lsize 260 self.reflist=_read_fragments(lsize, flength, fdir) 261 self.model=model 262 self.fd=self._map(self.model)
263
264 - def _map(self, model):
265 """ 266 @param model: the model that will be mapped 267 @type model: L{Model} 268 """ 269 ppb=PPBuilder() 270 ppl=ppb.build_peptides(model) 271 fd={} 272 for pp in ppl: 273 try: 274 # make fragments 275 flist=_make_fragment_list(pp, self.flength) 276 # classify fragments 277 mflist=_map_fragment_list(flist, self.reflist) 278 for i in range(0, len(pp)): 279 res=pp[i] 280 if i<self.edge: 281 # start residues 282 continue 283 elif i>=(len(pp)-self.edge): 284 # end residues 285 continue 286 else: 287 # fragment 288 index=i-self.edge 289 assert(index>=0) 290 fd[res]=mflist[index] 291 except PDBException as why: 292 if why == 'CHAINBREAK': 293 # Funny polypeptide - skip 294 pass 295 else: 296 raise PDBException(why) 297 return fd
298
299 - def has_key(self, res):
300 """(Obsolete) 301 302 @type res: L{Residue} 303 """ 304 import warnings 305 from Bio import BiopythonDeprecationWarning 306 warnings.warn("has_key is deprecated; use 'res in object' instead", BiopythonDeprecationWarning) 307 return (res in self)
308
309 - def __contains__(self, res):
310 """True if the given residue is in any of the mapped fragments. 311 312 @type res: L{Residue} 313 """ 314 return (res in self.fd)
315
316 - def __getitem__(self, res):
317 """ 318 @type res: L{Residue} 319 320 @return: fragment classification 321 @rtype: L{Fragment} 322 """ 323 return self.fd[res]
324 325 326 if __name__=="__main__": 327 328 import sys 329 330 p = PDBParser() 331 s = p.get_structure("X", sys.argv[1]) 332 m = s[0] 333 fm = FragmentMapper(m, 10, 5, "levitt_data") 334 335 for r in Selection.unfold_entities(m, "R"): 336 print("%s:" % r) 337 if r in fm: 338 print(fm[r]) 339