Package Bio :: Package PDB :: Module FragmentMapper'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.FragmentMapper'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Classify protein backbone structure according to Kolodny et al's fragment 
  7  libraries. 
  8   
  9  It can be regarded as a form of objective secondary structure classification. 
 10  Only fragments of length 5 or 7 are supported (ie. there is a 'central' 
 11  residue). 
 12   
 13  Full reference: 
 14   
 15  Kolodny R, Koehl P, Guibas L, Levitt M. 
 16  Small libraries of protein fragments model native protein structures accurately. 
 17  J Mol Biol. 2002 323(2):297-307. 
 18   
 19  The definition files of the fragments can be obtained from: 
 20   
 21  U{http://csb.stanford.edu/~rachel/fragments/} 
 22   
 23  You need these files to use this module. 
 24   
 25  The following example uses the library with 10 fragments of length 5. 
 26  The library files can be found in directory 'fragment_data'. 
 27   
 28      >>> model = structure[0] 
 29      >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data") 
 30      >>> fragment = fm[residue] 
 31  """ 
 32   
 33  from __future__ import print_function 
 34   
 35  import numpy 
 36   
 37  from Bio.SVDSuperimposer import SVDSuperimposer 
 38   
 39  from Bio.PDB import Selection 
 40  from Bio.PDB.PDBExceptions import PDBException 
 41  from Bio.PDB.PDBParser import PDBParser 
 42  from Bio.PDB.Polypeptide import PPBuilder 
 43   
 44   
 45  # fragment file (lib_SIZE_z_LENGTH.txt) 
 46  # SIZE=number of fragments 
 47  # LENGTH=length of fragment (4,5,6,7) 
 48  _FRAGMENT_FILE="lib_%s_z_%s.txt" 
 49   
 50   
51 -def _read_fragments(size, length, dir="."):
52 """ 53 Read a fragment spec file (available from 54 U{http://csb.stanford.edu/rachel/fragments/} 55 and return a list of Fragment objects. 56 57 @param size: number of fragments in the library 58 @type size: int 59 60 @param length: length of the fragments 61 @type length: int 62 63 @param dir: directory where the fragment spec files can be found 64 @type dir: string 65 """ 66 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length) 67 with open(filename, "r") as fp: 68 flist=[] 69 # ID of fragment=rank in spec file 70 fid=0 71 for l in fp.readlines(): 72 # skip comment and blank lines 73 if l[0]=="*" or l[0]=="\n": 74 continue 75 sl=l.split() 76 if sl[1]=="------": 77 # Start of fragment definition 78 f=Fragment(length, fid) 79 flist.append(f) 80 # increase fragment id (rank) 81 fid+=1 82 continue 83 # Add CA coord to Fragment 84 coord = numpy.array([float(x) for x in sl[0:3]]) 85 # XXX= dummy residue name 86 f.add_residue("XXX", coord) 87 return flist
88 89
90 -class Fragment(object):
91 """ 92 Represent a polypeptide C-alpha fragment. 93 """
94 - def __init__(self, length, fid):
95 """ 96 @param length: length of the fragment 97 @type length: int 98 99 @param fid: id for the fragment 100 @type fid: int 101 """ 102 # nr of residues in fragment 103 self.length=length 104 # nr of residues added 105 self.counter=0 106 self.resname_list=[] 107 # CA coordinate matrix 108 self.coords_ca=numpy.zeros((length, 3), "d") 109 self.fid=fid
110
111 - def get_resname_list(self):
112 """ 113 @return: the residue names 114 @rtype: [string, string,...] 115 """ 116 return self.resname_list
117
118 - def get_id(self):
119 """ 120 @return: id for the fragment 121 @rtype: int 122 """ 123 return self.fid
124
125 - def get_coords(self):
126 """ 127 @return: the CA coords in the fragment 128 @rtype: Numeric (Nx3) array 129 """ 130 return self.coords_ca
131
132 - def add_residue(self, resname, ca_coord):
133 """ 134 @param resname: residue name (eg. GLY). 135 @type resname: string 136 137 @param ca_coord: the c-alpha coorinates of the residues 138 @type ca_coord: Numeric array with length 3 139 """ 140 if self.counter>=self.length: 141 raise PDBException("Fragment boundary exceeded.") 142 self.resname_list.append(resname) 143 self.coords_ca[self.counter]=ca_coord 144 self.counter=self.counter+1
145
146 - def __len__(self):
147 """ 148 @return: length of fragment 149 @rtype: int 150 """ 151 return self.length
152
153 - def __sub__(self, other):
154 """ 155 Return rmsd between two fragments. 156 157 Example: 158 >>> rmsd=fragment1-fragment2 159 160 @return: rmsd between fragments 161 @rtype: float 162 """ 163 sup=SVDSuperimposer() 164 sup.set(self.coords_ca, other.coords_ca) 165 sup.run() 166 return sup.get_rms()
167
168 - def __repr__(self):
169 """ 170 Returns <Fragment length=L id=ID> where L=length of fragment 171 and ID the identifier (rank in the library). 172 """ 173 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
174 175
176 -def _make_fragment_list(pp, length):
177 """ 178 Dice up a peptide in fragments of length "length". 179 180 @param pp: a list of residues (part of one peptide) 181 @type pp: [L{Residue}, L{Residue}, ...] 182 183 @param length: fragment length 184 @type length: int 185 """ 186 frag_list=[] 187 for i in range(0, len(pp)-length+1): 188 f=Fragment(length, -1) 189 for j in range(0, length): 190 residue=pp[i+j] 191 resname=residue.get_resname() 192 if residue.has_id("CA"): 193 ca=residue["CA"] 194 else: 195 raise PDBException("CHAINBREAK") 196 if ca.is_disordered(): 197 raise PDBException("CHAINBREAK") 198 ca_coord=ca.get_coord() 199 f.add_residue(resname, ca_coord) 200 frag_list.append(f) 201 return frag_list
202 203
204 -def _map_fragment_list(flist, reflist):
205 """ 206 Map all frgaments in flist to the closest 207 (in RMSD) fragment in reflist. 208 209 Returns a list of reflist indices. 210 211 @param flist: list of protein fragments 212 @type flist: [L{Fragment}, L{Fragment}, ...] 213 214 @param reflist: list of reference (ie. library) fragments 215 @type reflist: [L{Fragment}, L{Fragment}, ...] 216 """ 217 mapped=[] 218 for f in flist: 219 rank=[] 220 for i in range(0, len(reflist)): 221 rf=reflist[i] 222 rms=f-rf 223 rank.append((rms, rf)) 224 rank.sort() 225 fragment=rank[0][1] 226 mapped.append(fragment) 227 return mapped
228 229
230 -class FragmentMapper(object):
231 """ 232 Map polypeptides in a model to lists of representative fragments. 233 """
234 - def __init__(self, model, lsize=20, flength=5, fdir="."):
235 """ 236 @param model: the model that will be mapped 237 @type model: L{Model} 238 239 @param lsize: number of fragments in the library 240 @type lsize: int 241 242 @param flength: length of fragments in the library 243 @type flength: int 244 245 @param fdir: directory where the definition files are 246 found (default=".") 247 @type fdir: string 248 """ 249 if flength==5: 250 self.edge=2 251 elif flength==7: 252 self.edge=3 253 else: 254 raise PDBException("Fragment length should be 5 or 7.") 255 self.flength=flength 256 self.lsize=lsize 257 self.reflist=_read_fragments(lsize, flength, fdir) 258 self.model=model 259 self.fd=self._map(self.model)
260
261 - def _map(self, model):
262 """ 263 @param model: the model that will be mapped 264 @type model: L{Model} 265 """ 266 ppb=PPBuilder() 267 ppl=ppb.build_peptides(model) 268 fd={} 269 for pp in ppl: 270 try: 271 # make fragments 272 flist=_make_fragment_list(pp, self.flength) 273 # classify fragments 274 mflist=_map_fragment_list(flist, self.reflist) 275 for i in range(0, len(pp)): 276 res=pp[i] 277 if i<self.edge: 278 # start residues 279 continue 280 elif i>=(len(pp)-self.edge): 281 # end residues 282 continue 283 else: 284 # fragment 285 index=i-self.edge 286 assert(index>=0) 287 fd[res]=mflist[index] 288 except PDBException as why: 289 if why == 'CHAINBREAK': 290 # Funny polypeptide - skip 291 pass 292 else: 293 raise PDBException(why) 294 return fd
295
296 - def has_key(self, res):
297 """(Obsolete) 298 299 @type res: L{Residue} 300 """ 301 import warnings 302 from Bio import BiopythonDeprecationWarning 303 warnings.warn("has_key is deprecated; use 'res in object' instead", BiopythonDeprecationWarning) 304 return (res in self)
305
306 - def __contains__(self, res):
307 """True if the given residue is in any of the mapped fragments. 308 309 @type res: L{Residue} 310 """ 311 return (res in self.fd)
312
313 - def __getitem__(self, res):
314 """ 315 @type res: L{Residue} 316 317 @return: fragment classification 318 @rtype: L{Fragment} 319 """ 320 return self.fd[res]
321 322 323 if __name__=="__main__": 324 325 import sys 326 327 p = PDBParser() 328 s = p.get_structure("X", sys.argv[1]) 329 m = s[0] 330 fm = FragmentMapper(m, 10, 5, "levitt_data") 331 332 for r in Selection.unfold_entities(m, "R"): 333 print("%s:" % r) 334 if r in fm: 335 print(fm[r]) 336