Package Bio :: Package SCOP :: Module Cla
[hide private]
[frames | no frames]

Source Code for Module Bio.SCOP.Cla

  1  # Copyright 2001 by Gavin E. Crooks.  All rights reserved. 
  2  # Modifications Copyright 2010 Jeffrey Finkelstein. All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """ Handle the SCOP CLAssification file, which describes SCOP domains. 
  9   
 10  The file format is described in the scop 
 11  "release notes.":http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html 
 12  The latest CLA file can be found 
 13  "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ 
 14   
 15  "Release 1.73": http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73 
 16  (July 2008) 
 17   
 18  """ 
 19   
 20  from . import Residues 
 21   
 22   
class Record(object):
    """One line of a SCOP CLA file, i.e. the description of one domain.

    Attributes:

    sid -- SCOP identifier, e.g. d1danl2

    residues -- The domain definition, stored as a Residues object

    sccs -- SCOP concise classification string, e.g. b.1.2.1

    sunid -- SCOP unique identifier for this domain

    hierarchy -- A dictionary mapping nodetype to sunid that places this
                 domain in the SCOP hierarchy. See the Scop module for a
                 description of nodetypes. Older versions of Biopython
                 stored this as a list of (key, value) tuples
                 (see Bug 3109).
    """

    def __init__(self, line=None):
        """Create an empty record, populating it from ``line`` if given."""
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = {}
        if line:
            self._process(line)

    def _process(self, line):
        """Fill this record from one tab-separated CLA line.

        Raises ValueError when the line does not have exactly six columns.
        """
        line = line.rstrip()  # trailing whitespace (incl. newline) is noise
        fields = line.split('\t')
        if len(fields) != 6:
            raise ValueError("I don't understand the format of %s" % line)

        # Column order: sid, pdbid, residues, sccs, sunid, hierarchy.
        self.sid = fields[0]
        self.sccs = fields[3]
        self.sunid = fields[4]
        pdb_code = fields[1]
        lineage = fields[5]

        self.residues = Residues.Residues(fields[2])
        self.residues.pdbid = pdb_code
        self.sunid = int(self.sunid)

        # The last column looks like "nodetype=sunid,nodetype=sunid,...".
        for pair in lineage.split(","):
            nodetype, node_sunid = pair.split('=')
            self.hierarchy[nodetype] = int(node_sunid)

    def __str__(self):
        """Return the record as a tab-separated, newline-terminated line."""
        parts = [self.sid]
        # str(residues) is split on spaces so each piece gets its own column.
        parts.extend(str(self.residues).split(" "))
        parts.extend((self.sccs, self.sunid))

        lineage = ','.join('='.join((nodetype, str(node_sunid)))
                           for nodetype, node_sunid
                           in self.hierarchy.items())
        parts.append(lineage)

        return "\t".join(str(part) for part in parts) + "\n"
75 76
def parse(handle):
    """Yield one Record per non-comment line of a CLA file.

    Lines that start with '#' are skipped; every other line is handed
    to Record as-is.

    Arguments:

    handle -- file-like object (any iterable of lines).
    """
    for row in handle:
        if not row.startswith('#'):
            yield Record(row)
89 90
class Index(dict):
    """A CLA file indexed by SCOP identifiers, allowing rapid
    random access into a file.

    The underlying dict maps each sid to the offset at which that
    record's line starts in the file. Subscripting the index re-opens
    the file, seeks to the stored offset and parses that single line.
    """

    def __init__(self, filename):
        """Scan ``filename`` once and remember where each record starts.

        Arguments:

        filename -- The file to index
        """
        dict.__init__(self)
        self.filename = filename
        # Plain "r" is used rather than "rU": the 'U' flag was removed in
        # Python 3.11, and universal newlines are the text-mode default
        # on Python 3.
        with open(self.filename, "r") as f:
            while True:
                # Take the offset *before* reading so it always marks the
                # start of the line about to be read. Updating it only
                # after record lines would leave the first record after a
                # run of comment lines pointing at the comment instead.
                position = f.tell()
                line = f.readline()
                if not line:
                    break
                if line.startswith('#'):
                    continue
                record = Record(line)
                key = record.sid
                if key is not None:
                    self[key] = position

    def __getitem__(self, key):
        """Return the Record stored under ``key``, parsed from the file.

        Raises KeyError if ``key`` is not in the index.
        """
        position = dict.__getitem__(self, key)

        with open(self.filename, "r") as f:
            f.seek(position)
            line = f.readline()
            record = Record(line)
        return record
125