Package Bio :: Module Index
[hide private]
[frames | no frames]

Source Code for Module Bio.Index

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Index.py 
  7   
  8  This module provides a way to create indexes to text files. 
  9   
 10  Classes: 
 11  Index     Dictionary-like class used to store index information. 
 12   
 13  _ShelveIndex    An Index class based on the shelve module. 
 14  _InMemoryIndex  An in-memory Index class. 
 15   
 16  """ 
 17  import os 
 18  import array 
 19  import cPickle 
 20  import shelve 
 21   
 22   
class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The shelf is opened (or created) by the constructor and kept in
    ``self.data``; it is closed again when the object is destroyed.
    A version number is stored in the shelf under the key '__version'
    so that indexes written with an incompatible layout are rejected.

    NOTE(review): no method of this class forwards item access to
    ``self.data``; the mapping methods inherited from ``dict`` act on the
    in-memory dict only.  Confirm whether that is intended.

    """
    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Open the shelf called indexname, creating it when necessary.

        Arguments:
        indexname -- base filename handed to shelve.open.
        truncate  -- when true, any existing index is discarded and a
                     fresh one is created.

        Raises IOError if an existing index carries no version number or
        a version number different from this class's version.
        """
        dict.__init__(self)

        existing = None
        if truncate:
            # In python 1.52 and before, dumbdbm (under shelve)
            # doesn't clear the old database, so remove its files by hand.
            # Removal is best effort: the shelf is re-created with
            # flag='n' below regardless of whether this succeeds.
            try:
                for suffix in ('.dir', '.dat', '.bak'):
                    path = indexname + suffix
                    if os.path.exists(path):
                        os.unlink(path)
            except Exception:
                pass
        else:
            try:
                existing = shelve.open(indexname, flag='r')
            except Exception:
                # No usable database exists.  Only Exception is caught so
                # that KeyboardInterrupt and SystemExit still propagate.
                existing = None

        if existing is None:
            # Start a brand new database and stamp it with our version.
            self.data = shelve.open(indexname, flag='n')
            self.data[self.__version_key] = self.__version
            return

        self.data = existing
        # Check to make sure the database is the correct version.
        version = self.data.get(self.__version_key, None)
        if version is None:
            raise IOError("Unrecognized index format")
        elif version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))

    def __del__(self):
        """Close the shelf, if the constructor got far enough to open one."""
        if 'data' in self.__dict__:
            self.data.close()
66 67
class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    The whole index lives in this dict.  It is read from the file
    ``indexname`` when the object is created and written back to that
    file when the object is destroyed, but only if it was modified in
    between.

    Keys and values are stored pickled, so an index file must only be
    loaded if it comes from a trusted source: unpickling untrusted data
    can execute arbitrary code.

    """
    # File Format:
    # version
    # key value
    # [...]

    __version = 3
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        """Load the index stored in indexname, if that file exists.

        Arguments:
        indexname -- path of the index file.
        truncate  -- when true, an existing index file is deleted first
                     and the index starts out empty.

        Raises IOError if the file was written with a different version.
        """
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0     # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            handle = open(indexname)
            try:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                for line in handle:
                    key, value = line.split()
                    key, value = self._toobj(key), self._toobj(value)
                    self[key] = value
            finally:
                handle.close()
                # Loading goes through __setitem__, which marks the index
                # as changed.  Reset the flag even when loading failed, so
                # that __del__ never overwrites the file on disk with a
                # partially loaded index.
                self.__changed = 0

    def update(self, dict):
        """Merge the items of dict into the index and mark it as changed."""
        self.__changed = 1
        # The parameter is called 'dict' (kept for compatibility) and hides
        # the builtin type inside this method, so the base class
        # implementation has to be reached through super().
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store value under key and mark the index as changed."""
        self.__changed = 1
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        """Remove key and mark the index as changed."""
        self.__changed = 1
        dict.__delitem__(self, key)

    def clear(self):
        """Remove every entry and mark the index as changed."""
        self.__changed = 1
        dict.clear(self)

    def __del__(self):
        """Write the index back to its file if it has been modified."""
        if self.__changed:
            handle = open(self._indexname, 'w')
            try:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n" %
                                 (self._tostr(key), self._tostr(value)))
            finally:
                handle.close()
            # Everything is on disk now; a repeated call need not rewrite.
            self.__changed = 0

    def _tostr(self, obj):
        """Return obj encoded as a string that contains no whitespace."""
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its (signed) integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things, but
        # it's relatively fast.
        pickled = cPickle.dumps(obj)
        return ','.join(map(str, array.array('b', pickled)))

    def _toobj(self, str):
        """Return the object encoded in str by _tostr."""
        values = [int(token) for token in str.split(',')]
        # _tostr writes signed bytes, so values may be negative; mask them
        # back into 0..255 before rebuilding the pickled byte string.
        pickled = bytes(bytearray(value & 0xFF for value in values))
        return cPickle.loads(pickled)


Index = _InMemoryIndex