Package Bio :: Module File

Source Code for Module Bio.File

# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# Copyright 2009-2015 by Peter Cock. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Code for more fancy file handles.

Classes:

    - UndoHandle     File object decorator with support for undo-like operations.

Additional private classes used in Bio.SeqIO and Bio.SearchIO for indexing
files are also defined under Bio.File but these are not intended for direct
use.
"""

from __future__ import print_function

import codecs
import os
import sys
import contextlib
import itertools

from Bio._py3k import basestring

try:
    from collections import UserDict as _dict_base
except ImportError:
    from UserDict import DictMixin as _dict_base

try:
    from sqlite3 import dbapi2 as _sqlite
    from sqlite3 import IntegrityError as _IntegrityError
    from sqlite3 import OperationalError as _OperationalError
except ImportError:
    # Not present on Jython, but should be included in Python 2.5
    # or later (unless compiled from source without its dependencies).
    # Still want to offer in-memory indexing.
    _sqlite = None

__docformat__ = "restructuredtext en"
@contextlib.contextmanager
def as_handle(handleish, mode='r', **kwargs):
    r"""Context manager to ensure we are using a handle.

    Context manager for arguments that can be passed to SeqIO and AlignIO
    read, write, and parse methods: either file objects or strings.

    When given a string, returns a file handle open to handleish with the
    provided mode which will be closed when the manager exits.

    All other inputs are returned, and are *not* closed.

    - handleish - Either a string or file handle
    - mode - Mode to open handleish (used only if handleish is a string)
    - kwargs - Further arguments to pass to open(...)

    Example:

    >>> with as_handle('seqs.fasta', 'w') as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    True

    >>> handle = open('seqs.fasta', 'w')
    >>> with as_handle(handle) as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    False
    >>> fp.close()

    Note that if the mode argument includes U (for universal new lines)
    this will be removed under Python 3 where it is redundant and has
    been deprecated (this happens automatically in text mode).
    """
    if isinstance(handleish, basestring):
        if sys.version_info[0] >= 3 and "U" in mode:
            mode = mode.replace("U", "")
        if 'encoding' in kwargs:
            with codecs.open(handleish, mode, **kwargs) as fp:
                yield fp
        else:
            with open(handleish, mode, **kwargs) as fp:
                yield fp
    else:
        yield handleish
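For illustration, a minimal sketch of how calling code might accept either a filename or an already-open handle by delegating to as_handle (count_lines is a hypothetical helper, not part of Bio.File):

def count_lines(source):
    # source may be a filename string or an open file handle;
    # as_handle only closes the handle if it opened it itself.
    with as_handle(source) as handle:
        return sum(1 for _ in handle)
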
def _open_for_random_access(filename):
    """Open a file in binary mode, spot if it is BGZF format etc (PRIVATE).

    This functionality is used by the Bio.SeqIO and Bio.SearchIO index
    and index_db functions.
    """
    handle = open(filename, "rb")
    from . import bgzf
    try:
        return bgzf.BgzfReader(mode="rb", fileobj=handle)
    except ValueError as e:
        assert "BGZF" in str(e)
        # Not a BGZF file after all, rewind to start:
        handle.seek(0)
        return handle
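A sketch of the intended behaviour, with hypothetical filenames: a BGZF-compressed file yields a Bio.bgzf.BgzfReader (whose tell/seek work with BGZF virtual offsets), while an ordinary file yields the plain binary handle, rewound to the start:

plain_handle = _open_for_random_access("example.fasta")     # ordinary file object
bgzf_handle = _open_for_random_access("example.fasta.bgz")  # bgzf.BgzfReader
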
class UndoHandle(object):
    """A Python handle that adds functionality for saving lines.

    Saves lines in a LIFO fashion.

    Added methods:

        - saveline    Save a line to be returned next time.
        - peekline    Peek at the next line without consuming it.

    """

    def __init__(self, handle):
        self._handle = handle
        self._saved = []
        try:
            # If wrapping an online handle, this is nice to have:
            self.url = handle.url
        except AttributeError:
            pass

    def __iter__(self):
        return self

    def __next__(self):
        next = self.readline()
        if not next:
            raise StopIteration
        return next

    if sys.version_info[0] < 3:
        def next(self):
            """Python 2 style alias for Python 3 style __next__ method."""
            return self.__next__()

    def readlines(self, *args, **keywds):
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        if self._saved:
            line = self._saved.pop(0)
        else:
            line = self._handle.readline(*args, **keywds)
        return line

    def read(self, size=-1):
        if size == -1:
            saved = "".join(self._saved)
            self._saved[:] = []
        else:
            saved = ''
            while size > 0 and self._saved:
                if len(self._saved[0]) <= size:
                    size = size - len(self._saved[0])
                    saved = saved + self._saved.pop(0)
                else:
                    saved = saved + self._saved[0][:size]
                    self._saved[0] = self._saved[0][size:]
                    size = 0
        return saved + self._handle.read(size)

    def saveline(self, line):
        if line:
            self._saved = [line] + self._saved

    def peekline(self):
        if self._saved:
            line = self._saved[0]
        else:
            line = self._handle.readline()
            self.saveline(line)
        return line

    def tell(self):
        return self._handle.tell() - sum(len(line) for line in self._saved)

    def seek(self, *args):
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        return getattr(self._handle, attr)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self._handle.close()
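A minimal sketch of the peek/undo behaviour, using an in-memory handle:

from io import StringIO

h = UndoHandle(StringIO("line one\nline two\n"))
assert h.peekline() == "line one\n"    # peek without consuming the line
assert h.readline() == "line one\n"    # the peeked line is returned again
h.saveline("pushed back\n")            # LIFO push, read back on the next call
assert h.readline() == "pushed back\n"
assert h.readline() == "line two\n"
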
# The rest of this file defines code used in Bio.SeqIO and Bio.SearchIO
# for indexing


class _IndexedSeqFileProxy(object):
    """Base class for file format specific random access (PRIVATE).

    This is subclassed in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Subclasses for each file format should define '__iter__', 'get'
    and optionally 'get_raw' methods.
    """

    def __iter__(self):
        """Returns (identifier, offset, length in bytes) tuples.

        The length can be zero where it is not implemented or not
        possible for a particular file format.
        """
        raise NotImplementedError("Subclass should implement this")

    def get(self, offset):
        """Returns parsed object for this entry."""
        # Most file formats with self contained records can be handled by
        # parsing StringIO(_bytes_to_string(self.get_raw(offset)))
        raise NotImplementedError("Subclass should implement this")

    def get_raw(self, offset):
        """Returns bytes string (if implemented for this file format)."""
        # Should be done by each sub-class (if possible)
        raise NotImplementedError("Not available for this file format.")
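To make the subclass contract concrete, here is a hedged sketch of a proxy for a hypothetical format with one record per non-blank line, keyed by the first word (this class and format are invented for illustration; the real subclasses live in Bio.SeqIO and Bio.SearchIO and return parsed records from get):

class _OneRecordPerLineProxy(_IndexedSeqFileProxy):
    def __init__(self, filename):
        self._handle = open(filename, "rb")

    def __iter__(self):
        # Yield (identifier, offset, length in bytes) for each record;
        # assumes every line is non-blank.
        handle = self._handle
        handle.seek(0)
        while True:
            offset = handle.tell()
            line = handle.readline()
            if not line:
                break
            yield line.split(None, 1)[0].decode(), offset, len(line)

    def get(self, offset):
        # A real subclass would parse and return a SeqRecord/QueryResult here.
        return self.get_raw(offset)

    def get_raw(self, offset):
        self._handle.seek(offset)
        return self._handle.readline()
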
class _IndexedSeqFileDict(_dict_base):
    """Read only dictionary interface to a sequential record file.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys and associated file offsets in memory, and reads the
    file to access entries as objects, parsing them on demand. This
    approach is memory limited, but will work even with millions of
    records.

    Note duplicate keys are not allowed. If this happens, a ValueError
    exception is raised.

    As used in Bio.SeqIO, by default the SeqRecord's id string is used
    as the dictionary key. In Bio.SearchIO, the query's id string is
    used. This can be changed by supplying an optional key_function,
    a callback function which will be given the record id and must
    return the desired key. For example, this allows you to parse
    NCBI style FASTA identifiers, and extract the GI number to use
    as the dictionary key.

    Note that this dictionary is essentially read only. You cannot
    add or change values, pop values, nor clear the dictionary.
    """

    def __init__(self, random_access_proxy, key_function,
                 repr, obj_repr):
        # Use key_function=None for default value
        self._proxy = random_access_proxy
        self._key_function = key_function
        self._repr = repr
        self._obj_repr = obj_repr
        if key_function:
            offset_iter = (
                (key_function(k), o, l) for (k, o, l) in random_access_proxy)
        else:
            offset_iter = random_access_proxy
        offsets = {}
        for key, offset, length in offset_iter:
            # Note - we don't store the length because I want to minimise the
            # memory requirements. With the SQLite backend the length is kept
            # and is used to speed up the get_raw method (by about 3 times).
            # The length should be provided by all the current backends except
            # SFF where there is an existing Roche index we can reuse (very fast
            # but lacks the record lengths)
            # assert length or format in ["sff", "sff-trim"], \
            #     "%s at offset %i given length %r (%s format %s)" \
            #     % (key, offset, length, filename, format)
            if key in offsets:
                self._proxy._handle.close()
                raise ValueError("Duplicate key '%s'" % key)
            else:
                offsets[key] = offset
        self._offsets = offsets

    def __repr__(self):
        return self._repr

    def __str__(self):
        # TODO - How best to handle the __str__ for SeqIO and SearchIO?
        if self:
            return "{%r : %s(...), ...}" % (list(self.keys())[0], self._obj_repr)
        else:
            return "{}"

    def __contains__(self, key):
        return key in self._offsets

    def __len__(self):
        """How many records are there?"""
        return len(self._offsets)

    def items(self):
        """Iterate over the (key, SeqRecord) items.

        This tries to act like a Python 3 dictionary, and does not return
        a list of (key, value) pairs due to memory concerns.
        """
        for key in self.__iter__():
            yield key, self.__getitem__(key)

    def values(self):
        """Iterate over the SeqRecord items.

        This tries to act like a Python 3 dictionary, and does not return
        a list of values due to memory concerns.
        """
        for key in self.__iter__():
            yield self.__getitem__(key)

    def keys(self):
        """Iterate over the keys.

        This tries to act like a Python 3 dictionary, and does not return
        a list of keys due to memory concerns.
        """
        return self.__iter__()

    if hasattr(dict, "iteritems"):
        # Python 2, also define iteritems etc

        def itervalues(self):
            """Iterate over the SeqRecord items."""
            for key in self.__iter__():
                yield self.__getitem__(key)

        def iteritems(self):
            """Iterate over the (key, SeqRecord) items."""
            for key in self.__iter__():
                yield key, self.__getitem__(key)

        def iterkeys(self):
            """Iterate over the keys."""
            return self.__iter__()

    def __iter__(self):
        """Iterate over the keys."""
        return iter(self._offsets)

    def __getitem__(self, key):
        """x.__getitem__(y) <==> x[y]"""
        # Pass the offset to the proxy
        record = self._proxy.get(self._offsets[key])
        if self._key_function:
            key2 = self._key_function(record.id)
        else:
            key2 = record.id
        if key != key2:
            raise ValueError("Key did not match (%s vs %s)" % (key, key2))
        return record

    def get(self, k, d=None):
        """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return d

    def get_raw(self, key):
        """Similar to the get method, but returns the record as a raw string.

        If the key is not found, a KeyError exception is raised.

        Note that on Python 3 a bytes string is returned, not a typical
        unicode string.

        NOTE - This functionality is not supported for every file format.
        """
        # Pass the offset to the proxy
        return self._proxy.get_raw(self._offsets[key])

    def __setitem__(self, key, value):
        """Would allow setting or replacing records, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def update(self, *args, **kwargs):
        """Would allow adding more values, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def pop(self, key, default=None):
        """Would remove the specified record, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def popitem(self):
        """Would remove and return a SeqRecord, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def clear(self):
        """Would clear the dictionary, but not implemented."""
        raise NotImplementedError("An indexed sequence file is read only.")

    def fromkeys(self, keys, value=None):
        """A dictionary method which we don't implement."""
        raise NotImplementedError("An indexed sequence file doesn't "
                                  "support this.")

    def copy(self):
        """A dictionary method which we don't implement."""
        raise NotImplementedError("An indexed sequence file doesn't "
                                  "support this.")

    def close(self):
        """Close the file handle being used to read the data.

        Once called, further use of the index won't work. The sole purpose
        of this method is to allow explicit handle closure - for example
        if you wish to delete the file, on Windows you must first close
        all open handles to that file.
        """
        self._proxy._handle.close()
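In practice this class is reached via Bio.SeqIO.index (or the equivalent in Bio.SearchIO) rather than being constructed directly. A minimal sketch, with a hypothetical FASTA file, showing a key_function that extracts the GI number from NCBI style identifiers:

from Bio import SeqIO

def get_gi(identifier):
    # Hypothetical example: "gi|3024260|sp|P24973|..." -> "3024260"
    parts = identifier.split("|")
    return parts[1] if parts[0] == "gi" else identifier

records = SeqIO.index("example.fasta", "fasta", key_function=get_gi)
record = records["3024260"]   # looked up by GI number, parsed on demand
records.close()
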
class _SQLiteManySeqFilesDict(_IndexedSeqFileDict):
    """Read only dictionary interface to many sequential record files.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys, file-numbers and offsets in an SQLite database. To access
    a record by key, reads from the offset in the appropriate file and then
    parses the record into an object.

    There are OS limits on the number of files that can be open at once,
    so a pool is kept. If a record is required from a closed file, then
    one of the open handles is closed first.
    """

    def __init__(self, index_filename, filenames,
                 proxy_factory, format,
                 key_function, repr, max_open=10):
        """Loads or creates an SQLite based index."""
        # TODO? - Don't keep filename list in memory (just in DB)?
        # Should save a chunk of memory if dealing with 1000s of files.
        # Furthermore could compare a generator to the DB on reloading
        # (no need to turn it into a list)

        if not _sqlite:
            # Hack for Jython (or if Python is compiled without it)
            from Bio import MissingPythonDependencyError
            raise MissingPythonDependencyError("Requires sqlite3, which is "
                                               "included with Python 2.5+")
        if filenames is not None:
            filenames = list(filenames)  # In case it was a generator

        # Cache the arguments as private variables
        self._index_filename = index_filename
        self._filenames = filenames
        self._format = format
        self._key_function = key_function
        self._proxy_factory = proxy_factory
        self._repr = repr
        self._max_open = max_open
        self._proxies = {}

        # Note if using SQLite :memory: trick index filename, this will
        # give $PWD as the relative path (which is fine).
        self._relative_path = os.path.abspath(os.path.dirname(index_filename))

        if os.path.isfile(index_filename):
            self._load_index()
        else:
            self._build_index()

    def _load_index(self):
        """Called from __init__ to re-use an existing index (PRIVATE)."""
        index_filename = self._index_filename
        relative_path = self._relative_path
        filenames = self._filenames
        format = self._format
        proxy_factory = self._proxy_factory

        con = _sqlite.connect(index_filename)
        self._con = con
        # Check the count...
        try:
            count, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("count",)).fetchone()
            self._length = int(count)
            if self._length == -1:
                con.close()
                raise ValueError("Unfinished/partial database")
            count, = con.execute(
                "SELECT COUNT(key) FROM offset_data;").fetchone()
            if self._length != int(count):
                con.close()
                raise ValueError("Corrupt database? %i entries not %i"
                                 % (int(count), self._length))
            self._format, = con.execute(
                "SELECT value FROM meta_data WHERE key=?;",
                ("format",)).fetchone()
            if format and format != self._format:
                con.close()
                raise ValueError("Index file says format %s, not %s"
                                 % (self._format, format))
            try:
                filenames_relative_to_index, = con.execute(
                    "SELECT value FROM meta_data WHERE key=?;",
                    ("filenames_relative_to_index",)).fetchone()
                filenames_relative_to_index = (filenames_relative_to_index.upper() == "TRUE")
            except TypeError:
                # Original behaviour, assume False if meta_data missing
                filenames_relative_to_index = False
            self._filenames = [row[0] for row in
                               con.execute("SELECT name FROM file_data "
                                           "ORDER BY file_number;").fetchall()]
            if filenames_relative_to_index:
                # Not implicitly relative to $PWD, explicitly relative to index file
                relative_path = os.path.abspath(os.path.dirname(index_filename))
                tmp = []
                for f in self._filenames:
                    if os.path.isabs(f):
                        tmp.append(f)
                    else:
                        # Would be stored with Unix / path separator, so convert
                        # it to the local OS path separator here:
                        tmp.append(os.path.join(relative_path, f.replace("/", os.path.sep)))
                self._filenames = tmp
                del tmp
            if filenames and len(filenames) != len(self._filenames):
                con.close()
                raise ValueError("Index file says %i files, not %i"
                                 % (len(self._filenames), len(filenames)))
            if filenames and filenames != self._filenames:
                for old, new in zip(self._filenames, filenames):
                    # Want exact match (after making relative to the index above)
                    if os.path.abspath(old) != os.path.abspath(new):
                        con.close()
                        if filenames_relative_to_index:
                            raise ValueError("Index file has different filenames, e.g. %r != %r"
                                             % (os.path.abspath(old), os.path.abspath(new)))
                        else:
                            raise ValueError("Index file has different filenames "
                                             "[This is an old index where any relative paths "
                                             "were relative to the original working directory]. "
                                             "e.g. %r != %r"
                                             % (os.path.abspath(old), os.path.abspath(new)))
                # Filenames are equal (after imposing abspath)
        except _OperationalError as err:
            con.close()
            raise ValueError("Not a Biopython index database? %s" % err)
        # Now we have the format (from the DB if not given to us).
        if not proxy_factory(self._format):
            con.close()
            raise ValueError("Unsupported format '%s'" % self._format)

    def _build_index(self):
        """Called from __init__ to create a new index (PRIVATE)."""
        index_filename = self._index_filename
        relative_path = self._relative_path
        filenames = self._filenames
        format = self._format
        key_function = self._key_function
        proxy_factory = self._proxy_factory
        max_open = self._max_open
        random_access_proxies = self._proxies

        if not format or not filenames:
            raise ValueError("Filenames to index and format required to build %r" % index_filename)
        if not proxy_factory(format):
            raise ValueError("Unsupported format '%s'" % format)
        # Create the index
        con = _sqlite.connect(index_filename)
        self._con = con
        # print("Creating index")
        # Sqlite PRAGMA settings for speed
        con.execute("PRAGMA synchronous=OFF")
        con.execute("PRAGMA locking_mode=EXCLUSIVE")
        # Don't index the key column until the end (faster)
        # con.execute("CREATE TABLE offset_data (key TEXT PRIMARY KEY, "
        #             "offset INTEGER);")
        con.execute("CREATE TABLE meta_data (key TEXT, value TEXT);")
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("count", -1))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("format", format))
        con.execute("INSERT INTO meta_data (key, value) VALUES (?,?);",
                    ("filenames_relative_to_index", "True"))
        # TODO - Record the alphabet?
        # TODO - Record the file size and modified date?
        con.execute(
            "CREATE TABLE file_data (file_number INTEGER, name TEXT);")
        con.execute("CREATE TABLE offset_data (key TEXT, file_number INTEGER, offset INTEGER, length INTEGER);")
        count = 0
        for i, filename in enumerate(filenames):
            # Default to storing as an absolute path,
            f = os.path.abspath(filename)
            if not os.path.isabs(filename) and not os.path.isabs(index_filename):
                # Since user gave BOTH filename & index as relative paths,
                # we will store this relative to the index file even though
                # it may now start ../ (meaning up a level)
                # Note for cross platform use (e.g. shared data drive over SAMBA),
                # convert any Windows slash into Unix style / for relative paths.
                f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/")
            elif (os.path.dirname(os.path.abspath(filename)) + os.path.sep).startswith(relative_path + os.path.sep):
                # Since sequence file is in same directory or sub directory,
                # might as well make this into a relative path:
                f = os.path.relpath(filename, relative_path).replace(os.path.sep, "/")
                assert not f.startswith("../"), f
            # print("DEBUG - storing %r as [%r] %r" % (filename, relative_path, f))
            con.execute(
                "INSERT INTO file_data (file_number, name) VALUES (?,?);",
                (i, f))
            random_access_proxy = proxy_factory(format, filename)
            if key_function:
                offset_iter = ((key_function(k), i, o, l)
                               for (k, o, l) in random_access_proxy)
            else:
                offset_iter = ((k, i, o, l)
                               for (k, o, l) in random_access_proxy)
            while True:
                batch = list(itertools.islice(offset_iter, 100))
                if not batch:
                    break
                # print("Inserting batch of %i offsets, %s ... %s"
                #       % (len(batch), batch[0][0], batch[-1][0]))
                con.executemany(
                    "INSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);",
                    batch)
                con.commit()
                count += len(batch)
            if len(random_access_proxies) < max_open:
                random_access_proxies[i] = random_access_proxy
            else:
                random_access_proxy._handle.close()
        self._length = count
        # print("About to index %i entries" % count)
        try:
            con.execute("CREATE UNIQUE INDEX IF NOT EXISTS "
                        "key_index ON offset_data(key);")
        except _IntegrityError as err:
            self._proxies = random_access_proxies
            self.close()
            con.close()
            raise ValueError("Duplicate key? %s" % err)
        con.execute("PRAGMA locking_mode=NORMAL")
        con.execute("UPDATE meta_data SET value = ? WHERE key = ?;",
                    (count, "count"))
        con.commit()
        # print("Index created")
    def __repr__(self):
        return self._repr

    def __contains__(self, key):
        return bool(
            self._con.execute("SELECT key FROM offset_data WHERE key=?;",
                              (key,)).fetchone())

    def __len__(self):
        """How many records are there?"""
        return self._length
        # return self._con.execute("SELECT COUNT(key) FROM offset_data;").fetchone()[0]

    def __iter__(self):
        """Iterate over the keys."""
        for row in self._con.execute("SELECT key FROM offset_data;"):
            yield str(row[0])

    if hasattr(dict, "iteritems"):
        # Python 2, use iteritems but not items etc
        # Just need to override this...

        def keys(self):
            """Return a list of all the keys (SeqRecord identifiers)."""
            return [str(row[0]) for row in
                    self._con.execute("SELECT key FROM offset_data;").fetchall()]

    def __getitem__(self, key):
        """x.__getitem__(y) <==> x[y]"""
        # Pass the offset to the proxy
        row = self._con.execute(
            "SELECT file_number, offset FROM offset_data WHERE key=?;",
            (key,)).fetchone()
        if not row:
            raise KeyError
        file_number, offset = row
        proxies = self._proxies
        if file_number in proxies:
            record = proxies[file_number].get(offset)
        else:
            if len(proxies) >= self._max_open:
                # Close an old handle...
                proxies.popitem()[1]._handle.close()
            # Open a new handle...
            proxy = self._proxy_factory(self._format, self._filenames[file_number])
            record = proxy.get(offset)
            proxies[file_number] = proxy
        if self._key_function:
            key2 = self._key_function(record.id)
        else:
            key2 = record.id
        if key != key2:
            raise ValueError("Key did not match (%s vs %s)" % (key, key2))
        return record

    def get(self, k, d=None):
        """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return d

    def get_raw(self, key):
        """Similar to the get method, but returns the record as a raw string.

        If the key is not found, a KeyError exception is raised.

        Note that on Python 3 a bytes string is returned, not a typical
        unicode string.

        **NOTE** - This functionality is not supported for every file format.
        """
        # Pass the offset to the proxy
        row = self._con.execute(
            "SELECT file_number, offset, length FROM offset_data WHERE key=?;",
            (key,)).fetchone()
        if not row:
            raise KeyError
        file_number, offset, length = row
        proxies = self._proxies
        if file_number in proxies:
            if length:
                # Shortcut if we have the length
                h = proxies[file_number]._handle
                h.seek(offset)
                return h.read(length)
            else:
                return proxies[file_number].get_raw(offset)
        else:
            # This code is duplicated from __getitem__ to avoid a function call
            if len(proxies) >= self._max_open:
                # Close an old handle...
                proxies.popitem()[1]._handle.close()
            # Open a new handle...
            proxy = self._proxy_factory(self._format, self._filenames[file_number])
            proxies[file_number] = proxy
            if length:
                # Shortcut if we have the length
                h = proxy._handle
                h.seek(offset)
                return h.read(length)
            else:
                return proxy.get_raw(offset)

    def close(self):
        """Close any open file handles."""
        proxies = self._proxies
        while proxies:
            proxies.popitem()[1]._handle.close()
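This class is normally reached via Bio.SeqIO.index_db (or Bio.SearchIO.index_db) rather than being constructed directly. A minimal sketch with hypothetical filenames; reloading with the same index filename reuses the stored offsets instead of rescanning the sequence files:

from Bio import SeqIO

records = SeqIO.index_db("seqs.idx", ["chr1.fasta", "chr2.fasta"], "fasta")
print(len(records))          # total records across all indexed files
record = records["some_id"]  # fetched from the right file, parsed on demand
records.close()              # close the pooled file handles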