Package BioSQL :: Module BioSeqDatabase
[hide private]
[frames] | no frames]

Source Code for Module BioSQL.BioSeqDatabase

  1  # Copyright 2002 by Andrew Dalke.  All rights reserved. 
  2  # Revisions 2007-2009 copyright by Peter Cock.  All rights reserved. 
  3  # Revisions 2009 copyright by Cymon J. Cox.  All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  # 
  8  # Note that BioSQL (including the database schema and scripts) is 
  9  # available and licensed separately.  Please consult www.biosql.org 
 10  """Connect with a BioSQL database and load Biopython like objects from it. 
 11   
 12  This provides interfaces for loading biological objects from a relational 
 13  database, and is compatible with the BioSQL standards. 
 14  """ 
 15  from Bio import BiopythonDeprecationWarning 
 16   
 17  import BioSeq 
 18  import Loader 
 19  import DBUtils 
 20   
 21  _POSTGRES_RULES_PRESENT = False  # Hack for BioSQL Bug 2839 
 22   
 23   
24 -def open_database(driver="MySQLdb", **kwargs):
25 """Main interface for loading a existing BioSQL-style database. 26 27 This function is the easiest way to retrieve a connection to a 28 database, doing something like: 29 30 >>> from BioSeq import BioSeqDatabase 31 >>> server = BioSeqDatabase.open_database(user="root", db="minidb") 32 33 the various options are: 34 driver -> The name of the database driver to use for connecting. The 35 driver should implement the python DB API. By default, the MySQLdb 36 driver is used. 37 user -> the username to connect to the database with. 38 password, passwd -> the password to connect with 39 host -> the hostname of the database 40 database or db -> the name of the database 41 """ 42 if driver == "psycopg": 43 raise ValueError("Using BioSQL with psycopg (version one) is no " 44 "longer supported. Use psycopg2 instead.") 45 46 module = __import__(driver) 47 connect = getattr(module, "connect") 48 49 # Different drivers use different keywords... 50 kw = kwargs.copy() 51 if driver == "MySQLdb": 52 if "database" in kw: 53 kw["db"] = kw["database"] 54 del kw["database"] 55 if "password" in kw: 56 kw["passwd"] = kw["password"] 57 del kw["password"] 58 else: 59 # DB-API recommendations 60 if "db" in kw: 61 kw["database"] = kw["db"] 62 del kw["db"] 63 if "passwd" in kw: 64 kw["password"] = kw["passwd"] 65 del kw["passwd"] 66 if driver in ["psycopg2", "pgdb"] and not kw.get("database"): 67 kw["database"] = "template1" 68 # SQLite connect takes the database name as input 69 if driver in ["sqlite3"]: 70 conn = connect(kw["database"]) 71 else: 72 try: 73 conn = connect(**kw) 74 except module.InterfaceError: 75 # Ok, so let's try building a DSN 76 # (older releases of psycopg need this) 77 if "database" in kw: 78 kw["dbname"] = kw["database"] 79 del kw["database"] 80 elif "db" in kw: 81 kw["dbname"] = kw["db"] 82 del kw["db"] 83 dsn = ' '.join(['='.join(i) for i in kw.items()]) 84 conn = connect(dsn) 85 86 server = DBServer(conn, module) 87 88 # TODO - Remove the following once BioSQL Bug 2839 is fixed. 89 # Test for RULES in PostgreSQL schema, see also Bug 2833. 90 if driver in ["psycopg2", "pgdb"]: 91 sql = "SELECT ev_class FROM pg_rewrite WHERE " + \ 92 "rulename='rule_bioentry_i1' OR " + \ 93 "rulename='rule_bioentry_i2';" 94 if server.adaptor.execute_and_fetchall(sql): 95 import warnings 96 from Bio import BiopythonWarning 97 warnings.warn("Your BioSQL PostgreSQL schema includes some " 98 "rules currently required for bioperl-db but " 99 "which may cause problems loading data using " 100 "Biopython (see BioSQL Bug 2839). If you do not " 101 "use BioPerl, please remove these rules. " 102 "Biopython should cope with the rules present, " 103 "but with a performance penalty when loading " 104 "new records.", BiopythonWarning) 105 global _POSTGRES_RULES_PRESENT 106 _POSTGRES_RULES_PRESENT = True 107 108 return server
109 110
111 -class DBServer:
112 """Represents a BioSQL database continaing namespaces (sub-databases). 113 114 This acts like a Python dictionary, giving access to each namespace 115 (defined by a row in the biodatabase table) as a BioSeqDatabase object. 116 """
117 - def __init__(self, conn, module, module_name=None):
118 self.module = module 119 if module_name is None: 120 module_name = module.__name__ 121 self.adaptor = Adaptor(conn, DBUtils.get_dbutils(module_name)) 122 self.module_name = module_name
123
124 - def __repr__(self):
125 return self.__class__.__name__ + "(%r)" % self.adaptor.conn
126
127 - def __getitem__(self, name):
128 return BioSeqDatabase(self.adaptor, name)
129
130 - def __len__(self):
131 """Number of namespaces (sub-databases) in this database.""" 132 sql = "SELECT COUNT(name) FROM biodatabase;" 133 return int(self.adaptor.execute_and_fetch_col0(sql)[0])
134
135 - def __contains__(self, value):
136 """Check if a namespace (sub-database) in this database.""" 137 sql = "SELECT COUNT(name) FROM biodatabase WHERE name=%s;" 138 return bool(self.adaptor.execute_and_fetch_col0(sql, (value,))[0])
139
140 - def __iter__(self):
141 """Iterate over namespaces (sub-databases) in the database.""" 142 #TODO - Iterate over the cursor, much more efficient 143 return iter(self.adaptor.list_biodatabase_names())
144 145 if hasattr(dict, "iteritems"): 146 #Python 2, use iteritems etc
147 - def keys(self):
148 """List of namespaces (sub-databases) in the database.""" 149 return self.adaptor.list_biodatabase_names()
150
151 - def values(self):
152 """List of BioSeqDatabase objects in the database.""" 153 return [self[key] for key in self.keys()]
154
155 - def items(self):
156 """List of (namespace, BioSeqDatabase) for entries in the database.""" 157 return [(key, self[key]) for key in self.keys()]
158
159 - def iterkeys(self):
160 """Iterate over namespaces (sub-databases) in the database.""" 161 return iter(self)
162
163 - def itervalues(self):
164 """Iterate over BioSeqDatabase objects in the database.""" 165 for key in self: 166 yield self[key]
167
168 - def iteritems(self):
169 """Iterate over (namespace, BioSeqDatabase) in the database.""" 170 for key in self: 171 yield key, self[key]
172 else: 173 #Python 3, items etc are all iterators
174 - def keys(self):
175 """Iterate over namespaces (sub-databases) in the database.""" 176 return iter(self)
177
178 - def values(self):
179 """Iterate over BioSeqDatabase objects in the database.""" 180 for key in self: 181 yield self[key]
182
183 - def items(self):
184 """Iterate over (namespace, BioSeqDatabase) in the database.""" 185 for key in self: 186 yield key, self[key]
187
188 - def __delitem__(self, name):
189 """Remove a namespace and all its entries.""" 190 if name not in self: 191 raise KeyError(name) 192 self.remove_database(name)
193
194 - def remove_database(self, db_name):
195 """Remove a namespace and all its entries (OBSOLETE). 196 197 Try to remove all references to items in a database. 198 199 server.remove_database(name) 200 201 In keeping with the dictionary interface, you can now do this: 202 203 del server[name] 204 """ 205 import warnings 206 warnings.warn("This method is deprecated. In keeping with the " 207 "dictionary interface, you can now use 'del " 208 "server[name]' instead", BiopythonDeprecationWarning) 209 db_id = self.adaptor.fetch_dbid_by_dbname(db_name) 210 remover = Loader.DatabaseRemover(self.adaptor, db_id) 211 remover.remove()
212
213 - def new_database(self, db_name, authority=None, description=None):
214 """Add a new database to the server and return it. 215 """ 216 # make the database 217 sql = r"INSERT INTO biodatabase (name, authority, description)" \ 218 r" VALUES (%s, %s, %s)" 219 self.adaptor.execute(sql, (db_name, authority, description)) 220 return BioSeqDatabase(self.adaptor, db_name)
221
222 - def load_database_sql(self, sql_file):
223 """Load a database schema into the given database. 224 225 This is used to create tables, etc when a database is first created. 226 sql_file should specify the complete path to a file containing 227 SQL entries for building the tables. 228 """ 229 # Not sophisticated enough for PG schema. Is it needed by MySQL? 230 # Looks like we need this more complicated way for both. Leaving it 231 # the default and removing the simple-minded approach. 232 233 # read the file with all comment lines removed 234 sql_handle = open(sql_file, "rU") 235 sql = r"" 236 for line in sql_handle: 237 if line.startswith("--"): # don't include comment lines 238 pass 239 elif line.startswith("#"): # ditto for MySQL comments 240 pass 241 elif line.strip(): # only include non-blank lines 242 sql += line.strip() 243 sql += ' ' 244 245 # two ways to load the SQL 246 # 1. PostgreSQL can load it all at once and actually needs to 247 # due to FUNCTION defines at the end of the SQL which mess up 248 # the splitting by semicolons 249 if self.module_name in ["psycopg2", "pgdb"]: 250 self.adaptor.cursor.execute(sql) 251 # 2. MySQL needs the database loading split up into single lines of 252 # SQL executed one at a time 253 elif self.module_name in ["MySQLdb", "sqlite3"]: 254 sql_parts = sql.split(";") # one line per sql command 255 for sql_line in sql_parts[:-1]: # don't use the last item, it's blank 256 self.adaptor.cursor.execute(sql_line) 257 else: 258 raise ValueError("Module %s not supported by the loader." % 259 (self.module_name))
260
261 - def commit(self):
262 """Commits the current transaction to the database.""" 263 return self.adaptor.commit()
264
265 - def rollback(self):
266 """Rolls backs the current transaction.""" 267 return self.adaptor.rollback()
268
269 - def close(self):
270 """Close the connection. No further activity possible.""" 271 return self.adaptor.close()
272 273
274 -class Adaptor:
275 - def __init__(self, conn, dbutils):
276 self.conn = conn 277 self.cursor = conn.cursor() 278 self.dbutils = dbutils
279
280 - def last_id(self, table):
281 return self.dbutils.last_id(self.cursor, table)
282
283 - def autocommit(self, y=True):
284 """Set the autocommit mode. True values enable; False value disable.""" 285 return self.dbutils.autocommit(self.conn, y)
286
287 - def commit(self):
288 """Commits the current transaction.""" 289 return self.conn.commit()
290
291 - def rollback(self):
292 """Rolls backs the current transaction.""" 293 return self.conn.rollback()
294
295 - def close(self):
296 """Close the connection. No further activity possible.""" 297 return self.conn.close()
298
299 - def fetch_dbid_by_dbname(self, dbname):
300 self.execute( 301 r"select biodatabase_id from biodatabase where name = %s", 302 (dbname,)) 303 rv = self.cursor.fetchall() 304 if not rv: 305 raise KeyError("Cannot find biodatabase with name %r" % dbname) 306 # Cannot happen (UK) 307 ## assert len(rv) == 1, "More than one biodatabase with name %r" % dbname 308 return rv[0][0]
309
310 - def fetch_seqid_by_display_id(self, dbid, name):
311 sql = r"select bioentry_id from bioentry where name = %s" 312 fields = [name] 313 if dbid: 314 sql += " and biodatabase_id = %s" 315 fields.append(dbid) 316 self.execute(sql, fields) 317 rv = self.cursor.fetchall() 318 if not rv: 319 raise IndexError("Cannot find display id %r" % name) 320 if len(rv) > 1: 321 raise IndexError("More than one entry with display id %r" % name) 322 return rv[0][0]
323
324 - def fetch_seqid_by_accession(self, dbid, name):
325 sql = r"select bioentry_id from bioentry where accession = %s" 326 fields = [name] 327 if dbid: 328 sql += " and biodatabase_id = %s" 329 fields.append(dbid) 330 self.execute(sql, fields) 331 rv = self.cursor.fetchall() 332 if not rv: 333 raise IndexError("Cannot find accession %r" % name) 334 if len(rv) > 1: 335 raise IndexError("More than one entry with accession %r" % name) 336 return rv[0][0]
337
338 - def fetch_seqids_by_accession(self, dbid, name):
339 sql = r"select bioentry_id from bioentry where accession = %s" 340 fields = [name] 341 if dbid: 342 sql += " and biodatabase_id = %s" 343 fields.append(dbid) 344 return self.execute_and_fetch_col0(sql, fields)
345
346 - def fetch_seqid_by_version(self, dbid, name):
347 acc_version = name.split(".") 348 if len(acc_version) > 2: 349 raise IndexError("Bad version %r" % name) 350 acc = acc_version[0] 351 if len(acc_version) == 2: 352 version = acc_version[1] 353 else: 354 version = "0" 355 sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \ 356 r" AND version = %s" 357 fields = [acc, version] 358 if dbid: 359 sql += " and biodatabase_id = %s" 360 fields.append(dbid) 361 self.execute(sql, fields) 362 rv = self.cursor.fetchall() 363 if not rv: 364 raise IndexError("Cannot find version %r" % name) 365 if len(rv) > 1: 366 raise IndexError("More than one entry with version %r" % name) 367 return rv[0][0]
368
369 - def fetch_seqid_by_identifier(self, dbid, identifier):
370 # YB: was fetch_seqid_by_seqid 371 sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s" 372 fields = [identifier] 373 if dbid: 374 sql += " and biodatabase_id = %s" 375 fields.append(dbid) 376 self.execute(sql, fields) 377 rv = self.cursor.fetchall() 378 if not rv: 379 raise IndexError("Cannot find display id %r" % identifier) 380 return rv[0][0]
381
382 - def list_biodatabase_names(self):
383 return self.execute_and_fetch_col0( 384 "SELECT name FROM biodatabase")
385
386 - def list_bioentry_ids(self, dbid):
387 return self.execute_and_fetch_col0( 388 "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s", 389 (dbid,))
390
391 - def list_bioentry_display_ids(self, dbid):
392 return self.execute_and_fetch_col0( 393 "SELECT name FROM bioentry WHERE biodatabase_id = %s", 394 (dbid,))
395
396 - def list_any_ids(self, sql, args):
397 """Return ids given a SQL statement to select for them. 398 399 This assumes that the given SQL does a SELECT statement that 400 returns a list of items. This parses them out of the 2D list 401 they come as and just returns them in a list. 402 """ 403 return self.execute_and_fetch_col0(sql, args)
404
405 - def execute_one(self, sql, args=None):
406 self.execute(sql, args or ()) 407 rv = self.cursor.fetchall() 408 assert len(rv) == 1, "Expected 1 response, got %d" % len(rv) 409 return rv[0]
410
411 - def execute(self, sql, args=None):
412 """Just execute an sql command. 413 """ 414 self.dbutils.execute(self.cursor, sql, args)
415
416 - def get_subseq_as_string(self, seqid, start, end):
417 length = end - start 418 # XXX Check this on MySQL and PostgreSQL. substr should be general, 419 # does it need dbutils? 420 #return self.execute_one( 421 # """select SUBSTRING(seq FROM %s FOR %s) 422 # from biosequence where bioentry_id = %s""", 423 # (start+1, length, seqid))[0] 424 # 425 # Convert to a string on returning for databases that give back 426 # unicode. Shouldn't need unicode for sequences so this seems safe. 427 return str(self.execute_one( 428 """select SUBSTR(seq, %s, %s) 429 from biosequence where bioentry_id = %s""", 430 (start + 1, length, seqid))[0])
431
432 - def execute_and_fetch_col0(self, sql, args=None):
433 self.execute(sql, args or ()) 434 return [field[0] for field in self.cursor.fetchall()]
435
436 - def execute_and_fetchall(self, sql, args=None):
437 self.execute(sql, args or ()) 438 return self.cursor.fetchall()
439 440 _allowed_lookups = { 441 # Lookup name / function name to get id, function to list all ids 442 'primary_id': "fetch_seqid_by_identifier", 443 'gi': "fetch_seqid_by_identifier", 444 'display_id': "fetch_seqid_by_display_id", 445 'name': "fetch_seqid_by_display_id", 446 'accession': "fetch_seqid_by_accession", 447 'version': "fetch_seqid_by_version", 448 } 449 450
451 -class BioSeqDatabase:
452 """Represents a namespace (sub-database) within the BioSQL database. 453 454 i.e. One row in the biodatabase table, and all all rows in the bioentry 455 table associated with it. 456 """
457 - def __init__(self, adaptor, name):
458 self.adaptor = adaptor 459 self.name = name 460 self.dbid = self.adaptor.fetch_dbid_by_dbname(name)
461
462 - def __repr__(self):
463 return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)
464
465 - def get_Seq_by_id(self, name):
466 """Gets a DBSeqRecord object by its name 467 468 Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN') 469 470 The name of this method is misleading since it returns a DBSeqRecord 471 rather than a DBSeq ojbect, and presumably was to mirror BioPerl. 472 """ 473 seqid = self.adaptor.fetch_seqid_by_display_id(self.dbid, name) 474 return BioSeq.DBSeqRecord(self.adaptor, seqid)
475
476 - def get_Seq_by_acc(self, name):
477 """Gets a DBSeqRecord object by accession number 478 479 Example: seq_rec = db.get_Seq_by_acc('X77802') 480 481 The name of this method is misleading since it returns a DBSeqRecord 482 rather than a DBSeq ojbect, and presumably was to mirror BioPerl. 483 """ 484 seqid = self.adaptor.fetch_seqid_by_accession(self.dbid, name) 485 return BioSeq.DBSeqRecord(self.adaptor, seqid)
486
487 - def get_Seq_by_ver(self, name):
488 """Gets a DBSeqRecord object by version number 489 490 Example: seq_rec = db.get_Seq_by_ver('X77802.1') 491 492 The name of this method is misleading since it returns a DBSeqRecord 493 rather than a DBSeq ojbect, and presumably was to mirror BioPerl. 494 """ 495 seqid = self.adaptor.fetch_seqid_by_version(self.dbid, name) 496 return BioSeq.DBSeqRecord(self.adaptor, seqid)
497
498 - def get_Seqs_by_acc(self, name):
499 """Gets a list of DBSeqRecord objects by accession number 500 501 Example: seq_recs = db.get_Seq_by_acc('X77802') 502 503 The name of this method is misleading since it returns a list of 504 DBSeqRecord objects rather than a list of DBSeq ojbects, and presumably 505 was to mirror BioPerl. 506 """ 507 seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name) 508 return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]
509
510 - def get_all_primary_ids(self):
511 """All the primary_ids of the sequences in the database (OBSOLETE). 512 513 These maybe ids (display style) or accession numbers or 514 something else completely different - they *are not* 515 meaningful outside of this database implementation. 516 517 Please use .keys() instead of .get_all_primary_ids() 518 """ 519 import warnings 520 warnings.warn("Use bio_seq_database.keys() instead of " 521 "bio_seq_database.get_all_primary_ids()", 522 BiopythonDeprecationWarning) 523 return self.keys()
524
525 - def __getitem__(self, key):
526 return BioSeq.DBSeqRecord(self.adaptor, key)
527
528 - def __delitem__(self, key):
529 """Remove an entry and all its annotation.""" 530 if key not in self: 531 raise KeyError(key) 532 #Assuming this will automatically cascade to the other tables... 533 sql = "DELETE FROM bioentry " + \ 534 "WHERE biodatabase_id=%s AND bioentry_id=%s;" 535 self.adaptor.execute(sql, (self.dbid, key))
536
537 - def __len__(self):
538 """Number of records in this namespace (sub database).""" 539 sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \ 540 "WHERE biodatabase_id=%s;" 541 return int(self.adaptor.execute_and_fetch_col0(sql, (self.dbid, ))[0])
542
543 - def __contains__(self, value):
544 """Check if a primary (internal) id is this namespace (sub database).""" 545 sql = "SELECT COUNT(bioentry_id) FROM bioentry " + \ 546 "WHERE biodatabase_id=%s AND bioentry_id=%s;" 547 #The bioentry_id field is an integer in the schema. 548 #PostgreSQL will throw an error if we use a non integer in the query. 549 try: 550 bioentry_id = int(value) 551 except ValueError: 552 return False 553 return bool(self.adaptor.execute_and_fetch_col0(sql, 554 (self.dbid, bioentry_id))[0])
555
556 - def __iter__(self):
557 """Iterate over ids (which may not be meaningful outside this database).""" 558 #TODO - Iterate over the cursor, much more efficient 559 return iter(self.adaptor.list_bioentry_ids(self.dbid))
560 561 if hasattr(dict, "iteritems"): 562 #Python 2, use iteritems etc
563 - def keys(self):
564 """List of ids which may not be meaningful outside this database.""" 565 return self.adaptor.list_bioentry_ids(self.dbid)
566
567 - def values(self):
568 """List of DBSeqRecord objects in the namespace (sub database).""" 569 return [self[key] for key in self.keys()]
570
571 - def items(self):
572 """List of (id, DBSeqRecord) for the namespace (sub database).""" 573 return [(key, self[key]) for key in self.keys()]
574
575 - def iterkeys(self):
576 """Iterate over ids (which may not be meaningful outside this database).""" 577 return iter(self)
578
579 - def itervalues(self):
580 """Iterate over DBSeqRecord objects in the namespace (sub database).""" 581 for key in self: 582 yield self[key]
583
584 - def iteritems(self):
585 """Iterate over (id, DBSeqRecord) for the namespace (sub database).""" 586 for key in self: 587 yield key, self[key]
588 else: 589 #Python 3, items etc are all iterators
590 - def keys(self):
591 """Iterate over ids (which may not be meaningful outside this database).""" 592 return iter(self)
593
594 - def values(self):
595 """Iterate over DBSeqRecord objects in the namespace (sub database).""" 596 for key in self: 597 yield self[key]
598
599 - def items(self):
600 """Iterate over (id, DBSeqRecord) for the namespace (sub database).""" 601 for key in self: 602 yield key, self[key]
603
604 - def lookup(self, **kwargs):
605 if len(kwargs) != 1: 606 raise TypeError("single key/value parameter expected") 607 k, v = kwargs.items()[0] 608 if k not in _allowed_lookups: 609 raise TypeError("lookup() expects one of %s, not %r" % 610 (repr(_allowed_lookups.keys())[1:-1], repr(k))) 611 lookup_name = _allowed_lookups[k] 612 lookup_func = getattr(self.adaptor, lookup_name) 613 seqid = lookup_func(self.dbid, v) 614 return BioSeq.DBSeqRecord(self.adaptor, seqid)
615
616 - def get_Seq_by_primary_id(self, seqid):
617 """Get a DBSeqRecord by the primary (internal) id (OBSOLETE). 618 619 Rather than db.get_Seq_by_primary_id(my_id) use db[my_id] 620 621 The name of this method is misleading since it returns a DBSeqRecord 622 rather than a DBSeq ojbect, and presumably was to mirror BioPerl. 623 """ 624 import warnings 625 warnings.warn("Use bio_seq_database[my_id] instead of " 626 "bio_seq_database.get_Seq_by_primary_id(my_id)", 627 BiopythonDeprecationWarning) 628 return self[seqid]
629
630 - def load(self, record_iterator, fetch_NCBI_taxonomy=False):
631 """Load a set of SeqRecords into the BioSQL database. 632 633 record_iterator is either a list of SeqRecord objects, or an 634 Iterator object that returns SeqRecord objects (such as the 635 output from the Bio.SeqIO.parse() function), which will be 636 used to populate the database. 637 638 fetch_NCBI_taxonomy is boolean flag allowing or preventing 639 connection to the taxonomic database on the NCBI server 640 (via Bio.Entrez) to fetch a detailed taxonomy for each 641 SeqRecord. 642 643 Example: 644 from Bio import SeqIO 645 count = db.load(SeqIO.parse(open(filename), format)) 646 647 Returns the number of records loaded. 648 """ 649 db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid, 650 fetch_NCBI_taxonomy) 651 num_records = 0 652 global _POSTGRES_RULES_PRESENT 653 for cur_record in record_iterator: 654 num_records += 1 655 #Hack to work arround BioSQL Bug 2839 - If using PostgreSQL and 656 #the RULES are present check for a duplicate record before loading 657 if _POSTGRES_RULES_PRESENT: 658 #Recreate what the Loader's _load_bioentry_table will do: 659 if cur_record.id.count(".") == 1: 660 accession, version = cur_record.id.split('.') 661 try: 662 version = int(version) 663 except ValueError: 664 accession = cur_record.id 665 version = 0 666 else: 667 accession = cur_record.id 668 version = 0 669 gi = cur_record.annotations.get("gi", None) 670 sql = "SELECT bioentry_id FROM bioentry WHERE (identifier " + \ 671 "= '%s' AND biodatabase_id = '%s') OR (accession = " + \ 672 "'%s' AND version = '%s' AND biodatabase_id = '%s')" 673 self.adaptor.execute(sql % (gi, self.dbid, accession, version, self.dbid)) 674 if self.adaptor.cursor.fetchone(): 675 raise self.adaptor.conn.IntegrityError("Duplicate record " 676 "detected: record has not been inserted") 677 #End of hack 678 db_loader.load_seqrecord(cur_record) 679 return num_records
680