Package Bio :: Package motifs :: Package jaspar :: Module db
[hide private]
[frames] | no frames]

Source Code for Module Bio.motifs.jaspar.db

  1  # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Provides read access to a JASPAR5 formatted database. 
  6   
  7  This module requires MySQLdb to be installed. 
  8   
  9  Example, substitute your database credentials as 
 10  appropriate: 
 11   
 12      >>> from Bio.motifs.jaspar.db import JASPAR5 
 13      >>> 
 14      >>> JASPAR_DB_HOST = "hostname.example.org" 
 15      >>> JASPAR_DB_NAME = "JASPAR_2013" 
 16      >>> JASPAR_DB_USER = "guest" 
 17      >>> JASPAR_DB_PASS = "guest" 
 18      >>> 
 19      >>> DFLT_COLLECTION = 'CORE' 
 20      >>> jdb = JASPAR5( 
 21      ...     host=JASPAR_DB_HOST, 
 22      ...     name=JASPAR_DB_NAME, 
 23      ...     user=JASPAR_DB_USER, 
 24      ...     password=JASPAR_DB_PASS 
 25      ... ) 
 26      >>> 
 27      >>> 
 28      >>> ets1 = jdb.fetch_motif_by_id('MA0098') 
 29      >>> print(ets1) 
 30      TF name ETS1 
 31      Matrix ID   MA0098.1 
 32      Collection  CORE 
 33      TF class    Winged Helix-Turn-Helix 
 34      TF family   Ets 
 35      Species 9606 
 36      Taxonomic group vertebrates 
 37      Accession   ['CAG47050'] 
 38      Data type used  SELEX 
 39      Medline 1542566 
 40      PAZAR ID    TF0000070 
 41      Comments    - 
 42      Matrix: 
 43              0      1      2      3      4      5 
 44      A:   4.00  17.00   0.00   0.00   0.00   5.00 
 45      C:  16.00   0.00   1.00  39.00  39.00   3.00 
 46      G:   4.00   0.00   0.00   1.00   0.00  17.00 
 47      T:  16.00  23.00  39.00   0.00   1.00  15.00 
 48   
 49   
 50      >>> 
 51      >>> motifs = jdb.fetch_motifs( 
 52      ...     collection = 'CORE', 
 53      ...     tax_group = ['vertebrates', 'insects'], 
 54      ...     tf_class = 'Winged Helix-Turn-Helix', 
 55      ...     tf_family = ['Forkhead', 'Ets'], 
 56      ...     min_ic = 12 
 57      ... ) 
 58      >>> 
 59      >>> for motif in motifs: 
 60      ...     pass # do something with the motif 
 61   
 62  """ 
 63   
 64  from __future__ import print_function 
 65   
 66  import warnings 
 67  from Bio import BiopythonWarning 
 68  from Bio import MissingPythonDependencyError 
 69   
 70  try: 
 71      import MySQLdb as mdb 
 72  except: 
 73      raise MissingPythonDependencyError("Install MySQLdb if you want to use " 
 74                                         "Bio.motifs.jaspar.db") 
 75   
 76  from Bio.Alphabet.IUPAC import unambiguous_dna as dna 
 77  from Bio.motifs import jaspar, matrix 
 78   
 79  __docformat__ = "restructuredtext en" 
 80   
 81  JASPAR_DFLT_COLLECTION = 'CORE' 
 82   
 83   
84 -class JASPAR5(object):
85 """ 86 Class representing a JASPAR5 DB. The methods within are loosely based 87 on the perl TFBS::DB::JASPAR5 module. 88 89 Note: We will only implement reading of JASPAR motifs from the DB. 90 Unlike the perl module, we will not attempt to implement any methods to 91 store JASPAR motifs or create a new DB at this time. 92 93 """ 94
95 - def __init__(self, host=None, name=None, user=None, password=None):
96 """ 97 Construct a JASPAR5 instance and connect to specified DB 98 99 Arguments: 100 host - host name of the the JASPAR DB server 101 name - name of the JASPAR database 102 user - user name to connect to the JASPAR DB 103 password - JASPAR DB password 104 105 """ 106 107 self.name = name 108 self.host = host 109 self.user = user 110 self.password = password 111 112 self.dbh = mdb.connect(host, user, password, name)
113
114 - def __str__(self):
115 """ 116 Return a string represention of the JASPAR5 DB connection. 117 118 """ 119 120 text = "%s\@%s:%s" % (self.user, self.host, self.name) 121 122 return text
123
124 - def fetch_motif_by_id(self, id):
125 """ 126 Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID 127 (e.g. 'MA0001.1'). 128 129 Arguments: 130 131 - id - JASPAR matrix ID. This may be a fully specified ID including 132 the version number (e.g. MA0049.2) or just the base ID (e.g. 133 MA0049). If only a base ID is provided, the latest version is 134 returned. 135 136 Returns: 137 138 - A Bio.motifs.jaspar.Motif object 139 140 **NOTE:** The perl TFBS module allows you to specify the type of matrix 141 to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as 142 PFMs so this does not really belong here. Once a PFM is fetched the 143 pwm() and pssm() methods can be called to return the normalized and 144 log-odds matrices. 145 146 """ 147 148 # separate stable ID and version number 149 (base_id, version) = jaspar.split_jaspar_id(id) 150 if not version: 151 # if ID contains no version portion, fetch the latest version 152 version = self._fetch_latest_version(base_id) 153 154 # fetch internal JASPAR matrix ID - also a check for validity 155 int_id = None 156 if version: 157 int_id = self._fetch_internal_id(base_id, version) 158 159 # fetch JASPAR motif using internal ID 160 motif = None 161 if int_id: 162 motif = self._fetch_motif_by_internal_id(int_id) 163 164 return motif
165
166 - def fetch_motifs_by_name(self, name):
167 """ 168 Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s). 169 170 Arguments: 171 name - a single name or list of names 172 Returns: 173 A list of Bio.motifs.Motif.japar objects 174 175 Notes: 176 Names are not guaranteed to be unique. There may be more than one 177 motif with the same name. Therefore even if name specifies a single 178 name, a list of motifs is returned. This just calls 179 self.fetch_motifs(collection = None, tf_name = name). 180 181 This behaviour is different from the TFBS perl module's 182 get_Matrix_by_name() method which always returns a single matrix, 183 issuing a warning message and returning the first matrix retrieved 184 in the case where multiple matrices have the same name. 185 186 """ 187 188 return self.fetch_motifs(collection=None, tf_name=name)
189
190 - def fetch_motifs( 191 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 192 tf_family=None, matrix_id=None, tax_group=None, species=None, 193 pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0, 194 min_sites=0, all=False, all_versions=False 195 ):
196 """ 197 Fetch a jaspar.Record (list) of motifs based on the provided selection 198 criteria. 199 200 Arguments:: 201 202 Except where obvious, all selection criteria arguments may be 203 specified as a single value or a list of values. Motifs must 204 meet ALL the specified selection criteria to be returned with 205 the precedent exceptions noted below. 206 207 all - Takes precedent of all other selection criteria. 208 Every motif is returned. If 'all_versions' is also 209 specified, all versions of every motif are returned, 210 otherwise just the latest version of every motif is 211 returned. 212 matrix_id - Takes precedence over all other selection criteria 213 except 'all'. Only motifs with the given JASPAR 214 matrix ID(s) are returned. A matrix ID may be 215 specified as just a base ID or full JASPAR IDs 216 including version number. If only a base ID is 217 provided for specific motif(s), then just the latest 218 version of those motif(s) are returned unless 219 'all_versions' is also specified. 220 collection - Only motifs from the specified JASPAR collection(s) 221 are returned. NOTE - if not specified, the collection 222 defaults to CORE for all other selection criteria 223 except 'all' and 'matrix_id'. To apply the other 224 selection criteria across all JASPAR collections, 225 explicitly set collection=None. 226 tf_name - Only motifs with the given name(s) are returned. 227 tf_class - Only motifs of the given TF class(es) are returned. 228 tf_family - Only motifs from the given TF families are returned. 229 tax_group - Only motifs belonging to the given taxonomic 230 supergroups are returned (e.g. 'vertebrates', 231 'insects', 'nematodes' etc.) 232 species - Only motifs derived from the given species are 233 returned. Species are specified as taxonomy IDs. 234 data_type - Only motifs generated with the given data type (e.g. 235 ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned. 236 NOTE - must match exactly as stored in the database. 
237 pazar_id - Only motifs with the given PAZAR TF ID are returned. 238 medline - Only motifs with the given medline (PubmMed IDs) are 239 returned. 240 min_ic - Only motifs whose profile matrices have at least this 241 information content (specificty) are returned. 242 min_length - Only motifs whose profiles are of at least this 243 length are returned. 244 min_sites - Only motifs compiled from at least these many binding 245 sites are returned. 246 all_versions- Unless specified, just the latest version of motifs 247 determined by the other selection criteria are 248 returned. Otherwise all versions of the selected 249 motifs are returned. 250 251 Returns: 252 253 - A Bio.motifs.jaspar.Record (list) of motifs. 254 255 """ 256 257 # Fetch the internal IDs of the motifs using the criteria provided 258 int_ids = self._fetch_internal_id_list( 259 collection=collection, 260 tf_name=tf_name, 261 tf_class=tf_class, 262 tf_family=tf_family, 263 matrix_id=matrix_id, 264 tax_group=tax_group, 265 species=species, 266 pazar_id=pazar_id, 267 data_type=data_type, 268 medline=medline, 269 all=all, 270 all_versions=all_versions 271 ) 272 273 record = jaspar.Record() 274 275 """ 276 Now further filter motifs returned above based on any specified 277 matrix specific criteria. 278 """ 279 for int_id in int_ids: 280 motif = self._fetch_motif_by_internal_id(int_id) 281 282 # Filter motifs to those with matrix IC greater than min_ic 283 if min_ic: 284 if motif.pssm.mean() < min_ic: 285 continue 286 287 # Filter motifs to those with minimum length of min_length 288 if min_length: 289 if motif.length < min_length: 290 continue 291 292 # XXX We could also supply a max_length filter. 293 294 """ 295 Filter motifs to those composed of at least this many sites. 296 The perl TFBS module assumes column sums may be different but 297 this should be strictly enforced here we will ignore this and 298 just use the first column sum. 
299 """ 300 if min_sites: 301 num_sites = sum( 302 [motif.counts[nt][0] for nt in motif.alphabet.letters] 303 ) 304 if num_sites < min_sites: 305 continue 306 307 record.append(motif) 308 309 return record
310
311 - def _fetch_latest_version(self, base_id):
312 """ 313 Get the latest version number for the given base_id, 314 315 """ 316 317 cur = self.dbh.cursor() 318 cur.execute("""select VERSION from MATRIX where BASE_id = %s 319 order by VERSION desc limit 1""", (base_id,)) 320 321 row = cur.fetchone() 322 323 latest = None 324 if row: 325 latest = row[0] 326 else: 327 warnings.warn("Failed to fetch latest version number for JASPAR motif with base ID '{0}'. No JASPAR motif with this base ID appears to exist in the database.".format(base_id), BiopythonWarning) 328 329 return latest
330
331 - def _fetch_internal_id(self, base_id, version):
332 """ 333 Fetch the internal id for a base id + version. Also checks if this 334 combo exists or not 335 336 """ 337 338 cur = self.dbh.cursor() 339 cur.execute("""select id from MATRIX where BASE_id = %s 340 and VERSION = %s""", (base_id, version)) 341 342 row = cur.fetchone() 343 344 int_id = None 345 if row: 346 int_id = row[0] 347 else: 348 warnings.warn("Failed to fetch internal database ID for JASPAR motif with matrix ID '{0}.{1}'. No JASPAR motif with this matrix ID appears to exist.".format(base_id, version), BiopythonWarning) 349 350 return int_id
351
352 - def _fetch_motif_by_internal_id(self, int_id):
353 # fetch basic motif information 354 cur = self.dbh.cursor() 355 cur.execute("""select BASE_ID, VERSION, COLLECTION, NAME from MATRIX 356 where id = %s""", (int_id,)) 357 358 row = cur.fetchone() 359 360 # This should never happen as it is an internal method. If it does 361 # we should probably raise an exception 362 if not row: 363 warnings.warn("Could not fetch JASPAR motif with internal ID = {0}".format(int_id), BiopythonWarning) 364 return None 365 366 base_id = row[0] 367 version = row[1] 368 collection = row[2] 369 name = row[3] 370 371 matrix_id = "".join([base_id, '.', str(version)]) 372 373 # fetch the counts matrix 374 counts = self._fetch_counts_matrix(int_id) 375 376 # Create new JASPAR motif 377 motif = jaspar.Motif( 378 matrix_id, name, collection=collection, counts=counts 379 ) 380 381 # fetch species 382 cur.execute("""select TAX_ID from MATRIX_SPECIES 383 where id = %s""", (int_id,)) 384 tax_ids = [] 385 rows = cur.fetchall() 386 for row in rows: 387 tax_ids.append(row[0]) 388 389 # Many JASPAR motifs (especially those not in the CORE collection) 390 # do not have taxonomy IDs. So this warning would get annoying. 391 #if not tax_ids: 392 # warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif {0}".format(motif.matrix_id), BiopythonWarning) 393 394 motif.species = tax_ids 395 396 # fetch protein accession numbers 397 cur.execute("select ACC FROM MATRIX_PROTEIN where id = %s", (int_id,)) 398 accs = [] 399 rows = cur.fetchall() 400 for row in rows: 401 accs.append(row[0]) 402 403 # Similarly as for taxonomy IDs, it would get annoying to print 404 # warnings for JASPAR motifs which do not have accession numbers. 
405 406 motif.acc = accs 407 408 # fetch remaining annotation as tags from the ANNOTATION table 409 cur.execute("""select TAG, VAL from MATRIX_ANNOTATION 410 where id = %s""", (int_id,)) 411 rows = cur.fetchall() 412 for row in rows: 413 attr = row[0] 414 val = row[1] 415 if attr == 'class': 416 motif.tf_class = val 417 elif attr == 'family': 418 motif.tf_family = val 419 elif attr == 'tax_group': 420 motif.tax_group = val 421 elif attr == 'type': 422 motif.data_type = val 423 elif attr == 'pazar_tf_id': 424 motif.pazar_id = val 425 elif attr == 'medline': 426 motif.medline = val 427 elif attr == 'comment': 428 motif.comment = val 429 else: 430 """ 431 TODO If we were to implement additional abitrary tags 432 motif.tag(attr, val) 433 """ 434 pass 435 436 return motif
437
438 - def _fetch_counts_matrix(self, int_id):
439 """ 440 Fetch the counts matrix from the JASPAR DB by the internal ID 441 442 Returns a Bio.motifs.matrix.GenericPositionMatrix 443 444 """ 445 counts = {} 446 cur = self.dbh.cursor() 447 448 for base in dna.letters: 449 base_counts = [] 450 451 cur.execute("""select val from MATRIX_DATA where ID = %s 452 and row = %s order by col""", (int_id, base)) 453 454 rows = cur.fetchall() 455 for row in rows: 456 base_counts.append(row[0]) 457 458 counts[base] = [float(x) for x in base_counts] 459 460 return matrix.GenericPositionMatrix(dna, counts)
461
462 - def _fetch_internal_id_list( 463 self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None, 464 tf_family=None, matrix_id=None, tax_group=None, species=None, 465 pazar_id=None, data_type=None, medline=None, all=False, 466 all_versions=False 467 ):
468 """ 469 Fetch a list of internal JASPAR motif IDs based on various passed 470 parameters which may then be used to fetch the rest of the motif data. 471 472 Caller: 473 fetch_motifs() 474 475 Arguments: 476 See arguments sections of fetch_motifs() 477 478 Returns: 479 A list of internal JASPAR motif IDs which match the given 480 selection criteria arguments. 481 482 483 Build an SQL query based on the selection arguments provided. 484 485 1: First add table joins and sub-clauses for criteria corresponding to 486 named fields from the MATRIX and MATRIX_SPECIES tables such as 487 collection, matrix ID, name, species etc. 488 489 2: Then add joins/sub-clauses for tag/value parameters from the 490 MATRIX_ANNOTATION table. 491 492 For the surviving matrices, the responsibility to do matrix-based 493 feature filtering such as ic, number of sites etc, fall on the 494 calling fetch_motifs() method. 495 496 """ 497 498 int_ids = [] 499 500 cur = self.dbh.cursor() 501 502 """ 503 Special case 1: fetch ALL motifs. Highest priority. 504 Ignore all other selection arguments. 505 """ 506 if all: 507 cur.execute("select ID from MATRIX") 508 rows = cur.fetchall() 509 510 for row in rows: 511 int_ids.append(row[0]) 512 513 return int_ids 514 515 """ 516 Special case 2: fetch specific motifs by their JASPAR IDs. This 517 has higher priority than any other except the above 'all' case. 518 Ignore all other selection arguments. 519 """ 520 if matrix_id: 521 """ 522 These might be either stable IDs or stable_ID.version. 
523 If just stable ID and if all_versions == 1, return all versions, 524 otherwise just the latest 525 """ 526 if all_versions: 527 for id in matrix_id: 528 # ignore vesion here, this is a stupidity filter 529 (base_id, version) = jaspar.split_jaspar_id(id) 530 cur.execute( 531 "select ID from MATRIX where BASE_ID = %s", (base_id,) 532 ) 533 534 rows = cur.fetchall() 535 for row in rows: 536 int_ids.append(row[0]) 537 else: 538 # only the lastest version, or the requested version 539 for id in matrix_id: 540 (base_id, version) = jaspar.split_jaspar_id(id) 541 542 if not version: 543 version = self._fetch_latest_version(base_id) 544 545 int_id = None 546 if version: 547 int_id = self._fetch_internal_id(base_id, version) 548 549 if int_id: 550 int_ids.append(int_id) 551 552 return int_ids 553 554 tables = ["MATRIX m"] 555 where_clauses = [] 556 557 # Select by MATRIX.COLLECTION 558 if collection: 559 if isinstance(collection, list): 560 # Multiple collections passed in as a list 561 clause = "m.COLLECTION in ('" 562 clause = "".join([clause, "','".join(collection)]) 563 clause = "".join([clause, "')"]) 564 else: 565 # A single collection - typical usage 566 clause = "m.COLLECTION = '%s'" % collection 567 568 where_clauses.append(clause) 569 570 # Select by MATRIX.NAME 571 if tf_name: 572 if isinstance(tf_name, list): 573 # Multiple names passed in as a list 574 clause = "m.NAME in ('" 575 clause = "".join([clause, "','".join(tf_name)]) 576 clause = "".join([clause, "')"]) 577 else: 578 # A single name 579 clause = "m.NAME = '%s'" % tf_name 580 581 where_clauses.append(clause) 582 583 # Select by MATRIX_SPECIES.TAX_ID 584 if species: 585 tables.append("MATRIX_SPECIES ms") 586 where_clauses.append("m.ID = ms.ID") 587 588 """ 589 NOTE: species are numeric taxonomy IDs but stored as varchars 590 in the DB. 
591 """ 592 if isinstance(species, list): 593 # Multiple tax IDs passed in as a list 594 clause = "ms.TAX_ID in ('" 595 clause = "".join([clause, "','".join(str(s) for s in species)]) 596 clause = "".join([clause, "')"]) 597 else: 598 # A single tax ID 599 clause = "ms.TAX_ID = '%s'" % str(species) 600 601 where_clauses.append(clause) 602 603 """ 604 Tag based selection from MATRIX_ANNOTATION 605 Differs from perl TFBS module in that the matrix class explicitly 606 has a tag attribute corresponding to the tags in the database. This 607 provides tremendous flexibility in adding new tags to the DB and 608 being able to select based on those tags with out adding new code. 609 In the JASPAR Motif class we have elected to use specific attributes 610 for the most commonly used tags and here correspondingly only allow 611 selection on these attributes. 612 613 The attributes corresponding to the tags for which selection is 614 provided are: 615 616 Attribute Tag 617 tf_class class 618 tf_family family 619 pazar_id pazar_tf_id 620 medline medline 621 data_type type 622 tax_group tax_group 623 """ 624 625 # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class") 626 if tf_class: 627 tables.append("MATRIX_ANNOTATION ma1") 628 where_clauses.append("m.ID = ma1.ID") 629 630 clause = "ma1.TAG = 'class'" 631 if isinstance(tf_class, list): 632 # A list of TF classes 633 clause = "".join([clause, " and ma1.VAL in ('"]) 634 clause = "".join([clause, "','".join(tf_class)]) 635 clause = "".join([clause, "')"]) 636 else: 637 # A single TF class 638 clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class]) 639 640 where_clauses.append(clause) 641 642 # Select by TF families (MATRIX_ANNOTATION.TAG="family") 643 if tf_family: 644 tables.append("MATRIX_ANNOTATION ma2") 645 where_clauses.append("m.ID = ma2.ID") 646 647 clause = "ma2.TAG = 'family'" 648 if isinstance(tf_family, list): 649 # A list of TF families 650 clause = "".join([clause, " and ma2.VAL in ('"]) 651 clause = 
"".join([clause, "','".join(tf_family)]) 652 clause = "".join([clause, "')"]) 653 else: 654 # A single TF family 655 clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family]) 656 657 where_clauses.append(clause) 658 659 # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id") 660 if pazar_id: 661 tables.append("MATRIX_ANNOTATION ma3") 662 where_clauses.append("m.ID = ma3.ID") 663 664 clause = "ma3.TAG = 'pazar_tf_id'" 665 if isinstance(pazar_id, list): 666 # A list of PAZAR IDs 667 clause = "".join([clause, " and ma3.VAL in ('"]) 668 clause = "".join([clause, "','".join(pazar_id)]) 669 clause = "".join([clause, "')"]) 670 else: 671 # A single PAZAR ID 672 clause = "".join([" and ma3.VAL = '%s' " % pazar_id]) 673 674 where_clauses.append(clause) 675 676 # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline") 677 if medline: 678 tables.append("MATRIX_ANNOTATION ma4") 679 where_clauses.append("m.ID = ma4.ID") 680 681 clause = "ma4.TAG = 'medline'" 682 if isinstance(medline, list): 683 # A list of PubMed IDs 684 clause = "".join([clause, " and ma4.VAL in ('"]) 685 clause = "".join([clause, "','".join(medline)]) 686 clause = "".join([clause, "')"]) 687 else: 688 # A single PubMed ID 689 clause = "".join([" and ma4.VAL = '%s' " % medline]) 690 691 where_clauses.append(clause) 692 693 # Select by data type(s) used to compile the matrix 694 # (MATRIX_ANNOTATION.TAG="type") 695 if data_type: 696 tables.append("MATRIX_ANNOTATION ma5") 697 where_clauses.append("m.ID = ma5.ID") 698 699 clause = "ma5.TAG = 'type'" 700 if isinstance(data_type, list): 701 # A list of data types 702 clause = "".join([clause, " and ma5.VAL in ('"]) 703 clause = "".join([clause, "','".join(data_type)]) 704 clause = "".join([clause, "')"]) 705 else: 706 # A single data type 707 clause = "".join([" and ma5.VAL = '%s' " % data_type]) 708 709 where_clauses.append(clause) 710 711 # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group") 712 if tax_group: 713 
tables.append("MATRIX_ANNOTATION ma6") 714 where_clauses.append("m.ID = ma6.ID") 715 716 clause = "ma6.TAG = 'tax_group'" 717 if isinstance(tax_group, list): 718 # A list of tax IDs 719 clause = "".join([clause, " and ma6.VAL in ('"]) 720 clause = "".join([clause, "','".join(tax_group)]) 721 clause = "".join([clause, "')"]) 722 else: 723 # A single tax ID 724 clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group]) 725 726 where_clauses.append(clause) 727 728 sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)]) 729 730 if where_clauses: 731 sql = "".join([sql, " where ", " and ".join(where_clauses)]) 732 733 # print "sql = %s" % sql 734 735 cur.execute(sql) 736 rows = cur.fetchall() 737 738 for row in rows: 739 id = row[0] 740 if all_versions: 741 int_ids.append(id) 742 else: 743 # is the latest version? 744 if self._is_latest_version(id): 745 int_ids.append(id) 746 747 if len(int_ids) < 1: 748 warnings.warn("Zero motifs returned with current select critera", BiopythonWarning) 749 750 return int_ids
751
752 - def _is_latest_version(self, int_id):
753 """ 754 Does this internal ID represent the latest version of the JASPAR 755 matrix (collapse on base ids) 756 757 """ 758 cur = self.dbh.cursor() 759 760 cur.execute( 761 """select count(*) from MATRIX 762 where BASE_ID = (select BASE_ID from MATRIX where ID = %s) 763 and VERSION > (select VERSION from MATRIX where ID = %s)""", 764 (int_id, int_id) 765 ) 766 767 row = cur.fetchone() 768 769 count = row[0] 770 771 if count == 0: 772 # no matrices with higher version ID and same base id 773 return True 774 775 return False
776