Package Bio :: Module SeqFeature
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqFeature

   1  # Copyright 2000-2003 Jeff Chang. 
   2  # Copyright 2001-2008 Brad Chapman. 
   3  # Copyright 2005-2012 by Peter Cock. 
   4  # Copyright 2006-2009 Michiel de Hoon. 
   5  # All rights reserved. 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  """Represent a Sequence Feature holding info about a part of a sequence. 
  10   
  11  This is heavily modeled after the Biocorba SeqFeature objects, and 
  12  may be pretty biased towards GenBank stuff since I'm writing it 
  13  for the GenBank parser output... 
  14   
  15  What's here: 
  16   
  17  Base class to hold a Feature 
  18  ---------------------------- 
  19   
  20  classes: 
  21   
  22      - SeqFeature 
  23   
  24  Hold information about a Reference 
  25  ---------------------------------- 
  26   
  27  This is an attempt to create a General class to hold Reference type 
  28  information. 
  29   
  30  classes: 
  31   
  32      - Reference 
  33   
  34  Specify locations of a feature on a Sequence 
  35  -------------------------------------------- 
  36   
  37  This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in 
  38  much the same way as Biocorba. This has the advantages of allowing us 
  39  to handle fuzzy stuff in case anyone needs it, and also be compatible 
  40  with Biocorba. 
  41   
  42  classes: 
  43   
  44      - FeatureLocation - Specify the start and end location of a feature. 
  45      - CompoundLocation - Collection of FeatureLocation objects (for joins etc). 
  46   
  47      - ExactPosition - Specify the position as being exact. 
  48      - WithinPosition - Specify a position occurring within some range. 
  49      - BetweenPosition - Specify a position occurring between a range (OBSOLETE?). 
  50      - BeforePosition - Specify the position as being found before some base. 
  51      - AfterPosition - Specify the position as being found after some base. 
  52      - OneOfPosition - Specify a position where the location can be multiple positions. 
  53      - UnknownPosition - Represents missing information like '?' in UniProt. 
  54  """ 
  55   
  56  from __future__ import print_function 
  57   
  58  from Bio.Seq import MutableSeq, reverse_complement 
  59   
  60  __docformat__ = "restructuredtext en" 
class SeqFeature(object):
    """Represent a Sequence Feature on an object.

    Attributes:

     - location - the location of the feature on the sequence
       (FeatureLocation or CompoundLocation, or None if not yet set)
     - type - the specified type of the feature (ie. CDS, exon, repeat...)
     - location_operator - a string specifying how this SeqFeature may
       be related to others. For example, for a GenBank feature whose
       location is a join(...), the location_operator would be "join".
       This is a proxy for feature.location.operator and only applies to
       compound locations.
     - strand - A value specifying on which strand (of a DNA sequence, for
       instance) the feature deals with. 1 indicates the plus strand, -1
       indicates the minus strand, 0 indicates stranded but unknown (? in
       GFF3), while the default of None indicates that strand doesn't apply
       (dot in GFF3, e.g. features on proteins). Note this is a shortcut for
       accessing the strand property of the feature's location.
     - id - A string identifier for the feature.
     - ref - A reference to another sequence. This could be an accession
       number for some different sequence. Note this is a shortcut for the
       reference property of the feature's location.
     - ref_db - A different database for the reference accession number.
       Note this is a shortcut for the reference property of the location
     - qualifiers - A dictionary of qualifiers on the feature. These are
       analogous to the qualifiers from a GenBank feature table. The keys of
       the dictionary are qualifier names, the values are the qualifier
       values.
     - sub_features - Obsolete list of additional SeqFeatures which was
       used for holding compound locations (e.g. joins in GenBank/EMBL).
       This is now superseded by a CompoundLocation as the location,
       and should not be used (DEPRECATED).
    """

    def __init__(self, location=None, type='', location_operator='',
                 strand=None, id="<unknown id>",
                 qualifiers=None, sub_features=None,
                 ref=None, ref_db=None):
        """Initialize a SeqFeature on a Sequence.

        location can either be a FeatureLocation (with strand argument also
        given if required), or None.

        e.g. With no strand, on the forward strand, and on the reverse strand:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f1 = SeqFeature(FeatureLocation(5, 10), type="domain")
        >>> f1.strand == f1.location.strand == None
        True
        >>> f2 = SeqFeature(FeatureLocation(7, 110, strand=1), type="CDS")
        >>> f2.strand == f2.location.strand == +1
        True
        >>> f3 = SeqFeature(FeatureLocation(9, 108, strand=-1), type="CDS")
        >>> f3.strand == f3.location.strand == -1
        True

        An invalid strand will trigger an exception:

        >>> f4 = SeqFeature(FeatureLocation(50, 60), strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        Similarly if set via the FeatureLocation directly:

        >>> loc4 = FeatureLocation(50, 60, strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        For exact start/end positions, an integer can be used (as shown above)
        as shorthand for the ExactPosition object. For non-exact locations, the
        FeatureLocation must be specified via the appropriate position objects.

        Note that the strand, ref and ref_db arguments to the SeqFeature are
        now obsolete and will be deprecated in a future release (which will
        give warning messages) and later removed. Set them via the location
        object instead. Each of them requires a location; supplying one
        without a location raises ValueError.

        Note that location_operator and sub_features arguments can no longer
        be used, instead do this via the CompoundLocation object.

        Raises TypeError if location is neither None, a FeatureLocation nor a
        CompoundLocation.
        """
        if location is not None and \
                not isinstance(location, (FeatureLocation, CompoundLocation)):
            raise TypeError(
                "FeatureLocation, CompoundLocation (or None) required for the location")
        self.location = location
        self.type = type
        # The following assignments go through the property setters below,
        # which forward the value to self.location.
        if location_operator:
            # TODO - Deprecation warning
            self.location_operator = location_operator
        if strand is not None:
            # TODO - Deprecation warning
            self.strand = strand
        self.id = id
        if qualifiers is None:
            qualifiers = {}
        self.qualifiers = qualifiers
        if sub_features is None:
            sub_features = []
        else:
            import warnings
            from Bio import BiopythonDeprecationWarning
            warnings.warn("Rather than sub_features, use a CompoundFeatureLocation",
                          BiopythonDeprecationWarning)
        # Stored on the private attribute so that the deprecated public
        # property is not triggered a second time.
        self._sub_features = sub_features
        if ref is not None:
            # TODO - Deprecation warning
            self.ref = ref
        if ref_db is not None:
            # TODO - Deprecation warning
            self.ref_db = ref_db

    def _get_sub_features(self):
        # Only warn when there is actually something in the obsolete list.
        if self._sub_features:
            import warnings
            from Bio import BiopythonDeprecationWarning
            warnings.warn("Rather using f.sub_features, f.location should be a CompoundFeatureLocation",
                          BiopythonDeprecationWarning)
        return self._sub_features

    def _set_sub_features(self, value):
        # Assigning an empty/None value is silent; anything else is deprecated.
        if value:
            import warnings
            from Bio import BiopythonDeprecationWarning
            warnings.warn("Rather than f.sub_features, use a CompoundFeatureLocation for f.location",
                          BiopythonDeprecationWarning)
        self._sub_features = value

    sub_features = property(fget=_get_sub_features, fset=_set_sub_features,
                            doc="Obsolete representation of compound locations (DEPRECATED).")

    def _get_strand(self):
        # Behave like the ref, ref_db and location_operator proxies: a feature
        # with no location reports None instead of raising AttributeError.
        if self.location is None:
            return None
        return self.location.strand

    def _set_strand(self, value):
        try:
            self.location.strand = value
        except AttributeError:
            if self.location is None:
                # Setting None on a feature without a location is a no-op.
                if value is not None:
                    raise ValueError("Can't set strand without a location.")
            else:
                raise

    strand = property(fget=_get_strand, fset=_set_strand,
                      doc="""Feature's strand

                          This is a shortcut for feature.location.strand
                          """)

    def _get_ref(self):
        try:
            return self.location.ref
        except AttributeError:
            return None

    def _set_ref(self, value):
        try:
            self.location.ref = value
        except AttributeError:
            if self.location is None:
                # Setting None on a feature without a location is a no-op.
                if value is not None:
                    raise ValueError("Can't set ref without a location.")
            else:
                raise

    ref = property(fget=_get_ref, fset=_set_ref,
                   doc="""Feature location reference (e.g. accession).

                       This is a shortcut for feature.location.ref
                       """)

    def _get_ref_db(self):
        try:
            return self.location.ref_db
        except AttributeError:
            return None

    def _set_ref_db(self, value):
        # Same contract as _set_ref: without a location only None may be
        # "set" (a no-op); anything else is reported as a ValueError rather
        # than an AttributeError on NoneType.
        try:
            self.location.ref_db = value
        except AttributeError:
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set ref_db without a location.")
            else:
                raise

    ref_db = property(fget=_get_ref_db, fset=_set_ref_db,
                      doc="""Feature location reference's database.

                          This is a shortcut for feature.location.ref_db
                          """)

    def _get_location_operator(self):
        try:
            return self.location.operator
        except AttributeError:
            # No location, or a location without an operator attribute.
            return None

    def _set_location_operator(self, value):
        # Empty values are ignored; a real operator needs a CompoundLocation.
        if value:
            if isinstance(self.location, CompoundLocation):
                self.location.operator = value
            elif self.location is None:
                raise ValueError(
                    "Location is None so can't set its operator (to %r)" % value)
            else:
                raise ValueError(
                    "Only CompoundLocation gets an operator (%r)" % value)

    location_operator = property(fget=_get_location_operator, fset=_set_location_operator,
                                 doc="Location operator for compound locations (e.g. join).")

    def __repr__(self):
        """A string representation of the record for debugging."""
        answer = "%s(%s" % (self.__class__.__name__, repr(self.location))
        if self.type:
            answer += ", type=%s" % repr(self.type)
        if self.location_operator:
            answer += ", location_operator=%s" % repr(self.location_operator)
        if self.id and self.id != "<unknown id>":
            answer += ", id=%s" % repr(self.id)
        if self.ref:
            answer += ", ref=%s" % repr(self.ref)
        if self.ref_db:
            answer += ", ref_db=%s" % repr(self.ref_db)
        answer += ")"
        return answer

    def __str__(self):
        """A readable summary of the feature intended to be printed to screen.

        Qualifiers are listed sorted by key; the id line is omitted while the
        id is still the "<unknown id>" placeholder.
        """
        lines = ["type: %s\n" % self.type,
                 "location: %s\n" % self.location]
        if self.id and self.id != "<unknown id>":
            lines.append("id: %s\n" % self.id)
        lines.append("qualifiers:\n")
        for qual_key in sorted(self.qualifiers):
            lines.append("    Key: %s, Value: %s\n"
                         % (qual_key, self.qualifiers[qual_key]))
        # TODO - Remove this from __str__ since deprecated
        if self._sub_features:
            lines.append("Sub-Features\n")
            for sub_feature in self._sub_features:
                lines.append("%s\n" % sub_feature)
        return "".join(lines)

    def _shift(self, offset):
        """Returns a copy of the feature with its location shifted (PRIVATE).

        The annotation qualifiers are copied (a shallow copy of the dict).
        """
        answer = SeqFeature(location=self.location._shift(offset),
                            type=self.type,
                            location_operator=self.location_operator,
                            id=self.id,
                            qualifiers=dict(self.qualifiers))
        # This is to avoid the deprecation warning:
        answer._sub_features = [f._shift(offset) for f in self._sub_features]
        return answer

    def _flip(self, length):
        """Returns a copy of the feature with its location flipped (PRIVATE).

        The argument length gives the length of the parent sequence. For
        example a location 0..20 (+1 strand) with parent length 30 becomes
        after flipping 10..30 (-1 strand). Strandless (None) or unknown
        strand (0) remain like that - just their end points are changed.

        The annotation qualifiers are copied (a shallow copy of the dict).
        """
        answer = SeqFeature(location=self.location._flip(length),
                            type=self.type,
                            location_operator=self.location_operator,
                            id=self.id,
                            qualifiers=dict(self.qualifiers))
        # This is to avoid the deprecation warning; the sub-feature order is
        # reversed along with the coordinates.
        answer._sub_features = [f._flip(length)
                                for f in self._sub_features[::-1]]
        return answer

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        The parent_sequence can be a Seq like object or a string, and will
        generally return an object of the same type. The exception to this is
        a MutableSeq as the parent sequence will return a Seq object.

        This should cope with complex locations including complements, joins
        and fuzzy positions. Even mixed strand features should work! This
        also covers features on protein sequences (e.g. domains), although
        here reverse strand features are not permitted.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8, 15), type="domain")
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())

        Note - currently only sub-features of type "join" are supported.

        This simply delegates to the extract method of the feature's location.
        """
        return self.location.extract(parent_sequence)

    # Python 3:
    def __bool__(self):
        """Boolean value of an instance of this class (True).

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqFeature always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The SeqFeature may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 2:
    __nonzero__ = __bool__

    def __len__(self):
        """Returns the length of the region described by a feature.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8, 15), type="domain")
        >>> len(f)
        7
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())
        >>> len(f.extract(seq))
        7

        This is a proxy for taking the length of the feature's location:

        >>> len(f.location)
        7

        For simple features this is the same as the region spanned (end
        position minus start position using Pythonic counting). However, for
        a compound location (e.g. a CDS as the join of several exons) the
        gaps are not counted (e.g. introns). This ensures that len(f) matches
        len(f.extract(parent_seq)), and also makes sure things work properly
        with features wrapping the origin etc.
        """
        return len(self.location)

    def __iter__(self):
        """Iterate over the parent positions within the feature.

        The iteration order is strand aware, and can be thought of as moving
        along the feature using the parent sequence coordinates:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5, 10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> for i in f: print(i)
        9
        8
        7
        6
        5
        >>> list(f)
        [9, 8, 7, 6, 5]

        This is a proxy for iterating over the location,

        >>> list(f.location)
        [9, 8, 7, 6, 5]
        """
        return iter(self.location)

    def __contains__(self, value):
        """Check if an integer position is within the feature.

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5, 10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> [i for i in range(15) if i in f]
        [5, 6, 7, 8, 9]

        For example, to see which features include a SNP position, you could
        use this:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
        >>> for f in record.features:
        ...     if 1750 in f:
        ...         print("%s %s" % (f.type, f.location))
        source [0:154478](+)
        gene [1716:4347](-)
        tRNA join{[4310:4347](-), [1716:1751](-)}

        Note that for a feature defined as a join of several subfeatures (e.g.
        the union of several exons) the gaps are not checked (e.g. introns).
        In this example, the tRNA location is defined in the GenBank file as
        complement(join(1717..1751,4311..4347)), so that position 1760 falls
        in the gap:

        >>> for f in record.features:
        ...     if 1760 in f:
        ...         print("%s %s" % (f.type, f.location))
        source [0:154478](+)
        gene [1716:4347](-)

        Note that additional care may be required with fuzzy locations, for
        example just before a BeforePosition:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition
        >>> f = SeqFeature(FeatureLocation(BeforePosition(3), 8), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(10) if i in f]
        [3, 4, 5, 6, 7]

        Note that this is a proxy for testing membership on the location.

        >>> [i for i in range(10) if i in f.location]
        [3, 4, 5, 6, 7]
        """
        return value in self.location
480
# --- References


# TODO -- Will this hold PubMed and Medline information decently?
class Reference(object):
    """Represent a Generic Reference object.

    Attributes:

     - location - A list of Location objects specifying regions of
       the sequence that the references correspond to. If no locations are
       specified, the entire sequence is assumed.
     - authors - A big old string, or a list split by author, of authors
       for the reference.
     - consrtm - A string, empty by default (presumably the consortium
       responsible for the reference - TODO confirm).
     - title - The title of the reference.
     - journal - Journal the reference was published in.
     - medline_id - A medline reference for the article.
     - pubmed_id - A pubmed reference for the article.
     - comment - A place to stick any comments about the reference.
    """

    def __init__(self):
        """Create an empty reference; fill in the attributes afterwards."""
        self.location = []
        # Every textual field starts out as an empty string.
        self.authors = self.consrtm = self.title = self.journal = ''
        self.medline_id = self.pubmed_id = self.comment = ''

    def __str__(self):
        """Output an informative string for debugging.

        One "name: value" line per field, preceded by one line for each
        entry in location. The consrtm line only appears when it is set.
        """
        rows = ["location: %s\n" % where for where in self.location]
        rows.append("authors: %s\n" % self.authors)
        if self.consrtm:
            rows.append("consrtm: %s\n" % self.consrtm)
        rows.append("title: %s\n" % self.title)
        rows.append("journal: %s\n" % self.journal)
        rows.append("medline id: %s\n" % self.medline_id)
        rows.append("pubmed id: %s\n" % self.pubmed_id)
        rows.append("comment: %s\n" % self.comment)
        return "".join(rows)

    def __repr__(self):
        """Short debugging representation showing only the title."""
        # TODO - Update this if __init__ later accepts values
        class_name = self.__class__.__name__
        return "%s(title=%s, ...)" % (class_name, repr(self.title))
532
533 534 # --- Handling feature locations 535 536 -class FeatureLocation(object):
537 """Specify the location of a feature along a sequence. 538 539 The FeatureLocation is used for simple continous features, which can 540 be described as running from a start position to and end position 541 (optionally with a strand and reference information). More complex 542 locations made up from several non-continuous parts (e.g. a coding 543 sequence made up of several exons) are currently described using a 544 SeqFeature with sub-features. 545 546 Note that the start and end location numbering follow Python's scheme, 547 thus a GenBank entry of 123..150 (one based counting) becomes a location 548 of [122:150] (zero based counting). 549 550 >>> from Bio.SeqFeature import FeatureLocation 551 >>> f = FeatureLocation(122, 150) 552 >>> print(f) 553 [122:150] 554 >>> print(f.start) 555 122 556 >>> print(f.end) 557 150 558 >>> print(f.strand) 559 None 560 561 Note the strand defaults to None. If you are working with nucleotide 562 sequences you'd want to be explicit if it is the forward strand: 563 564 >>> from Bio.SeqFeature import FeatureLocation 565 >>> f = FeatureLocation(122, 150, strand=+1) 566 >>> print(f) 567 [122:150](+) 568 >>> print(f.strand) 569 1 570 571 Note that for a parent sequence of length n, the FeatureLocation 572 start and end must satisfy the inequality 0 <= start <= end <= n. 573 This means even for features on the reverse strand of a nucleotide 574 sequence, we expect the 'start' coordinate to be less than the 575 'end'. 576 577 >>> from Bio.SeqFeature import FeatureLocation 578 >>> r = FeatureLocation(122, 150, strand=-1) 579 >>> print(r) 580 [122:150](-) 581 >>> print(r.start) 582 122 583 >>> print(r.end) 584 150 585 >>> print(r.strand) 586 -1 587 588 i.e. Rather than thinking of the 'start' and 'end' biologically in a 589 strand aware manor, think of them as the 'left most' or 'minimum' 590 boundary, and the 'right most' or 'maximum' boundary of the region 591 being described. 
This is particularly important with compound 592 locations describing non-continuous regions. 593 594 In the example above we have used standard exact positions, but there 595 are also specialised position objects used to represent fuzzy positions 596 as well, for example a GenBank location like complement(<123..150) 597 would use a BeforePosition object for the start. 598 """ 599
600 - def __init__(self, start, end, strand=None, ref=None, ref_db=None):
601 """Specify the start, end, strand etc of a sequence feature. 602 603 start and end arguments specify the values where the feature begins 604 and ends. These can either by any of the ``*Position`` objects that 605 inherit from AbstractPosition, or can just be integers specifying the 606 position. In the case of integers, the values are assumed to be 607 exact and are converted in ExactPosition arguments. This is meant 608 to make it easy to deal with non-fuzzy ends. 609 610 i.e. Short form: 611 612 >>> from Bio.SeqFeature import FeatureLocation 613 >>> loc = FeatureLocation(5, 10, strand=-1) 614 >>> print(loc) 615 [5:10](-) 616 617 Explicit form: 618 619 >>> from Bio.SeqFeature import FeatureLocation, ExactPosition 620 >>> loc = FeatureLocation(ExactPosition(5), ExactPosition(10), strand=-1) 621 >>> print(loc) 622 [5:10](-) 623 624 Other fuzzy positions are used similarly, 625 626 >>> from Bio.SeqFeature import FeatureLocation 627 >>> from Bio.SeqFeature import BeforePosition, AfterPosition 628 >>> loc2 = FeatureLocation(BeforePosition(5), AfterPosition(10), strand=-1) 629 >>> print(loc2) 630 [<5:>10](-) 631 632 For nucleotide features you will also want to specify the strand, 633 use 1 for the forward (plus) strand, -1 for the reverse (negative) 634 strand, 0 for stranded but strand unknown (? in GFF3), or None for 635 when the strand does not apply (dot in GFF3), e.g. features on 636 proteins. 
637 638 >>> loc = FeatureLocation(5, 10, strand=+1) 639 >>> print(loc) 640 [5:10](+) 641 >>> print(loc.strand) 642 1 643 644 Normally feature locations are given relative to the parent 645 sequence you are working with, but an explicit accession can 646 be given with the optional ref and db_ref strings: 647 648 >>> loc = FeatureLocation(105172, 108462, ref="AL391218.9", strand=1) 649 >>> print(loc) 650 AL391218.9[105172:108462](+) 651 >>> print(loc.ref) 652 AL391218.9 653 654 """ 655 # TODO - Check 0 <= start <= end (<= length of reference) 656 if isinstance(start, AbstractPosition): 657 self._start = start 658 elif isinstance(start, int) or isinstance(start, long): 659 self._start = ExactPosition(start) 660 else: 661 raise TypeError("start=%r %s" % (start, type(start))) 662 if isinstance(end, AbstractPosition): 663 self._end = end 664 elif isinstance(end, int) or isinstance(end, long): 665 self._end = ExactPosition(end) 666 else: 667 raise TypeError("end=%r %s" % (end, type(end))) 668 self.strand = strand 669 self.ref = ref 670 self.ref_db = ref_db
671
672 - def _get_strand(self):
673 return self._strand
674
675 - def _set_strand(self, value):
676 if value not in [+1, -1, 0, None]: 677 raise ValueError("Strand should be +1, -1, 0 or None, not %r" 678 % value) 679 self._strand = value
680 681 strand = property(fget=_get_strand, fset=_set_strand, 682 doc="Strand of the location (+1, -1, 0 or None).") 683
684 - def __str__(self):
685 """Returns a representation of the location (with python counting). 686 687 For the simple case this uses the python splicing syntax, [122:150] 688 (zero based counting) which GenBank would call 123..150 (one based 689 counting). 690 """ 691 answer = "[%s:%s]" % (self._start, self._end) 692 if self.ref and self.ref_db: 693 answer = "%s:%s%s" % (self.ref_db, self.ref, answer) 694 elif self.ref: 695 answer = self.ref + answer 696 # Is ref_db without ref meaningful? 697 if self.strand is None: 698 return answer 699 elif self.strand == +1: 700 return answer + "(+)" 701 elif self.strand == -1: 702 return answer + "(-)" 703 else: 704 # strand = 0, stranded but strand unknown, ? in GFF3 705 return answer + "(?)"
706
707 - def __repr__(self):
708 """A string representation of the location for debugging.""" 709 optional = "" 710 if self.strand is not None: 711 optional += ", strand=%r" % self.strand 712 if self.ref is not None: 713 optional += ", ref=%r" % self.ref 714 if self.ref_db is not None: 715 optional += ", ref_db=%r" % self.ref_db 716 return "%s(%r, %r%s)" \ 717 % (self.__class__.__name__, self.start, self.end, optional)
718
719 - def __add__(self, other):
720 """Combine location with another feature location, or shift it. 721 722 You can add two feature locations to make a join CompoundLocation: 723 724 >>> from Bio.SeqFeature import FeatureLocation 725 >>> f1 = FeatureLocation(5, 10) 726 >>> f2 = FeatureLocation(20, 30) 727 >>> combined = f1 + f2 728 >>> print(combined) 729 join{[5:10], [20:30]} 730 731 This is thus equivalent to: 732 733 >>> from Bio.SeqFeature import CompoundLocation 734 >>> join = CompoundLocation([f1, f2]) 735 >>> print(join) 736 join{[5:10], [20:30]} 737 738 You can also use sum(...) in this way: 739 740 >>> join = sum([f1, f2]) 741 >>> print(join) 742 join{[5:10], [20:30]} 743 744 Furthermore, you can combine a FeatureLocation with a CompoundLocation 745 in this way. 746 747 Separately, adding an integer will give a new FeatureLocation with 748 its start and end offset by that amount. For example: 749 750 >>> print(f1) 751 [5:10] 752 >>> print(f1 + 100) 753 [105:110] 754 >>> print(200 + f1) 755 [205:210] 756 757 This can be useful when editing annotation. 758 """ 759 if isinstance(other, FeatureLocation): 760 return CompoundLocation([self, other]) 761 elif isinstance(other, int): 762 return self._shift(other) 763 else: 764 # This will allow CompoundLocation's __radd__ to be called: 765 return NotImplemented
766
767 - def __radd__(self, other):
768 if isinstance(other, int): 769 return self._shift(other) 770 else: 771 return NotImplemented
772
773 - def __nonzero__(self):
774 """Returns True regardless of the length of the feature. 775 776 This behaviour is for backwards compatibility, since until the 777 __len__ method was added, a FeatureLocation always evaluated as True. 778 779 Note that in comparison, Seq objects, strings, lists, etc, will all 780 evaluate to False if they have length zero. 781 782 WARNING: The FeatureLocation may in future evaluate to False when its 783 length is zero (in order to better match normal python behaviour)! 784 """ 785 return True
786
787 - def __len__(self):
788 """Returns the length of the region described by the FeatureLocation. 789 790 Note that extra care may be needed for fuzzy locations, e.g. 791 792 >>> from Bio.SeqFeature import FeatureLocation 793 >>> from Bio.SeqFeature import BeforePosition, AfterPosition 794 >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10)) 795 >>> len(loc) 796 5 797 """ 798 return int(self._end) - int(self._start)
799
800 - def __contains__(self, value):
801 """Check if an integer position is within the FeatureLocation. 802 803 Note that extra care may be needed for fuzzy locations, e.g. 804 805 >>> from Bio.SeqFeature import FeatureLocation 806 >>> from Bio.SeqFeature import BeforePosition, AfterPosition 807 >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10)) 808 >>> len(loc) 809 5 810 >>> [i for i in range(15) if i in loc] 811 [5, 6, 7, 8, 9] 812 """ 813 if not isinstance(value, int): 814 raise ValueError("Currently we only support checking for integer " 815 "positions being within a FeatureLocation.") 816 if value < self._start or value >= self._end: 817 return False 818 else: 819 return True
820
821 - def __iter__(self):
822 """Iterate over the parent positions within the FeatureLocation. 823 824 >>> from Bio.SeqFeature import FeatureLocation 825 >>> from Bio.SeqFeature import BeforePosition, AfterPosition 826 >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10)) 827 >>> len(loc) 828 5 829 >>> for i in loc: print(i) 830 5 831 6 832 7 833 8 834 9 835 >>> list(loc) 836 [5, 6, 7, 8, 9] 837 >>> [i for i in range(15) if i in loc] 838 [5, 6, 7, 8, 9] 839 840 Note this is strand aware: 841 842 >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10), strand = -1) 843 >>> list(loc) 844 [9, 8, 7, 6, 5] 845 """ 846 if self.strand == -1: 847 for i in range(self._end - 1, self._start - 1, -1): 848 yield i 849 else: 850 for i in range(self._start, self._end): 851 yield i
852
853 - def _shift(self, offset):
854 """Returns a copy of the location shifted by the offset (PRIVATE).""" 855 # TODO - What if offset is a fuzzy position? 856 if self.ref or self.ref_db: 857 # TODO - Return self? 858 raise ValueError("Feature references another sequence.") 859 return FeatureLocation(start=self._start._shift(offset), 860 end=self._end._shift(offset), 861 strand=self.strand)
862
863 - def _flip(self, length):
864 """Returns a copy of the location after the parent is reversed (PRIVATE).""" 865 if self.ref or self.ref_db: 866 # TODO - Return self? 867 raise ValueError("Feature references another sequence.") 868 # Note this will flip the start and end too! 869 if self.strand == +1: 870 flip_strand = -1 871 elif self.strand == -1: 872 flip_strand = +1 873 else: 874 # 0 or None 875 flip_strand = self.strand 876 return FeatureLocation(start=self._end._flip(length), 877 end=self._start._flip(length), 878 strand=flip_strand)
879 880 @property
881 - def parts(self):
882 """Read only list of parts (always one, the Feature Location). 883 884 This is a convience property allowing you to write code handling 885 both simple FeatureLocation objects (with one part) and more complex 886 CompoundLocation objects (with multiple parts) interchangably. 887 """ 888 return [self]
889 890 @property
891 - def start(self):
892 """Start location (integer like, possibly a fuzzy position, read only).""" 893 return self._start
894 895 @property
896 - def end(self):
897 """End location (integer like, possibly a fuzzy position, read only).""" 898 return self._end
899 900 @property
901 - def nofuzzy_start(self):
902 """Start position (integer, approximated if fuzzy, read only) (OBSOLETE). 903 904 This is now an alias for int(feature.start), which should be 905 used in preference -- unless you are trying to support old 906 versions of Biopython. 907 """ 908 try: 909 return int(self._start) 910 except TypeError: 911 if isinstance(self._start, UnknownPosition): 912 return None 913 raise
914 915 @property
916 - def nofuzzy_end(self):
917 """End position (integer, approximated if fuzzy, read only) (OBSOLETE). 918 919 This is now an alias for int(feature.end), which should be 920 used in preference -- unless you are trying to support old 921 versions of Biopython. 922 """ 923 try: 924 return int(self._end) 925 except TypeError: 926 if isinstance(self._end, UnknownPosition): 927 return None 928 raise
929
def extract(self, parent_sequence):
    """Extract feature sequence from the supplied parent sequence.

    The parent is sliced from nofuzzy_start to nofuzzy_end; for a strand
    of -1 the slice is then reverse complemented.

    Raises ValueError if the location has a ``ref`` or ``ref_db`` set.
    """
    if self.ref or self.ref_db:
        # TODO - Take a dictionary as an optional argument?
        raise ValueError("Feature references another sequence.")
    if isinstance(parent_sequence, MutableSeq):
        # This avoids complications with reverse complements
        # (the MutableSeq reverse complement acts in situ)
        parent_sequence = parent_sequence.toseq()
    lower, upper = self.nofuzzy_start, self.nofuzzy_end
    fragment = parent_sequence[lower:upper]
    if self.strand != -1:
        return fragment
    try:
        return fragment.reverse_complement()
    except AttributeError:
        # No reverse_complement method - only expected for plain strings,
        # which go through the module level function instead.
        assert isinstance(fragment, str)
        return reverse_complement(fragment)
947
class CompoundLocation(object):
    """For handling joins etc where a feature location has several parts."""

    def __init__(self, parts, operator="join"):
        """Create a compound location with several parts.

        Arguments:
         - parts - iterable of FeatureLocation objects (at least two);
           it is copied into a new list.
         - operator - string stored as the operator attribute, by
           default "join".

        Raises ValueError if any part is not a FeatureLocation, or if
        there are fewer than two parts.

        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
        >>> f1 = FeatureLocation(10, 40, strand=+1)
        >>> f2 = FeatureLocation(50, 59, strand=+1)
        >>> f = CompoundLocation([f1, f2])
        >>> len(f) == len(f1) + len(f2) == 39 == len(list(f))
        True
        >>> print(f.operator)
        join
        >>> 5 in f
        False
        >>> 15 in f
        True
        >>> f.strand
        1

        Notice that the strand of the compound location is computed
        automatically - in the case of mixed strands on the sub-locations
        the overall strand is set to None.

        >>> f = CompoundLocation([FeatureLocation(3, 6, strand=+1),
        ...                       FeatureLocation(10, 13, strand=-1)])
        >>> print(f.strand)
        None
        >>> len(f)
        6
        >>> list(f)
        [3, 4, 5, 12, 11, 10]

        The example above doing list(f) iterates over the coordinates within the
        feature. This allows you to use max and min on the location, to find the
        range covered:

        >>> min(f)
        3
        >>> max(f)
        12

        More generally, you can use the compound location's start and end which
        give the full range covered, 0 <= start <= end <= full sequence length.

        >>> f.start == min(f)
        True
        >>> f.end == max(f) + 1
        True

        This is consistent with the behaviour of the simple FeatureLocation for
        a single region, where again the 'start' and 'end' do not necessarily
        give the biological start and end, but rather the 'minimal' and 'maximal'
        coordinate boundaries.

        Note that adding locations provides a more intuitive method of
        construction:

        >>> f = FeatureLocation(3, 6, strand=+1) + FeatureLocation(10, 13, strand=-1)
        >>> len(f)
        6
        >>> list(f)
        [3, 4, 5, 12, 11, 10]
        """
        self.operator = operator
        self.parts = list(parts)
        for loc in self.parts:
            if not isinstance(loc, FeatureLocation):
                raise ValueError("CompoundLocation should be given a list of "
                                 "FeatureLocation objects, not %s" % loc.__class__)
        # Measure and report the list copy rather than the raw argument:
        # the argument may be an iterator (no len) or a tuple (which the
        # % operator would unpack as several format arguments).
        if len(self.parts) < 2:
            raise ValueError(
                "CompoundLocation should have at least 2 parts, not %r"
                % (self.parts,))

    def __str__(self):
        """Returns a representation of the location (with python counting)."""
        return "%s{%s}" % (self.operator, ", ".join(str(loc) for loc in self.parts))

    def __repr__(self):
        """String representation of the location for debugging."""
        return "%s(%r, %r)" % (self.__class__.__name__,
                               self.parts, self.operator)

    def _get_strand(self):
        # Historically a join on the reverse strand has been represented
        # in Biopython with both the parent SeqFeature and its children
        # (the exons for a CDS) all given a strand of -1.  Likewise, for
        # a join feature on the forward strand they all have strand +1.
        # However, we must also consider evil mixed strand examples like
        # this, join(complement(69611..69724),139856..140087,140625..140650)
        if len(set(loc.strand for loc in self.parts)) == 1:
            return self.parts[0].strand
        else:
            return None  # i.e. mixed strands

    def _set_strand(self, value):
        # Should this be allowed/encouraged?
        for loc in self.parts:
            loc.strand = value

    strand = property(fget=_get_strand, fset=_set_strand,
                      doc="""Overall strand of the compound location.

        If all the parts have the same strand, that is returned. Otherwise
        for mixed strands, this returns None.

        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
        >>> f1 = FeatureLocation(15, 17, strand=1)
        >>> f2 = FeatureLocation(20, 30, strand=-1)
        >>> f = f1 + f2
        >>> f1.strand
        1
        >>> f2.strand
        -1
        >>> f.strand
        >>> f.strand is None
        True

        If you set the strand of a CompoundLocation, this is applied to
        all the parts - use with caution:

        >>> f.strand = 1
        >>> f1.strand
        1
        >>> f2.strand
        1
        >>> f.strand
        1

        """)

    def __add__(self, other):
        """Combine locations, or shift the location by an integer offset.

        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
        >>> f1 = FeatureLocation(15, 17) + FeatureLocation(20, 30)
        >>> print(f1)
        join{[15:17], [20:30]}

        You can add another FeatureLocation:

        >>> print(f1 + FeatureLocation(40, 50))
        join{[15:17], [20:30], [40:50]}
        >>> print(FeatureLocation(5, 10) + f1)
        join{[5:10], [15:17], [20:30]}

        You can also add another CompoundLocation:

        >>> f2 = FeatureLocation(40, 50) + FeatureLocation(60, 70)
        >>> print(f2)
        join{[40:50], [60:70]}
        >>> print(f1 + f2)
        join{[15:17], [20:30], [40:50], [60:70]}

        Also, as with the FeatureLocation, adding an integer shifts the
        location's co-ordinates by that offset:

        >>> print(f1 + 100)
        join{[115:117], [120:130]}
        >>> print(200 + f1)
        join{[215:217], [220:230]}
        >>> print(f1 + (-5))
        join{[10:12], [15:25]}

        Raises ValueError when two compound locations with different
        operators are added, and NotImplementedError for any other type
        of operand.
        """
        if isinstance(other, FeatureLocation):
            return CompoundLocation(self.parts + [other], self.operator)
        elif isinstance(other, CompoundLocation):
            if self.operator != other.operator:
                # Handle join+order -> order as a special case?
                raise ValueError("Mixed operators %s and %s"
                                 % (self.operator, other.operator))
            return CompoundLocation(self.parts + other.parts, self.operator)
        elif isinstance(other, int):
            return self._shift(other)
        else:
            raise NotImplementedError

    def __radd__(self, other):
        """Combine locations, or shift by an integer (reflected addition).

        A FeatureLocation on the left is placed before the existing parts;
        an integer shifts the location.  Any other operand type raises
        NotImplementedError.
        """
        if isinstance(other, FeatureLocation):
            return CompoundLocation([other] + self.parts, self.operator)
        elif isinstance(other, int):
            return self._shift(other)
        else:
            raise NotImplementedError

    def __contains__(self, value):
        """Check if an integer position is within the location."""
        for loc in self.parts:
            if value in loc:
                return True
        return False

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a FeatureLocation always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The FeatureLocation may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    def __len__(self):
        """Total length, i.e. the sum of the lengths of all the parts."""
        return sum(len(loc) for loc in self.parts)

    def __iter__(self):
        """Iterate over the positions of each part in turn."""
        for loc in self.parts:
            for pos in loc:
                yield pos

    def _shift(self, offset):
        """Returns a copy of the location shifted by the offset (PRIVATE)."""
        return CompoundLocation([loc._shift(offset) for loc in self.parts],
                                self.operator)

    def _flip(self, length):
        """Returns a copy of the location after the parent is reversed (PRIVATE).

        Note that the order of the parts is reversed too.
        """
        return CompoundLocation([loc._flip(length) for loc in self.parts[::-1]],
                                self.operator)

    @property
    def start(self):
        """Start location (integer like, possibly a fuzzy position, read only).

        This is the smallest start of any of the parts.
        """
        return min(loc.start for loc in self.parts)

    @property
    def end(self):
        """End location (integer like, possibly a fuzzy position, read only).

        This is the largest end of any of the parts.
        """
        return max(loc.end for loc in self.parts)

    @property
    def nofuzzy_start(self):
        """Start position (integer, approximated if fuzzy, read only) (OBSOLETE).

        This is an alias for int(feature.start), which should be used in
        preference -- unless you are trying to support old versions of
        Biopython.
        """
        try:
            return int(self.start)
        except TypeError:
            if isinstance(self.start, UnknownPosition):
                return None
            raise

    @property
    def nofuzzy_end(self):
        """End position (integer, approximated if fuzzy, read only) (OBSOLETE).

        This is an alias for int(feature.end), which should be used in
        preference -- unless you are trying to support old versions of
        Biopython.
        """
        try:
            return int(self.end)
        except TypeError:
            if isinstance(self.end, UnknownPosition):
                return None
            raise

    @property
    def ref(self):
        """CompoundLocation's don't have a ref (dummy method for API compatibility)."""
        return None

    @property
    def ref_db(self):
        """CompoundLocation's don't have a ref_db (dummy method for API compatibility)."""
        return None

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        Each part is extracted on its own and the pieces are added
        together in the order of the parts.
        """
        # This copes with mixed strand features & all on reverse:
        parts = [loc.extract(parent_sequence) for loc in self.parts]
        # We use addition rather than a join to avoid alphabet issues:
        f_seq = parts[0]
        for part in parts[1:]:
            f_seq += part
        return f_seq
1236
class AbstractPosition(object):
    """Abstract base class representing a position.

    On its own it only supplies a generic debugging representation.
    """

    def __repr__(self):
        """Return the class name followed by a literal ``(...)``."""
        return self.__class__.__name__ + "(...)"
1244
class ExactPosition(int, AbstractPosition):
    """Specify the specific position of a boundary.

    o position - The position of the boundary.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    In this case, there is no fuzziness associated with the position.

    >>> p = ExactPosition(5)
    >>> p
    ExactPosition(5)
    >>> print(p)
    5

    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    Integer comparisons and operations should work as expected:

    >>> p == 5
    True
    >>> p < 6
    True
    >>> p <= 5
    True
    >>> p + 10
    15

    """

    # Subclasses int so can't use __init__
    def __new__(cls, position, extension=0):
        """Create the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            raise AttributeError("Non-zero extension %s for exact position."
                                 % extension)
        return int.__new__(cls, position)

    def __repr__(self):
        """String representation of the ExactPosition location for debugging."""
        return "%s(%i)" % (self.__class__.__name__, int(self))

    def __str__(self):
        """Return the position as a plain decimal string, e.g. ``5``."""
        # From Python 3.8 int no longer provides its own __str__, so str()
        # would otherwise fall back to the __repr__ defined above and
        # print(p) would show "ExactPosition(5)" instead of "5".
        return str(int(self))

    @property
    def position(self):
        """Legacy attribute to get position as integer (OBSOLETE)."""
        return int(self)

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def _shift(self, offset):
        """Return a copy moved by offset (PRIVATE)."""
        # By default preserve any subclass
        return self.__class__(int(self) + offset)

    def _flip(self, length):
        """Return a copy at ``length - position`` (PRIVATE)."""
        # By default preserve any subclass
        return self.__class__(length - int(self))
1306
class UncertainPosition(ExactPosition):
    """Specify a specific position which is uncertain.

    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.

    Nothing is overridden here, all behaviour comes from ExactPosition.
    """
1315
class UnknownPosition(AbstractPosition):
    """Specify a specific position which is unknown (has no position).

    This is used in UniProt, e.g. ? or in the XML as unknown.
    """

    def __hash__(self):
        # Every instance hashes to the same value as None.
        return hash(None)

    def __repr__(self):
        """String representation of the UnknownPosition location for debugging."""
        return self.__class__.__name__ + "()"

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    @property
    def position(self):
        """Legacy attribute to get position (None) (OBSOLETE)."""
        return None

    def _flip(self, length):
        # No coordinate to mirror, so the very same object is returned.
        return self

    def _shift(self, offset):
        # No coordinate to move, so the very same object is returned.
        return self
1345
class WithinPosition(int, AbstractPosition):
    """Specify the position of a boundary within some coordinates.

    Arguments:
    o position - The default integer position
    o left - The start (left) position of the boundary
    o right - The end (right) position of the boundary

    This allows dealing with a position like ((1.4)..100). This
    indicates that the start of the sequence is somewhere between 1
    and 4. Since this is a start coordinate, it should act like
    it is at position 1 (or in Python counting, 0).

    >>> p = WithinPosition(10, 10, 13)
    >>> p
    WithinPosition(10, left=10, right=13)
    >>> print(p)
    (10.13)
    >>> int(p)
    10

    Basic integer comparisons and operations should work as though
    this were a plain integer:

    >>> p == 10
    True
    >>> p in [9, 10, 11]
    True
    >>> p < 11
    True
    >>> p + 10
    20

    >>> isinstance(p, WithinPosition)
    True
    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    Note this also applies for comparison to other position objects,
    where again the integer behaviour is used:

    >>> p == 10
    True
    >>> p == ExactPosition(10)
    True
    >>> p == BeforePosition(10)
    True
    >>> p == AfterPosition(10)
    True

    If this were an end point, you would want the position to be 13:

    >>> p2 = WithinPosition(13, 10, 13)
    >>> p2
    WithinPosition(13, left=10, right=13)
    >>> print(p2)
    (10.13)
    >>> int(p2)
    13
    >>> p2 == 13
    True
    >>> p2 == ExactPosition(13)
    True

    The old legacy properties of position and extension give the
    starting/lower/left position as an integer, and the distance
    to the ending/higher/right position as an integer. Note that
    the position object will act like either the left or the right
    end-point depending on how it was created:

    >>> p.position == p2.position == 10
    True
    >>> p.extension == p2.extension == 3
    True
    >>> int(p) == int(p2)
    False
    >>> p == 10
    True
    >>> p2 == 13
    True

    """

    # Subclasses int so can't use __init__
    def __new__(cls, position, left, right):
        """Create the position, which must equal either left or right."""
        # The % operator is required here: without it the message string
        # would be called like a function and a failed check would raise
        # TypeError instead of AssertionError.
        assert position == left or position == right, \
            "WithinPosition: %r should match left %r or right %r" \
            % (position, left, right)
        obj = int.__new__(cls, position)
        obj._left = left
        obj._right = right
        return obj

    def __repr__(self):
        """String representation of the WithinPosition location for debugging."""
        return "%s(%i, left=%i, right=%i)" \
               % (self.__class__.__name__, int(self),
                  self._left, self._right)

    def __str__(self):
        """Return the boundaries in the form ``(left.right)``."""
        return "(%s.%s)" % (self._left, self._right)

    @property
    def position(self):
        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
        return self._left

    @property
    def extension(self):
        """Legacy attribute to get extension (from left to right) as an integer (OBSOLETE)."""
        return self._right - self._left

    def _shift(self, offset):
        """Return a copy with all three values moved by offset (PRIVATE)."""
        return self.__class__(int(self) + offset,
                              self._left + offset,
                              self._right + offset)

    def _flip(self, length):
        """Return a copy mirrored within length (PRIVATE).

        The flipped right boundary becomes the new left one and vice versa.
        """
        return self.__class__(length - int(self),
                              length - self._right,
                              length - self._left)
1468
class BetweenPosition(int, AbstractPosition):
    """Specify the position of a boundary between two coordinates (OBSOLETE?).

    Arguments:
    o position - The default integer position
    o left - The start (left) position of the boundary
    o right - The end (right) position of the boundary

    This allows dealing with a position like 123^456. This
    indicates that the start of the sequence is somewhere between
    123 and 456. It is up to the parser to set the position argument
    to either boundary point (depending on if this is being used as
    a start or end of the feature). For example as a feature end:

    >>> p = BetweenPosition(456, 123, 456)
    >>> p
    BetweenPosition(456, left=123, right=456)
    >>> print(p)
    (123^456)
    >>> int(p)
    456

    Integer equality and comparison use the given position,

    >>> p == 456
    True
    >>> p in [455, 456, 457]
    True
    >>> p > 300
    True

    The old legacy properties of position and extension give the
    starting/lower/left position as an integer, and the distance
    to the ending/higher/right position as an integer. Note that
    the position object will act like either the left or the right
    end-point depending on how it was created:

    >>> p2 = BetweenPosition(123, left=123, right=456)
    >>> p.position == p2.position == 123
    True
    >>> p.extension
    333
    >>> p2.extension
    333
    >>> p.extension == p2.extension == 333
    True
    >>> int(p) == int(p2)
    False
    >>> p == 456
    True
    >>> p2 == 123
    True

    Note this potentially surprising behaviour:

    >>> BetweenPosition(123, left=123, right=456) == ExactPosition(123)
    True
    >>> BetweenPosition(123, left=123, right=456) == BeforePosition(123)
    True
    >>> BetweenPosition(123, left=123, right=456) == AfterPosition(123)
    True

    i.e. For equality (and sorting) the position objects behave like
    integers.
    """

    # Subclasses int so can't use __init__
    def __new__(cls, position, left, right):
        """Create the position, which must equal either left or right."""
        # Give the assertion a message, in line with the other fuzzy
        # position classes, so a failure says which values clashed.
        assert position == left or position == right, \
            "BetweenPosition: %r should match left %r or right %r" \
            % (position, left, right)
        obj = int.__new__(cls, position)
        obj._left = left
        obj._right = right
        return obj

    def __repr__(self):
        """String representation of the BetweenPosition location for debugging."""
        return "%s(%i, left=%i, right=%i)" \
               % (self.__class__.__name__, int(self),
                  self._left, self._right)

    def __str__(self):
        """Return the boundaries in the form ``(left^right)``."""
        return "(%s^%s)" % (self._left, self._right)

    @property
    def position(self):
        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
        return self._left

    @property
    def extension(self):
        """Legacy attribute to get extension (from left to right) as an integer (OBSOLETE)."""
        return self._right - self._left

    def _shift(self, offset):
        """Return a copy with all three values moved by offset (PRIVATE)."""
        return self.__class__(int(self) + offset,
                              self._left + offset,
                              self._right + offset)

    def _flip(self, length):
        """Return a copy mirrored within length (PRIVATE).

        The flipped right boundary becomes the new left one and vice versa.
        """
        return self.__class__(length - int(self),
                              length - self._right,
                              length - self._left)
1570
class BeforePosition(int, AbstractPosition):
    """Specify a position where the actual location occurs before it.

    Arguments:
    o position - The upper boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (<10..100) where the location
    occurs somewhere before position 10.

    >>> p = BeforePosition(5)
    >>> p
    BeforePosition(5)
    >>> print(p)
    <5
    >>> int(p)
    5
    >>> p + 10
    15

    Note this potentially surprising behaviour:

    >>> p == ExactPosition(5)
    True
    >>> p == AfterPosition(5)
    True

    Just remember that for equality and sorting the position objects act
    like integers.
    """

    # An int subclass has to be set up in __new__ rather than __init__.
    def __new__(cls, position, extension=0):
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        return int.__new__(cls, position)

    def __repr__(self):
        """A string representation of the location for debugging."""
        class_name = self.__class__.__name__
        return "%s(%d)" % (class_name, int(self))

    def __str__(self):
        # The integer value prefixed with a less-than sign.
        return "<" + str(self.position)

    @property
    def position(self):
        """Legacy attribute to get position as integer (OBSOLETE)."""
        return int(self)

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def _shift(self, offset):
        # The subclass is preserved when shifting.
        moved = int(self) + offset
        return self.__class__(moved)

    def _flip(self, length):
        # Flipping yields an AfterPosition at length minus this position.
        mirrored = length - int(self)
        return AfterPosition(mirrored)
1633
class AfterPosition(int, AbstractPosition):
    """Specify a position where the actual location is found after it.

    Arguments:
    o position - The lower boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (>10..100) where the location
    occurs somewhere after position 10.

    >>> p = AfterPosition(7)
    >>> p
    AfterPosition(7)
    >>> print(p)
    >7
    >>> int(p)
    7
    >>> p + 10
    17

    >>> isinstance(p, AfterPosition)
    True
    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    Note this potentially surprising behaviour:

    >>> p == ExactPosition(7)
    True
    >>> p == BeforePosition(7)
    True

    Just remember that for equality and sorting the position objects act
    like integers.
    """

    # An int subclass has to be set up in __new__ rather than __init__.
    def __new__(cls, position, extension=0):
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        return int.__new__(cls, position)

    def __repr__(self):
        """A string representation of the location for debugging."""
        class_name = self.__class__.__name__
        return "%s(%d)" % (class_name, int(self))

    def __str__(self):
        # The integer value prefixed with a greater-than sign.
        return ">" + str(self.position)

    @property
    def position(self):
        """Legacy attribute to get position as integer (OBSOLETE)."""
        return int(self)

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def _shift(self, offset):
        # The subclass is preserved when shifting.
        moved = int(self) + offset
        return self.__class__(moved)

    def _flip(self, length):
        # Flipping yields a BeforePosition at length minus this position.
        mirrored = length - int(self)
        return BeforePosition(mirrored)
1703
class OneOfPosition(int, AbstractPosition):
    """Specify a position where the location can be multiple positions.

    This models the GenBank 'one-of(1888,1901)' function, and tries
    to make this fit within the Biopython Position models. If this was
    a start position it should act like 1888, but as an end position 1901.

    >>> p = OneOfPosition(1888, [ExactPosition(1888), ExactPosition(1901)])
    >>> p
    OneOfPosition(1888, choices=[ExactPosition(1888), ExactPosition(1901)])
    >>> int(p)
    1888

    Integer comparisons and operators act like using int(p),

    >>> p == 1888
    True
    >>> p <= 1888
    True
    >>> p > 1888
    False
    >>> p + 100
    1988

    >>> isinstance(p, OneOfPosition)
    True
    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    The old legacy properties of position and extension give the
    starting/lowest/left-most position as an integer, and the
    distance to the ending/highest/right-most position as an integer.
    Note that the position object will act like one of the list of
    possible locations depending on how it was created:

    >>> p2 = OneOfPosition(1901, [ExactPosition(1888), ExactPosition(1901)])
    >>> p.position == p2.position == 1888
    True
    >>> p.extension == p2.extension == 13
    True
    >>> int(p) == int(p2)
    False
    >>> p == 1888
    True
    >>> p2 == 1901
    True

    """

    # Subclasses int so can't use __init__
    def __new__(cls, position, choices):
        """Initialize with a set of possible positions.

        choices is a list of AbstractPosition derived objects,
        specifying possible locations.

        position is an integer specifying the default behaviour; it
        must be equal to one of the choices.
        """
        assert position in choices, \
            "OneOfPosition: %r should match one of %r" % (position, choices)
        obj = int.__new__(cls, position)
        obj.position_choices = choices
        return obj

    @property
    def position(self):
        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
        return min(int(pos) for pos in self.position_choices)

    @property
    def extension(self):
        """Legacy attribute to get extension as integer (OBSOLETE)."""
        positions = [int(pos) for pos in self.position_choices]
        return max(positions) - min(positions)

    def __repr__(self):
        """String representation of the OneOfPosition location for debugging."""
        return "%s(%i, choices=%r)" % (self.__class__.__name__,
                                       int(self), self.position_choices)

    def __str__(self):
        """Return the choices in the form ``one-of(a,b,...)``."""
        # Joining avoids having to trim a trailing comma afterwards, which
        # would eat the opening parenthesis if there were no choices.
        return "one-of(%s)" % ",".join(str(pos) for pos in self.position_choices)

    def _shift(self, offset):
        """Return a copy with the position and every choice moved by offset (PRIVATE)."""
        return self.__class__(int(self) + offset,
                              [p._shift(offset) for p in self.position_choices])

    def _flip(self, length):
        """Return a copy mirrored within length (PRIVATE).

        The choices are flipped individually and listed in reverse order.
        """
        return self.__class__(length - int(self),
                              [p._flip(length) for p in self.position_choices[::-1]])
1800
class PositionGap(object):
    """Simple class to hold information about a gap between positions."""

    def __init__(self, gap_size):
        """Initialize with a position object containing the gap information.

        The argument is stored unchanged as the gap_size attribute.
        """
        self.gap_size = gap_size

    def __repr__(self):
        """A string representation of the position gap for debugging."""
        class_name = self.__class__.__name__
        return "%s(%r)" % (class_name, self.gap_size)

    def __str__(self):
        # The gap size wrapped as gap(...).
        return "gap(%s)" % self.gap_size
if __name__ == "__main__":
    # Only when executed as a script: hand over to the helper imported from
    # Bio._utils (presumably it runs the doctests embedded in this module's
    # docstrings - confirm against Bio._utils).
    from Bio._utils import run_doctest
    run_doctest()