Package Bio :: Module SeqFeature
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqFeature

   1  # Copyright 2000-2003 Jeff Chang. 
   2  # Copyright 2001-2008 Brad Chapman. 
   3  # Copyright 2005-2012 by Peter Cock. 
   4  # Copyright 2006-2009 Michiel de Hoon. 
   5  # All rights reserved. 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  """Represent a Sequence Feature holding info about a part of a sequence. 
  10   
  11  This is heavily modeled after the Biocorba SeqFeature objects, and 
  12  may be pretty biased towards GenBank stuff since I'm writing it 
  13  for the GenBank parser output... 
  14   
  15  What's here: 
  16   
  17  Base class to hold a Feature. 
  18  ---------------------------- 
  19  classes: 
  20  o SeqFeature 
  21   
  22  Hold information about a Reference. 
  23  ---------------------------------- 
  24   
  25  This is an attempt to create a General class to hold Reference type 
  26  information. 
  27   
  28  classes: 
  29  o Reference 
  30   
  31  Specify locations of a feature on a Sequence. 
  32  --------------------------------------------- 
  33   
  34  This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in 
  35  much the same way as Biocorba. This has the advantages of allowing us 
  36  to handle fuzzy stuff in case anyone needs it, and also be compatible 
  37  with Biocorba. 
  38   
  39  classes: 
  40  o FeatureLocation - Specify the start and end location of a feature. 
  41  o CompoundLocation - Collection of FeatureLocation objects (for joins etc). 
  42   
  43  o ExactPosition - Specify the position as being exact. 
  44  o WithinPosition - Specify a position occurring within some range. 
  45  o BetweenPosition - Specify a position occurring between a range (OBSOLETE?). 
  46  o BeforePosition - Specify the position as being found before some base. 
  47  o AfterPosition - Specify the position as being found after some base. 
  48  o OneOfPosition - Specify a position where the location can be multiple positions. 
  49  o UnknownPosition - Represents missing information like '?' in UniProt. 
  50  """ 
  51   
  52  from __future__ import print_function 
  53   
  54  from Bio.Seq import MutableSeq, reverse_complement 
class SeqFeature(object):
    """Represent a Sequence Feature on an object.

    Attributes:
     o location - the location of the feature on the sequence
       (FeatureLocation or CompoundLocation, or None)
     o type - the specified type of the feature (ie. CDS, exon, repeat...)
     o location_operator - a string specifying how this SeqFeature may
       be related to others, e.g. "join". This is a proxy for
       feature.location.operator and only applies to compound locations.
     o strand - A value specifying on which strand (of a DNA sequence, for
       instance) the feature deals with. 1 indicates the plus strand, -1
       indicates the minus strand, 0 indicates stranded but unknown (? in GFF3),
       while the default of None indicates that strand doesn't apply (dot in GFF3,
       e.g. features on proteins). Note this is a shortcut for accessing the
       strand property of the feature's location.
     o id - A string identifier for the feature.
     o ref - A reference to another sequence. This could be an accession
       number for some different sequence. Note this is a shortcut for the
       reference property of the feature's location.
     o ref_db - A different database for the reference accession number.
       Note this is a shortcut for the reference property of the location
     o qualifiers - A dictionary of qualifiers on the feature. These are
       analogous to the qualifiers from a GenBank feature table. The keys of
       the dictionary are qualifier names, the values are the qualifier
       values.
     o sub_features - Obsolete list of additional SeqFeatures which was
       used for holding compound locations (e.g. joins in GenBank/EMBL).
       This is now superseded by a CompoundLocation as the location,
       and should not be used (DEPRECATED).
    """

    def __init__(self, location=None, type='', location_operator='',
                 strand=None, id="<unknown id>",
                 qualifiers=None, sub_features=None,
                 ref=None, ref_db=None):
        """Initialize a SeqFeature on a Sequence.

        location can either be a FeatureLocation or CompoundLocation (with
        strand argument also given if required), or None.

        e.g. With no strand, on the forward strand, and on the reverse strand:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f1 = SeqFeature(FeatureLocation(5, 10), type="domain")
        >>> f1.strand == f1.location.strand == None
        True
        >>> f2 = SeqFeature(FeatureLocation(7, 110, strand=1), type="CDS")
        >>> f2.strand == f2.location.strand == +1
        True
        >>> f3 = SeqFeature(FeatureLocation(9, 108, strand=-1), type="CDS")
        >>> f3.strand == f3.location.strand == -1
        True

        An invalid strand will trigger an exception:

        >>> f4 = SeqFeature(FeatureLocation(50, 60), strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        Similarly if set via the FeatureLocation directly:

        >>> loc4 = FeatureLocation(50, 60, strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        For exact start/end positions, an integer can be used (as shown above)
        as shorthand for the ExactPosition object. For non-exact locations, the
        FeatureLocation must be specified via the appropriate position objects.

        Note that the strand, ref and ref_db arguments to the SeqFeature are
        now obsolete and will be deprecated in a future release (which will
        give warning messages) and later removed. Set them via the location
        object instead.

        Note that the location_operator and sub_features arguments should no
        longer be used, instead do this via the CompoundLocation object.

        Raises TypeError if location is neither None, a FeatureLocation nor a
        CompoundLocation, and ValueError if strand, ref, ref_db or
        location_operator are given a value which cannot be stored on the
        location (e.g. because there is no location).
        """
        if location is not None and not isinstance(
                location, (FeatureLocation, CompoundLocation)):
            raise TypeError("FeatureLocation, CompoundLocation (or None) required for the location")
        self.location = location
        self.type = type
        # strand, ref, ref_db and location_operator are stored on the
        # location object; the assignments below go through the properties.
        if location_operator:
            #TODO - Deprecation warning
            self.location_operator = location_operator
        if strand is not None:
            #TODO - Deprecation warning
            self.strand = strand
        self.id = id
        if qualifiers is None:
            qualifiers = {}
        self.qualifiers = qualifiers
        if sub_features is None:
            sub_features = []
        else:
            import warnings
            from Bio import BiopythonDeprecationWarning
            # stacklevel=2 so the warning is attributed to the caller.
            warnings.warn("Rather than sub_features, use a CompoundLocation",
                          BiopythonDeprecationWarning, stacklevel=2)
        self._sub_features = sub_features
        if ref is not None:
            #TODO - Deprecation warning
            self.ref = ref
        if ref_db is not None:
            #TODO - Deprecation warning
            self.ref_db = ref_db

    def _get_sub_features(self):
        """Return the obsolete sub-feature list, warning if non-empty (PRIVATE)."""
        if self._sub_features:
            import warnings
            from Bio import BiopythonDeprecationWarning
            warnings.warn("Rather than using f.sub_features, f.location should be a CompoundLocation",
                          BiopythonDeprecationWarning, stacklevel=2)
        return self._sub_features

    def _set_sub_features(self, value):
        """Store the obsolete sub-feature list, warning if non-empty (PRIVATE)."""
        if value:
            import warnings
            from Bio import BiopythonDeprecationWarning
            warnings.warn("Rather than f.sub_features, use a CompoundLocation for f.location",
                          BiopythonDeprecationWarning, stacklevel=2)
        self._sub_features = value

    sub_features = property(fget=_get_sub_features, fset=_set_sub_features,
                            doc="Obsolete representation of compound locations (DEPRECATED).")

    def _get_strand(self):
        """Return the location's strand, or None without a location (PRIVATE)."""
        # Mirrors the other proxy getters (ref, ref_db, location_operator)
        # and the setter below, which all tolerate a missing location.
        if self.location is None:
            return None
        return self.location.strand

    def _set_strand(self, value):
        """Set the strand on the location (PRIVATE).

        Setting None when there is no location is silently accepted; any
        other value without a location raises ValueError.
        """
        try:
            self.location.strand = value
        except AttributeError:
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set strand without a location.")
            else:
                raise

    strand = property(fget=_get_strand, fset=_set_strand,
                      doc="""Feature's strand

                          This is a shortcut for feature.location.strand
                          """)

    def _get_ref(self):
        """Return the location's ref, or None if unavailable (PRIVATE)."""
        try:
            return self.location.ref
        except AttributeError:
            return None

    def _set_ref(self, value):
        """Set the ref on the location (PRIVATE).

        Setting None when there is no location is silently accepted; any
        other value without a location raises ValueError.
        """
        try:
            self.location.ref = value
        except AttributeError:
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set ref without a location.")
            else:
                raise

    ref = property(fget=_get_ref, fset=_set_ref,
                   doc="""Feature location reference (e.g. accession).

                       This is a shortcut for feature.location.ref
                       """)

    def _get_ref_db(self):
        """Return the location's ref_db, or None if unavailable (PRIVATE)."""
        try:
            return self.location.ref_db
        except AttributeError:
            return None

    def _set_ref_db(self, value):
        """Set the ref_db on the location (PRIVATE).

        Behaves like the strand and ref setters: setting None when there is
        no location is silently accepted; any other value without a location
        raises ValueError.
        """
        try:
            self.location.ref_db = value
        except AttributeError:
            if self.location is None:
                if value is not None:
                    raise ValueError("Can't set ref_db without a location.")
            else:
                raise

    ref_db = property(fget=_get_ref_db, fset=_set_ref_db,
                      doc="""Feature location reference's database.

                          This is a shortcut for feature.location.ref_db
                          """)

    def _get_location_operator(self):
        """Return the location's operator, or None if it has none (PRIVATE)."""
        try:
            return self.location.operator
        except AttributeError:
            return None

    def _set_location_operator(self, value):
        """Set the operator on a CompoundLocation (PRIVATE).

        Empty/false values are ignored. Raises ValueError if the location is
        None or is not a CompoundLocation.
        """
        if value:
            if isinstance(self.location, CompoundLocation):
                self.location.operator = value
            elif self.location is None:
                raise ValueError("Location is None so can't set its operator (to %r)" % value)
            else:
                raise ValueError("Only CompoundLocation gets an operator (%r)" % value)

    location_operator = property(fget=_get_location_operator, fset=_set_location_operator,
                                 doc="Location operator for compound locations (e.g. join).")

    def __repr__(self):
        """A string representation of the record for debugging."""
        answer = "%s(%s" % (self.__class__.__name__, repr(self.location))
        if self.type:
            answer += ", type=%s" % repr(self.type)
        if self.location_operator:
            answer += ", location_operator=%s" % repr(self.location_operator)
        if self.id and self.id != "<unknown id>":
            answer += ", id=%s" % repr(self.id)
        if self.ref:
            answer += ", ref=%s" % repr(self.ref)
        if self.ref_db:
            answer += ", ref_db=%s" % repr(self.ref_db)
        answer += ")"
        return answer

    def __str__(self):
        """A readable summary of the feature intended to be printed to screen.
        """
        # Collect the lines and join once rather than repeated string +=.
        lines = ["type: %s" % self.type,
                 "location: %s" % self.location]
        if self.id and self.id != "<unknown id>":
            lines.append("id: %s" % self.id)
        lines.append("qualifiers: ")
        for qual_key in sorted(self.qualifiers):
            lines.append("    Key: %s, Value: %s" % (qual_key,
                                                     self.qualifiers[qual_key]))
        #TODO - Remove this from __str__ since deprecated
        if self._sub_features:
            lines.append("Sub-Features")
            for sub_feature in self._sub_features:
                lines.append("%s" % sub_feature)
        return "\n".join(lines) + "\n"

    def _shift(self, offset):
        """Returns a copy of the feature with its location shifted (PRIVATE).

        The annotation qualifiers are copied."""
        answer = SeqFeature(location=self.location._shift(offset),
                            type=self.type,
                            location_operator=self.location_operator,
                            id=self.id,
                            qualifiers=dict(self.qualifiers.items()))
        #This is to avoid the deprecation warning:
        answer._sub_features = [f._shift(offset) for f in self._sub_features]
        return answer

    def _flip(self, length):
        """Returns a copy of the feature with its location flipped (PRIVATE).

        The argument length gives the length of the parent sequence. For
        example a location 0..20 (+1 strand) with parent length 30 becomes
        after flipping 10..30 (-1 strand). Strandless (None) or unknown
        strand (0) remain like that - just their end points are changed.

        The annotation qualifiers are copied.
        """
        answer = SeqFeature(location=self.location._flip(length),
                            type=self.type,
                            location_operator=self.location_operator,
                            id=self.id,
                            qualifiers=dict(self.qualifiers.items()))
        #This is to avoid the deprecation warning:
        # (sub-features are reversed as well as individually flipped)
        answer._sub_features = [f._flip(length) for f in self._sub_features[::-1]]
        return answer

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        The parent_sequence can be a Seq like object or a string, and will
        generally return an object of the same type. The exception to this is
        a MutableSeq as the parent sequence will return a Seq object.

        This should cope with complex locations including complements, joins
        and fuzzy positions. Even mixed strand features should work! This
        also covers features on protein sequences (e.g. domains), although
        here reverse strand features are not permitted.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8, 15), type="domain")
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())

        Note - currently only sub-features of type "join" are supported.
        """
        return self.location.extract(parent_sequence)

    #Python 3:
    def __bool__(self):
        """Boolean value of an instance of this class (True).

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqFeature always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The SeqFeature may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    #Python 2:
    __nonzero__ = __bool__

    def __len__(self):
        """Returns the length of the region described by a feature.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8, 15), type="domain")
        >>> len(f)
        7
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())
        >>> len(f.extract(seq))
        7

        This is a proxy for taking the length of the feature's location:

        >>> len(f.location)
        7

        For simple features this is the same as the region spanned (end
        position minus start position using Pythonic counting). However, for
        a compound location (e.g. a CDS as the join of several exons) the
        gaps are not counted (e.g. introns). This ensures that len(f) matches
        len(f.extract(parent_seq)), and also makes sure things work properly
        with features wrapping the origin etc.
        """
        return len(self.location)

    def __iter__(self):
        """Iterate over the parent positions within the feature.

        The iteration order is strand aware, and can be thought of as moving
        along the feature using the parent sequence coordinates:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5, 10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> for i in f: print(i)
        9
        8
        7
        6
        5
        >>> list(f)
        [9, 8, 7, 6, 5]

        This is a proxy for iterating over the location,

        >>> list(f.location)
        [9, 8, 7, 6, 5]
        """
        return iter(self.location)

    def __contains__(self, value):
        """Check if an integer position is within the feature.

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5, 10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> [i for i in range(15) if i in f]
        [5, 6, 7, 8, 9]

        For example, to see which features include a SNP position, you could
        use this:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
        >>> for f in record.features:
        ...     if 1750 in f:
        ...         print("%s %s" % (f.type, f.location))
        source [0:154478](+)
        gene [1716:4347](-)
        tRNA join{[4310:4347](-), [1716:1751](-)}

        Note that for a feature defined as a join of several subfeatures (e.g.
        the union of several exons) the gaps are not checked (e.g. introns).
        In this example, the tRNA location is defined in the GenBank file as
        complement(join(1717..1751,4311..4347)), so that position 1760 falls
        in the gap:

        >>> for f in record.features:
        ...     if 1760 in f:
        ...         print("%s %s" % (f.type, f.location))
        source [0:154478](+)
        gene [1716:4347](-)

        Note that additional care may be required with fuzzy locations, for
        example just before a BeforePosition:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition
        >>> f = SeqFeature(FeatureLocation(BeforePosition(3), 8), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(10) if i in f]
        [3, 4, 5, 6, 7]

        Note that this is a proxy for testing membership on the location.

        >>> [i for i in range(10) if i in f.location]
        [3, 4, 5, 6, 7]
        """
        return value in self.location

# --- References


# TODO -- Will this hold PubMed and Medline information decently?
class Reference(object):
    """Represent a Generic Reference object.

    Attributes:
     o location - A list of Location objects specifying regions of
       the sequence that the references correspond to. If no locations are
       specified, the entire sequence is assumed.
     o authors - A big old string, or a list split by author, of authors
       for the reference.
     o consrtm - Reported only when non-empty (presumably the consortium
       name, as in the GenBank CONSRTM line - confirm against the parser).
     o title - The title of the reference.
     o journal - Journal the reference was published in.
     o medline_id - A medline reference for the article.
     o pubmed_id - A pubmed reference for the article.
     o comment - A place to stick any comments about the reference.
    """

    def __init__(self):
        """Create an empty reference; callers fill in the attributes."""
        self.location = []
        self.authors = ''
        self.consrtm = ''
        self.title = ''
        self.journal = ''
        self.medline_id = ''
        self.pubmed_id = ''
        self.comment = ''

    def __str__(self):
        """Output an informative string for debugging.

        One "name: value" line per field, preceded by one line per location;
        the consrtm line is omitted when that attribute is empty.
        """
        pieces = ["location: %s\n" % where for where in self.location]
        pieces.append("authors: %s\n" % self.authors)
        if self.consrtm:
            pieces.append("consrtm: %s\n" % self.consrtm)
        pieces.append("title: %s\n" % self.title)
        pieces.append("journal: %s\n" % self.journal)
        pieces.append("medline id: %s\n" % self.medline_id)
        pieces.append("pubmed id: %s\n" % self.pubmed_id)
        pieces.append("comment: %s\n" % self.comment)
        return "".join(pieces)

    def __repr__(self):
        """Short debugging representation showing only the title."""
        #TODO - Update this if __init__ later accepts values
        class_name = self.__class__.__name__
        return "%s(title=%r, ...)" % (class_name, self.title)

# --- Handling feature locations

class FeatureLocation(object):
    """Specify the location of a feature along a sequence.

    The FeatureLocation is used for simple continuous features, which can
    be described as running from a start position to an end position
    (optionally with a strand and reference information). More complex
    locations made up from several non-continuous parts (e.g. a coding
    sequence made up of several exons) are described using a
    CompoundLocation.

    Note that the start and end location numbering follow Python's scheme,
    thus a GenBank entry of 123..150 (one based counting) becomes a location
    of [122:150] (zero based counting).

    >>> from Bio.SeqFeature import FeatureLocation
    >>> f = FeatureLocation(122, 150)
    >>> print(f)
    [122:150]
    >>> print(f.start)
    122
    >>> print(f.end)
    150
    >>> print(f.strand)
    None

    Note the strand defaults to None. If you are working with nucleotide
    sequences you'd want to be explicit if it is the forward strand:

    >>> from Bio.SeqFeature import FeatureLocation
    >>> f = FeatureLocation(122, 150, strand=+1)
    >>> print(f)
    [122:150](+)
    >>> print(f.strand)
    1

    Note that for a parent sequence of length n, the FeatureLocation
    start and end must satisfy the inequality 0 <= start <= end <= n.
    This means even for features on the reverse strand of a nucleotide
    sequence, we expect the 'start' coordinate to be less than the
    'end'.

    >>> from Bio.SeqFeature import FeatureLocation
    >>> r = FeatureLocation(122, 150, strand=-1)
    >>> print(r)
    [122:150](-)
    >>> print(r.start)
    122
    >>> print(r.end)
    150
    >>> print(r.strand)
    -1

    i.e. Rather than thinking of the 'start' and 'end' biologically in a
    strand aware manner, think of them as the 'left most' or 'minimum'
    boundary, and the 'right most' or 'maximum' boundary of the region
    being described. This is particularly important with compound
    locations describing non-continuous regions.

    In the example above we have used standard exact positions, but there
    are also specialised position objects used to represent fuzzy positions
    as well, for example a GenBank location like complement(<123..150)
    would use a BeforePosition object for the start.
    """

    def __init__(self, start, end, strand=None, ref=None, ref_db=None):
        """Specify the start, end, strand etc of a sequence feature.

        start and end arguments specify the values where the feature begins
        and ends. These can either be any of the *Position objects that
        inherit from AbstractPosition, or can just be integers specifying the
        position. In the case of integers, the values are assumed to be
        exact and are converted in ExactPosition arguments. This is meant
        to make it easy to deal with non-fuzzy ends.

        i.e. Short form:

        >>> from Bio.SeqFeature import FeatureLocation
        >>> loc = FeatureLocation(5, 10, strand=-1)
        >>> print(loc)
        [5:10](-)

        Explicit form:

        >>> from Bio.SeqFeature import FeatureLocation, ExactPosition
        >>> loc = FeatureLocation(ExactPosition(5), ExactPosition(10), strand=-1)
        >>> print(loc)
        [5:10](-)

        Other fuzzy positions are used similarly,

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc2 = FeatureLocation(BeforePosition(5), AfterPosition(10), strand=-1)
        >>> print(loc2)
        [<5:>10](-)

        For nucleotide features you will also want to specify the strand,
        use 1 for the forward (plus) strand, -1 for the reverse (negative)
        strand, 0 for stranded but strand unknown (? in GFF3), or None for
        when the strand does not apply (dot in GFF3), e.g. features on
        proteins.

        >>> loc = FeatureLocation(5, 10, strand=+1)
        >>> print(loc)
        [5:10](+)
        >>> print(loc.strand)
        1

        Normally feature locations are given relative to the parent
        sequence you are working with, but an explicit accession can
        be given with the optional ref and ref_db strings:

        >>> loc = FeatureLocation(105172, 108462, ref="AL391218.9", strand=1)
        >>> print(loc)
        AL391218.9[105172:108462](+)
        >>> print(loc.ref)
        AL391218.9

        Raises TypeError if start or end is neither a position object nor an
        integer, and ValueError if strand is not one of +1, -1, 0 or None.
        """
        #TODO - Check 0 <= start <= end (<= length of reference)
        self._start = self._as_position(start, "start")
        self._end = self._as_position(end, "end")
        self.strand = strand
        self.ref = ref
        self.ref_db = ref_db

    @staticmethod
    def _as_position(value, name):
        """Return value as a position object, wrapping plain integers (PRIVATE).

        name ("start" or "end") is only used in the TypeError message.
        """
        if isinstance(value, AbstractPosition):
            return value
        try:
            integer_types = (int, long)  # noqa: F821 - Python 2 only
        except NameError:
            # Python 3 has no separate long type; without this guard a bad
            # argument would raise NameError rather than TypeError.
            integer_types = (int,)
        if isinstance(value, integer_types):
            return ExactPosition(value)
        raise TypeError("%s=%r %s" % (name, value, type(value)))

    def _get_strand(self):
        """Return the stored strand value (PRIVATE)."""
        return self._strand

    def _set_strand(self, value):
        """Validate and store the strand value (PRIVATE)."""
        if value not in [+1, -1, 0, None]:
            raise ValueError("Strand should be +1, -1, 0 or None, not %r"
                             % value)
        self._strand = value

    strand = property(fget=_get_strand, fset=_set_strand,
                      doc="Strand of the location (+1, -1, 0 or None).")

    def __str__(self):
        """Returns a representation of the location (with python counting).

        For the simple case this uses the python splicing syntax, [122:150]
        (zero based counting) which GenBank would call 123..150 (one based
        counting).
        """
        answer = "[%s:%s]" % (self._start, self._end)
        if self.ref and self.ref_db:
            answer = "%s:%s%s" % (self.ref_db, self.ref, answer)
        elif self.ref:
            answer = self.ref + answer
        #Is ref_db without ref meaningful?
        if self.strand is None:
            return answer
        elif self.strand == +1:
            return answer + "(+)"
        elif self.strand == -1:
            return answer + "(-)"
        else:
            #strand = 0, stranded but strand unknown, ? in GFF3
            return answer + "(?)"

    def __repr__(self):
        """A string representation of the location for debugging."""
        optional = ""
        if self.strand is not None:
            optional += ", strand=%r" % self.strand
        if self.ref is not None:
            optional += ", ref=%r" % self.ref
        if self.ref_db is not None:
            optional += ", ref_db=%r" % self.ref_db
        return "%s(%r, %r%s)" \
               % (self.__class__.__name__, self.start, self.end, optional)

    def __add__(self, other):
        """Combine location with another feature location, or shift it.

        You can add two feature locations to make a join CompoundLocation:

        >>> from Bio.SeqFeature import FeatureLocation
        >>> f1 = FeatureLocation(5, 10)
        >>> f2 = FeatureLocation(20, 30)
        >>> combined = f1 + f2
        >>> print(combined)
        join{[5:10], [20:30]}

        This is thus equivalent to:

        >>> from Bio.SeqFeature import CompoundLocation
        >>> join = CompoundLocation([f1, f2])
        >>> print(join)
        join{[5:10], [20:30]}

        You can also use sum(...) in this way:

        >>> join = sum([f1, f2])
        >>> print(join)
        join{[5:10], [20:30]}

        Furthermore, you can combine a FeatureLocation with a CompoundLocation
        in this way.

        Separately, adding an integer will give a new FeatureLocation with
        its start and end offset by that amount. For example:

        >>> print(f1)
        [5:10]
        >>> print(f1 + 100)
        [105:110]
        >>> print(200 + f1)
        [205:210]

        This can be useful when editing annotation.
        """
        if isinstance(other, FeatureLocation):
            return CompoundLocation([self, other])
        elif isinstance(other, int):
            return self._shift(other)
        else:
            #This will allow CompoundLocation's __radd__ to be called:
            return NotImplemented

    def __radd__(self, other):
        """Support integer + location, shifting the location by the integer."""
        if isinstance(other, int):
            return self._shift(other)
        else:
            return NotImplemented

    #Python 3:
    def __bool__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a FeatureLocation always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The FeatureLocation may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    #Python 2 (without __bool__, Python 3 would fall back on __len__ and a
    #zero length location would wrongly evaluate as False):
    __nonzero__ = __bool__

    def __len__(self):
        """Returns the length of the region described by the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10))
        >>> len(loc)
        5
        """
        return int(self._end) - int(self._start)

    def __contains__(self, value):
        """Check if an integer position is within the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10))
        >>> len(loc)
        5
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a FeatureLocation.")
        # Half-open interval, start <= value < end
        return not (value < self._start or value >= self._end)

    def __iter__(self):
        """Iterate over the parent positions within the FeatureLocation.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10))
        >>> len(loc)
        5
        >>> for i in loc: print(i)
        5
        6
        7
        8
        9
        >>> list(loc)
        [5, 6, 7, 8, 9]
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]

        Note this is strand aware:

        >>> loc = FeatureLocation(BeforePosition(5), AfterPosition(10), strand = -1)
        >>> list(loc)
        [9, 8, 7, 6, 5]
        """
        if self.strand == -1:
            for i in range(self._end - 1, self._start - 1, -1):
                yield i
        else:
            for i in range(self._start, self._end):
                yield i

    def _shift(self, offset):
        """Returns a copy of the location shifted by the offset (PRIVATE).

        Raises ValueError if the location refers to another sequence
        (ref or ref_db set). The ref and ref_db are therefore never copied.
        """
        #TODO - What if offset is a fuzzy position?
        if self.ref or self.ref_db:
            #TODO - Return self?
            raise ValueError("Feature references another sequence.")
        return FeatureLocation(start=self._start._shift(offset),
                               end=self._end._shift(offset),
                               strand=self.strand)

    def _flip(self, length):
        """Returns a copy of the location after the parent is reversed (PRIVATE).

        length is passed to the _flip method of the start and end positions.
        Raises ValueError if the location refers to another sequence
        (ref or ref_db set).
        """
        if self.ref or self.ref_db:
            #TODO - Return self?
            raise ValueError("Feature references another sequence.")
        #Note this will flip the start and end too!
        if self.strand == +1:
            flip_strand = -1
        elif self.strand == -1:
            flip_strand = +1
        else:
            #0 or None
            flip_strand = self.strand
        return FeatureLocation(start=self._end._flip(length),
                               end=self._start._flip(length),
                               strand=flip_strand)

    @property
    def parts(self):
        """Read only list of parts (always one, the Feature Location).

        This is a convenience property allowing you to write code handling
        both simple FeatureLocation objects (with one part) and more complex
        CompoundLocation objects (with multiple parts) interchangeably.
        """
        return [self]

    @property
    def start(self):
        """Start location (integer like, possibly a fuzzy position, read only)."""
        return self._start

    @property
    def end(self):
        """End location (integer like, possibly a fuzzy position, read only)."""
        return self._end

    @property
    def nofuzzy_start(self):
        """Start position (integer, approximated if fuzzy, read only) (OBSOLETE).

        This is now an alias for int(feature.start), which should be
        used in preference -- unless you are trying to support old
        versions of Biopython.

        Returns None if the start is an UnknownPosition whose conversion to
        an integer raises TypeError.
        """
        try:
            return int(self._start)
        except TypeError:
            if isinstance(self._start, UnknownPosition):
                return None
            raise

    @property
    def nofuzzy_end(self):
        """End position (integer, approximated if fuzzy, read only) (OBSOLETE).

        This is now an alias for int(feature.end), which should be
        used in preference -- unless you are trying to support old
        versions of Biopython.

        Returns None if the end is an UnknownPosition whose conversion to
        an integer raises TypeError.
        """
        try:
            return int(self._end)
        except TypeError:
            if isinstance(self._end, UnknownPosition):
                return None
            raise

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        Slices parent_sequence from nofuzzy_start to nofuzzy_end, and takes
        the reverse complement if the strand is -1. A MutableSeq parent is
        first converted with its toseq() method. Raises ValueError if the
        location refers to another sequence (ref or ref_db set).
        """
        if self.ref or self.ref_db:
            #TODO - Take a dictionary as an optional argument?
            raise ValueError("Feature references another sequence.")
        if isinstance(parent_sequence, MutableSeq):
            #This avoids complications with reverse complements
            #(the MutableSeq reverse complement acts in situ)
            parent_sequence = parent_sequence.toseq()
        f_seq = parent_sequence[self.nofuzzy_start:self.nofuzzy_end]
        if self.strand == -1:
            try:
                f_seq = f_seq.reverse_complement()
            except AttributeError:
                # Plain strings have no reverse_complement method, so use
                # the module level function instead.
                assert isinstance(f_seq, str)
                f_seq = reverse_complement(f_seq)
        return f_seq

class CompoundLocation(object):
    """For handling joins etc where a feature location has several parts."""

    def __init__(self, parts, operator="join"):
        """Create a compound location with several parts.

        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
        >>> f1 = FeatureLocation(10, 40, strand=+1)
        >>> f2 = FeatureLocation(50, 59, strand=+1)
        >>> f = CompoundLocation([f1, f2])
        >>> len(f) == len(f1) + len(f2) == 39 == len(list(f))
        True
        >>> print(f.operator)
        join
        >>> 5 in f
        False
        >>> 15 in f
        True
        >>> f.strand
        1

        Notice that the strand of the compound location is computed
        automatically - in the case of mixed strands on the sub-locations
        the overall strand is set to None.

        >>> f = CompoundLocation([FeatureLocation(3, 6, strand=+1),
        ...                       FeatureLocation(10, 13, strand=-1)])
        >>> print(f.strand)
        None
        >>> len(f)
        6
        >>> list(f)
        [3, 4, 5, 12, 11, 10]

        The example above doing list(f) iterates over the coordinates within the
        feature. This allows you to use max and min on the location, to find the
        range covered:

        >>> min(f)
        3
        >>> max(f)
        12

        More generally, you can use the compound location's start and end which
        give the full range covered, 0 <= start <= end <= full sequence length.

        >>> f.start == min(f)
        True
        >>> f.end == max(f) + 1
        True

        This is consistent with the behaviour of the simple FeatureLocation for
        a single region, where again the 'start' and 'end' do not necessarily
        give the biological start and end, but rather the 'minimal' and 'maximal'
        coordinate boundaries.

        Note that adding locations provides a more intuitive method of
        construction:

        >>> f = FeatureLocation(3, 6, strand=+1) + FeatureLocation(10, 13, strand=-1)
        >>> len(f)
        6
        >>> list(f)
        [3, 4, 5, 12, 11, 10]

        Raises ValueError if any part is not a FeatureLocation, or if fewer
        than two parts are given.
        """
        self.operator = operator
        # Copy into a list so that any iterable of parts is accepted.
        self.parts = list(parts)
        for loc in self.parts:
            if not isinstance(loc, FeatureLocation):
                raise ValueError("CompoundLocation should be given a list of "
                                 "FeatureLocation objects, not %s" % loc.__class__)
        if len(self.parts) < 2:
            raise ValueError("CompoundLocation should have at least 2 parts")

    def __str__(self):
        """Returns a representation of the location (with python counting)."""
        return "%s{%s}" % (self.operator, ", ".join(str(loc) for loc in self.parts))

    def __repr__(self):
        """String representation of the location for debugging."""
        return "%s(%r, %r)" % (self.__class__.__name__,
                               self.parts, self.operator)

    def _get_strand(self):
        """Return the strand shared by all parts, or None if they differ (PRIVATE)."""
        # Historically a join on the reverse strand has been represented
        # in Biopython with both the parent SeqFeature and its children
        # (the exons for a CDS) all given a strand of -1. Likewise, for
        # a join feature on the forward strand they all have strand +1.
        # However, we must also consider evil mixed strand examples like
        # this, join(complement(69611..69724),139856..140087,140625..140650)
        if len(set(loc.strand for loc in self.parts)) == 1:
            return self.parts[0].strand
        else:
            return None  # i.e. mixed strands

    def _set_strand(self, value):
        """Apply the given strand to every part (PRIVATE)."""
        # Should this be allowed/encouraged?
        for loc in self.parts:
            loc.strand = value

    strand = property(fget=_get_strand, fset=_set_strand,
                      doc="""Overall strand of the compound location.

        If all the parts have the same strand, that is returned. Otherwise
        for mixed strands, this returns None.

        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
        >>> f1 = FeatureLocation(15, 17, strand=1)
        >>> f2 = FeatureLocation(20, 30, strand=-1)
        >>> f = f1 + f2
        >>> f1.strand
        1
        >>> f2.strand
        -1
        >>> f.strand
        >>> f.strand is None
        True

        If you set the strand of a CompoundLocation, this is applied to
        all the parts - use with caution:

        >>> f.strand = 1
        >>> f1.strand
        1
        >>> f2.strand
        1
        >>> f.strand
        1

        """)

    def __add__(self, other):
        """Combine locations, or shift the location by an integer offset.

        >>> from Bio.SeqFeature import FeatureLocation, CompoundLocation
        >>> f1 = FeatureLocation(15, 17) + FeatureLocation(20, 30)
        >>> print(f1)
        join{[15:17], [20:30]}

        You can add another FeatureLocation:

        >>> print(f1 + FeatureLocation(40, 50))
        join{[15:17], [20:30], [40:50]}
        >>> print(FeatureLocation(5, 10) + f1)
        join{[5:10], [15:17], [20:30]}

        You can also add another CompoundLocation:

        >>> f2 = FeatureLocation(40, 50) + FeatureLocation(60, 70)
        >>> print(f2)
        join{[40:50], [60:70]}
        >>> print(f1 + f2)
        join{[15:17], [20:30], [40:50], [60:70]}

        Also, as with the FeatureLocation, adding an integer shifts the
        location's co-ordinates by that offset:

        >>> print(f1 + 100)
        join{[115:117], [120:130]}
        >>> print(200 + f1)
        join{[215:217], [220:230]}
        >>> print(f1 + (-5))
        join{[10:12], [15:25]}

        Raises ValueError when two compound locations with different
        operators are added, and NotImplementedError for any other type.
        """
        if isinstance(other, FeatureLocation):
            return CompoundLocation(self.parts + [other], self.operator)
        elif isinstance(other, CompoundLocation):
            if self.operator != other.operator:
                #Handle join+order -> order as a special case?
                raise ValueError("Mixed operators %s and %s"
                                 % (self.operator, other.operator))
            return CompoundLocation(self.parts + other.parts, self.operator)
        elif isinstance(other, int):
            return self._shift(other)
        else:
            raise NotImplementedError

    def __radd__(self, other):
        """Combine locations (reflected form, e.g. FeatureLocation + self or int + self)."""
        if isinstance(other, FeatureLocation):
            return CompoundLocation([other] + self.parts, self.operator)
        elif isinstance(other, int):
            return self._shift(other)
        else:
            raise NotImplementedError

    def __contains__(self, value):
        """Check if an integer position is within the location."""
        for loc in self.parts:
            if value in loc:
                return True
        return False

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a FeatureLocation always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The FeatureLocation may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks up __bool__ rather than __nonzero__; without this alias
    # truth testing would fall back to __len__ and a zero length location
    # would evaluate as False, contradicting the behaviour documented above.
    __bool__ = __nonzero__

    def __len__(self):
        """Return the combined length of all the parts."""
        return sum(len(loc) for loc in self.parts)

    def __iter__(self):
        """Iterate over the positions of each part, in the order of the parts."""
        for loc in self.parts:
            for pos in loc:
                yield pos

    def _shift(self, offset):
        """Returns a copy of the location shifted by the offset (PRIVATE)."""
        return CompoundLocation([loc._shift(offset) for loc in self.parts],
                                self.operator)

    def _flip(self, length):
        """Returns a copy of the location after the parent is reversed (PRIVATE).

        Note that the order of the parts is reversed too.
        """
        return CompoundLocation([loc._flip(length) for loc in self.parts[::-1]],
                                self.operator)

    @property
    def start(self):
        """Start location (integer like, possibly a fuzzy position, read only).

        This is the smallest start of any of the parts.
        """
        return min(loc.start for loc in self.parts)

    @property
    def end(self):
        """End location (integer like, possibly a fuzzy position, read only).

        This is the largest end of any of the parts.
        """
        return max(loc.end for loc in self.parts)

    @property
    def nofuzzy_start(self):
        """Start position (integer, approximated if fuzzy, read only) (OBSOLETE).

        This is an alias for int(feature.start), which should be used in
        preference -- unless you are trying to support old versions of
        Biopython.
        """
        try:
            return int(self.start)
        except TypeError:
            if isinstance(self.start, UnknownPosition):
                return None
            raise

    @property
    def nofuzzy_end(self):
        """End position (integer, approximated if fuzzy, read only) (OBSOLETE).

        This is an alias for int(feature.end), which should be used in
        preference -- unless you are trying to support old versions of
        Biopython.
        """
        try:
            return int(self.end)
        except TypeError:
            if isinstance(self.end, UnknownPosition):
                return None
            raise

    @property
    def ref(self):
        """CompoundLocation's don't have a ref (dummy method for API compatibility)."""
        return None

    @property
    def ref_db(self):
        """CompoundLocation's don't have a ref_db (dummy method for API compatibility)."""
        return None

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        Each part is extracted separately and the pieces are concatenated
        in the order of the parts.
        """
        #This copes with mixed strand features & all on reverse:
        parts = [loc.extract(parent_sequence) for loc in self.parts]
        #We use addition rather than a join to avoid alphabet issues:
        f_seq = parts[0]
        for part in parts[1:]:
            f_seq += part
        return f_seq
1217
class AbstractPosition(object):
    """Common base class for all the position classes."""

    def __repr__(self):
        """Return a debugging string: the class name followed by '(...)'."""
        class_name = self.__class__.__name__
        return "%s(...)" % (class_name,)
1226
class ExactPosition(int, AbstractPosition):
    """Specify the specific position of a boundary.

    o position - The position of the boundary.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    In this case, there is no fuzziness associated with the position.

    >>> p = ExactPosition(5)
    >>> p
    ExactPosition(5)
    >>> print(p)
    5

    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    Integer comparisons and operations should work as expected:

    >>> p == 5
    True
    >>> p < 6
    True
    >>> p <= 5
    True
    >>> p + 10
    15

    """

    def __new__(cls, position, extension=0):
        """Create the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        return int.__new__(cls, position)

    def __repr__(self):
        """String representation of the ExactPosition location for debugging."""
        class_name = self.__class__.__name__
        return "%s(%i)" % (class_name, int(self))

    @property
    def position(self):
        """Legacy attribute to get position as integer (OBSOLETE)."""
        return int(self)

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def _shift(self, offset):
        """Return a copy moved by offset, keeping the same class (PRIVATE)."""
        #By default preserve any subclass
        moved = int(self) + offset
        return self.__class__(moved)

    def _flip(self, length):
        """Return the position measured from the other end of a parent of the given length (PRIVATE)."""
        #By default preserve any subclass
        mirrored = length - int(self)
        return self.__class__(mirrored)
1288
class UncertainPosition(ExactPosition):
    """Specify a specific position which is uncertain.

    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.

    All behaviour is inherited unchanged from ExactPosition.
    """
1297
class UnknownPosition(AbstractPosition):
    """Specify a specific position which is unknown (has no position).

    This is used in UniProt, e.g. ? or in the XML as unknown.
    """

    def __repr__(self):
        """String representation of the UnknownPosition location for debugging."""
        class_name = self.__class__.__name__
        return "%s()" % class_name

    def __hash__(self):
        """Return the hash of None, so all instances hash alike."""
        return hash(None)

    @property
    def position(self):
        """Legacy attribute to get position (None) (OBSOLETE)."""
        return None

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def _shift(self, offset):
        """Return self; the offset is ignored (PRIVATE)."""
        return self

    def _flip(self, length):
        """Return self; the length is ignored (PRIVATE)."""
        return self
1327
class WithinPosition(int, AbstractPosition):
    """Specify the position of a boundary within some coordinates.

    Arguments:
    o position - The default integer position
    o left - The start (left) position of the boundary
    o right - The end (right) position of the boundary

    This allows dealing with a position like ((1.4)..100). This
    indicates that the start of the sequence is somewhere between 1
    and 4. Since this is a start coordinate, it should act like
    it is at position 1 (or in Python counting, 0).

    >>> p = WithinPosition(10, 10, 13)
    >>> p
    WithinPosition(10, left=10, right=13)
    >>> print(p)
    (10.13)
    >>> int(p)
    10

    Basic integer comparisons and operations should work as though
    this were a plain integer:

    >>> p == 10
    True
    >>> p in [9, 10, 11]
    True
    >>> p < 11
    True
    >>> p + 10
    20

    >>> isinstance(p, WithinPosition)
    True
    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    Note this also applies for comparison to other position objects,
    where again the integer behaviour is used:

    >>> p == 10
    True
    >>> p == ExactPosition(10)
    True
    >>> p == BeforePosition(10)
    True
    >>> p == AfterPosition(10)
    True

    If this were an end point, you would want the position to be 13:

    >>> p2 = WithinPosition(13, 10, 13)
    >>> p2
    WithinPosition(13, left=10, right=13)
    >>> print(p2)
    (10.13)
    >>> int(p2)
    13
    >>> p2 == 13
    True
    >>> p2 == ExactPosition(13)
    True

    The old legacy properties of position and extension give the
    starting/lower/left position as an integer, and the distance
    to the ending/higher/right position as an integer. Note that
    the position object will act like either the left or the right
    end-point depending on how it was created:

    >>> p.position == p2.position == 10
    True
    >>> p.extension == p2.extension == 3
    True
    >>> int(p) == int(p2)
    False
    >>> p == 10
    True
    >>> p2 == 13
    True

    """

    def __new__(cls, position, left, right):
        """Create the position, recording both boundaries.

        The integer value is position, which must equal either left or
        right; otherwise an AssertionError is raised.
        """
        # The '%' is required: without it the message string would be
        # *called* with the tuple, raising TypeError instead of reporting
        # the failed check.
        assert position == left or position == right, \
            "WithinPosition: %r should match left %r or right %r" \
            % (position, left, right)
        obj = int.__new__(cls, position)
        obj._left = left
        obj._right = right
        return obj

    def __repr__(self):
        """String representation of the WithinPosition location for debugging."""
        return "%s(%i, left=%i, right=%i)" \
               % (self.__class__.__name__, int(self),
                  self._left, self._right)

    def __str__(self):
        """Return the boundaries in the form (left.right)."""
        return "(%s.%s)" % (self._left, self._right)

    @property
    def position(self):
        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
        return self._left

    @property
    def extension(self):
        """Legacy attribute to get extension (from left to right) as an integer (OBSOLETE)."""
        return self._right - self._left

    def _shift(self, offset):
        """Return a copy with the value and both boundaries moved by offset (PRIVATE)."""
        return self.__class__(int(self) + offset,
                              self._left + offset,
                              self._right + offset)

    def _flip(self, length):
        """Return a copy for a reversed parent of the given length (PRIVATE).

        The boundaries swap roles: the new left is length - right and the
        new right is length - left.
        """
        return self.__class__(length - int(self),
                              length - self._right,
                              length - self._left)
1450
class BetweenPosition(int, AbstractPosition):
    """Specify the position of a boundary between two coordinates (OBSOLETE?).

    Arguments:
    o position - The default integer position
    o left - The start (left) position of the boundary
    o right - The end (right) position of the boundary

    This allows dealing with a position like 123^456. This
    indicates that the start of the sequence is somewhere between
    123 and 456. It is up to the parser to set the position argument
    to either boundary point (depending on if this is being used as
    a start or end of the feature). For example as a feature end:

    >>> p = BetweenPosition(456, 123, 456)
    >>> p
    BetweenPosition(456, left=123, right=456)
    >>> print(p)
    (123^456)
    >>> int(p)
    456

    Integer equality and comparison use the given position,

    >>> p == 456
    True
    >>> p in [455, 456, 457]
    True
    >>> p > 300
    True

    The old legacy properties of position and extension give the
    starting/lower/left position as an integer, and the distance
    to the ending/higher/right position as an integer. Note that
    the position object will act like either the left or the right
    end-point depending on how it was created:

    >>> p2 = BetweenPosition(123, left=123, right=456)
    >>> p.position == p2.position == 123
    True
    >>> p.extension
    333
    >>> p2.extension
    333
    >>> p.extension == p2.extension == 333
    True
    >>> int(p) == int(p2)
    False
    >>> p == 456
    True
    >>> p2 == 123
    True

    Note this potentially surprising behaviour:

    >>> BetweenPosition(123, left=123, right=456) == ExactPosition(123)
    True
    >>> BetweenPosition(123, left=123, right=456) == BeforePosition(123)
    True
    >>> BetweenPosition(123, left=123, right=456) == AfterPosition(123)
    True

    i.e. For equality (and sorting) the position objects behave like
    integers.
    """

    def __new__(cls, position, left, right):
        """Create the position, recording both boundaries.

        The integer value is position, which must equal either left or
        right; otherwise an AssertionError is raised.
        """
        # Carry a diagnostic message, matching the other position classes.
        assert position == left or position == right, \
            "BetweenPosition: %r should match left %r or right %r" \
            % (position, left, right)
        obj = int.__new__(cls, position)
        obj._left = left
        obj._right = right
        return obj

    def __repr__(self):
        """String representation of the BetweenPosition location for debugging."""
        return "%s(%i, left=%i, right=%i)" \
               % (self.__class__.__name__, int(self),
                  self._left, self._right)

    def __str__(self):
        """Return the boundaries in the form (left^right)."""
        return "(%s^%s)" % (self._left, self._right)

    @property
    def position(self):
        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
        return self._left

    @property
    def extension(self):
        """Legacy attribute to get extension (from left to right) as an integer (OBSOLETE)."""
        return self._right - self._left

    def _shift(self, offset):
        """Return a copy with the value and both boundaries moved by offset (PRIVATE)."""
        return self.__class__(int(self) + offset,
                              self._left + offset,
                              self._right + offset)

    def _flip(self, length):
        """Return a copy for a reversed parent of the given length (PRIVATE).

        The boundaries swap roles: the new left is length - right and the
        new right is length - left.
        """
        return self.__class__(length - int(self),
                              length - self._right,
                              length - self._left)
1552
class BeforePosition(int, AbstractPosition):
    """Specify a position where the actual location occurs before it.

    Arguments:
    o position - The upper boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (<10..100) where the location
    occurs somewhere before position 10.

    >>> p = BeforePosition(5)
    >>> p
    BeforePosition(5)
    >>> print(p)
    <5
    >>> int(p)
    5
    >>> p + 10
    15

    Note this potentially surprising behaviour:

    >>> p == ExactPosition(5)
    True
    >>> p == AfterPosition(5)
    True

    Just remember that for equality and sorting the position objects act
    like integers.
    """

    #Subclasses int so can't use __init__
    def __new__(cls, position, extension=0):
        """Create the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        return int.__new__(cls, position)

    @property
    def position(self):
        """Legacy attribute to get position as integer (OBSOLETE)."""
        return int(self)

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def __repr__(self):
        """A string representation of the location for debugging."""
        class_name = self.__class__.__name__
        return "%s(%i)" % (class_name, int(self))

    def __str__(self):
        """Return the position prefixed with '<'."""
        value = self.position
        return "<%s" % value

    def _shift(self, offset):
        """Return a copy moved by offset, keeping the same class (PRIVATE)."""
        moved = int(self) + offset
        return self.__class__(moved)

    def _flip(self, length):
        """Return the AfterPosition at length minus this position (PRIVATE)."""
        mirrored = length - int(self)
        return AfterPosition(mirrored)
1615
class AfterPosition(int, AbstractPosition):
    """Specify a position where the actual location is found after it.

    Arguments:
    o position - The lower boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (>10..100) where the location
    occurs somewhere after position 10.

    >>> p = AfterPosition(7)
    >>> p
    AfterPosition(7)
    >>> print(p)
    >7
    >>> int(p)
    7
    >>> p + 10
    17

    >>> isinstance(p, AfterPosition)
    True
    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    Note this potentially surprising behaviour:

    >>> p == ExactPosition(7)
    True
    >>> p == BeforePosition(7)
    True

    Just remember that for equality and sorting the position objects act
    like integers.
    """

    #Subclasses int so can't use __init__
    def __new__(cls, position, extension=0):
        """Create the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        return int.__new__(cls, position)

    @property
    def position(self):
        """Legacy attribute to get position as integer (OBSOLETE)."""
        return int(self)

    @property
    def extension(self):
        """Legacy attribute to get extension (zero) as integer (OBSOLETE)."""
        return 0

    def __repr__(self):
        """A string representation of the location for debugging."""
        class_name = self.__class__.__name__
        return "%s(%i)" % (class_name, int(self))

    def __str__(self):
        """Return the position prefixed with '>'."""
        value = self.position
        return ">%s" % value

    def _shift(self, offset):
        """Return a copy moved by offset, keeping the same class (PRIVATE)."""
        moved = int(self) + offset
        return self.__class__(moved)

    def _flip(self, length):
        """Return the BeforePosition at length minus this position (PRIVATE)."""
        mirrored = length - int(self)
        return BeforePosition(mirrored)
1685
class OneOfPosition(int, AbstractPosition):
    """Specify a position where the location can be multiple positions.

    This models the GenBank 'one-of(1888,1901)' function, and tries
    to make this fit within the Biopython Position models. If this was
    a start position it should act like 1888, but as an end position 1901.

    >>> p = OneOfPosition(1888, [ExactPosition(1888), ExactPosition(1901)])
    >>> p
    OneOfPosition(1888, choices=[ExactPosition(1888), ExactPosition(1901)])
    >>> int(p)
    1888

    Integer comparisons and operators act like using int(p),

    >>> p == 1888
    True
    >>> p <= 1888
    True
    >>> p > 1888
    False
    >>> p + 100
    1988

    >>> isinstance(p, OneOfPosition)
    True
    >>> isinstance(p, AbstractPosition)
    True
    >>> isinstance(p, int)
    True

    The old legacy properties of position and extension give the
    starting/lowest/left-most position as an integer, and the
    distance to the ending/highest/right-most position as an integer.
    Note that the position object will act like one of the list of
    possible locations depending on how it was created:

    >>> p2 = OneOfPosition(1901, [ExactPosition(1888), ExactPosition(1901)])
    >>> p.position == p2.position == 1888
    True
    >>> p.extension == p2.extension == 13
    True
    >>> int(p) == int(p2)
    False
    >>> p == 1888
    True
    >>> p2 == 1901
    True

    """

    def __new__(cls, position, choices):
        """Initialize with a set of possible positions.

        choices is a list of AbstractPosition derived objects,
        specifying possible locations.

        position is an integer specifying the default behaviour; it must
        be one of the choices, otherwise an AssertionError is raised.
        """
        assert position in choices, \
            "OneOfPosition: %r should match one of %r" % (position, choices)
        obj = int.__new__(cls, position)
        obj.position_choices = choices
        return obj

    @property
    def position(self):
        """Legacy attribute to get (left) position as integer (OBSOLETE)."""
        return min(int(pos) for pos in self.position_choices)

    @property
    def extension(self):
        """Legacy attribute to get extension as integer (OBSOLETE)."""
        positions = [int(pos) for pos in self.position_choices]
        return max(positions) - min(positions)

    def __repr__(self):
        """String representation of the OneOfPosition location for debugging."""
        return "%s(%i, choices=%r)" % (self.__class__.__name__,
                                       int(self), self.position_choices)

    def __str__(self):
        """Return the choices in the form one-of(a,b,...)."""
        # Joining (rather than appending "x," and chopping the last
        # character) keeps the parentheses balanced even with no choices.
        return "one-of(%s)" % ",".join(str(pos) for pos in self.position_choices)

    def _shift(self, offset):
        """Return a copy with the value and every choice moved by offset (PRIVATE)."""
        return self.__class__(int(self) + offset,
                              [p._shift(offset) for p in self.position_choices])

    def _flip(self, length):
        """Return a copy for a reversed parent of the given length (PRIVATE).

        Each choice is flipped and the order of the choices is reversed.
        """
        return self.__class__(length - int(self),
                              [p._flip(length) for p in self.position_choices[::-1]])
1782
class PositionGap(object):
    """Simple class to hold information about a gap between positions."""

    def __init__(self, gap_size):
        """Initialize with a position object containing the gap information."""
        self.gap_size = gap_size

    def __repr__(self):
        """A string representation of the position gap for debugging."""
        class_name = self.__class__.__name__
        size_text = repr(self.gap_size)
        return "%s(%s)" % (class_name, size_text)

    def __str__(self):
        """Return the gap in the form gap(size)."""
        return "gap(%s)" % self.gap_size
if __name__ == "__main__":
    # Only when run as a script: hand over to the helper imported below,
    # which is expected to run this module's docstring examples as tests.
    from Bio._utils import run_doctest
    run_doctest()