Package Bio :: Package Phylo :: Module BaseTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.BaseTree

   1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
   2  # This code is part of the Biopython distribution and governed by its 
   3  # license. Please see the LICENSE file that should have been included 
   4  # as part of this package. 
   5   
   6  """Base classes for Bio.Phylo objects. 
   7   
   8  All object representations for phylogenetic trees should derive from these base 
   9  classes in order to use the common methods defined on them. 
  10  """ 
  11  __docformat__ = "restructuredtext en" 
  12   
  13  import collections 
  14  import copy 
  15  import itertools 
  16  import random 
  17  import re 
  18   
  19  from Bio.Phylo import _sugar 
20 21 # General tree-traversal algorithms 22 23 -def _level_traverse(root, get_children):
24 """Traverse a tree in breadth-first (level) order.""" 25 Q = collections.deque([root]) 26 while Q: 27 v = Q.popleft() 28 yield v 29 Q.extend(get_children(v))
30
31 -def _preorder_traverse(root, get_children):
32 """Traverse a tree in depth-first pre-order (parent before children).""" 33 def dfs(elem): 34 yield elem 35 for v in get_children(elem): 36 for u in dfs(v): 37 yield u
38 for elem in dfs(root): 39 yield elem 40
41 -def _postorder_traverse(root, get_children):
42 """Traverse a tree in depth-first post-order (children before parent).""" 43 def dfs(elem): 44 for v in get_children(elem): 45 for u in dfs(v): 46 yield u 47 yield elem
48 for elem in dfs(root): 49 yield elem 50
51 52 -def _sorted_attrs(elem):
53 """Get a flat list of elem's attributes, sorted for consistency.""" 54 singles = [] 55 lists = [] 56 # Sort attributes for consistent results 57 for attrname, child in sorted(elem.__dict__.iteritems(), 58 key=lambda kv: kv[0]): 59 if child is None: 60 continue 61 if isinstance(child, list): 62 lists.extend(child) 63 else: 64 singles.append(child) 65 return (x for x in singles + lists 66 if isinstance(x, TreeElement))
67
68 69 # Factory functions to generalize searching for clades/nodes 70 71 -def _identity_matcher(target):
72 """Match a node to the target object by identity.""" 73 def match(node): 74 return (node is target)
75 return match 76
77 -def _class_matcher(target_cls):
78 """Match a node if it's an instance of the given class.""" 79 def match(node): 80 return isinstance(node, target_cls)
81 return match 82
83 -def _string_matcher(target):
84 def match(node): 85 return unicode(node) == target
86 return match 87
88 -def _attribute_matcher(kwargs):
89 """Match a node by specified attribute values. 90 91 ``terminal`` is a special case: True restricts the search to external (leaf) 92 nodes, False restricts to internal nodes, and None allows all tree elements 93 to be searched, including phyloXML annotations. 94 95 Otherwise, for a tree element to match the specification (i.e. for the 96 function produced by `_attribute_matcher` to return True when given a tree 97 element), it must have each of the attributes specified by the keys and 98 match each of the corresponding values -- think 'and', not 'or', for 99 multiple keys. 100 """ 101 def match(node): 102 if 'terminal' in kwargs: 103 # Special case: restrict to internal/external/any nodes 104 kwa_copy = kwargs.copy() 105 pattern = kwa_copy.pop('terminal') 106 if (pattern is not None and 107 (not hasattr(node, 'is_terminal') or 108 node.is_terminal() != pattern)): 109 return False 110 else: 111 kwa_copy = kwargs 112 for key, pattern in kwa_copy.iteritems(): 113 # Nodes must match all other specified attributes 114 if not hasattr(node, key): 115 return False 116 target = getattr(node, key) 117 if isinstance(pattern, basestring): 118 return (isinstance(target, basestring) and 119 re.match(pattern+'$', target)) 120 if isinstance(pattern, bool): 121 return (pattern == bool(target)) 122 if isinstance(pattern, int): 123 return (pattern == target) 124 if pattern is None: 125 return (target is None) 126 raise TypeError('invalid query type: %s' % type(pattern)) 127 return True
128 return match 129
130 -def _function_matcher(matcher_func):
131 """Safer attribute lookup -- returns False instead of raising an error.""" 132 def match(node): 133 try: 134 return matcher_func(node) 135 except (LookupError, AttributeError, ValueError, TypeError): 136 return False
137 return match 138
139 -def _object_matcher(obj):
140 """Retrieve a matcher function by passing an arbitrary object. 141 142 i.e. passing a `TreeElement` such as a `Clade` or `Tree` instance returns an 143 identity matcher, passing a type such as the `PhyloXML.Taxonomy` class 144 returns a class matcher, and passing a dictionary returns an attribute 145 matcher. 146 147 The resulting 'match' function returns True when given an object matching 148 the specification (identity, type or attribute values), otherwise False. 149 This is useful for writing functions that search the tree, and probably 150 shouldn't be used directly by the end user. 151 """ 152 if isinstance(obj, TreeElement): 153 return _identity_matcher(obj) 154 if isinstance(obj, type): 155 return _class_matcher(obj) 156 if isinstance(obj, basestring): 157 return _string_matcher(obj) 158 if isinstance(obj, dict): 159 return _attribute_matcher(obj) 160 if callable(obj): 161 return _function_matcher(obj) 162 raise ValueError("%s (type %s) is not a valid type for comparison." 163 % (obj, type(obj)))
164
165 166 -def _combine_matchers(target, kwargs, require_spec):
167 """Merge target specifications with keyword arguments. 168 169 Dispatch the components to the various matcher functions, then merge into a 170 single boolean function. 171 """ 172 if not target: 173 if not kwargs: 174 if require_spec: 175 raise ValueError("you must specify a target object or keyword " 176 "arguments.") 177 return lambda x: True 178 return _attribute_matcher(kwargs) 179 match_obj = _object_matcher(target) 180 if not kwargs: 181 return match_obj 182 match_kwargs = _attribute_matcher(kwargs) 183 return (lambda x: match_obj(x) and match_kwargs(x))
184
185 186 -def _combine_args(first, *rest):
187 """Convert ``[targets]`` or ``*targets`` arguments to a single iterable. 188 189 This helps other functions work like the built-in functions `max` and 190 `min`. 191 """ 192 # Background: is_monophyletic takes a single list or iterable (like the 193 # same method in Bio.Nexus.Trees); root_with_outgroup and common_ancestor 194 # take separate arguments. This mismatch was in the initial release and I 195 # didn't notice the inconsistency until after Biopython 1.55. I can think 196 # of cases where either style is more convenient, so let's support both 197 # (for backward compatibility and consistency between methods). 198 if hasattr(first, '__iter__') and not (isinstance(first, TreeElement) or 199 isinstance(first, type) or isinstance(first, basestring) or 200 isinstance(first, dict)): 201 # `terminals` is an iterable of targets 202 if rest: 203 raise ValueError("Arguments must be either a single list of " 204 "targets, or separately specified targets " 205 "(e.g. foo(t1, t2, t3)), but not both.") 206 return first 207 # `terminals` is a single target -- wrap in a container 208 return itertools.chain([first], rest)
209
210 211 # Class definitions 212 213 -class TreeElement(object):
214 """Base class for all Bio.Phylo classes.""" 215
216 - def __repr__(self):
217 """Show this object's constructor with its primitive arguments.""" 218 def pair_as_kwarg_string(key, val): 219 if isinstance(val, basestring): 220 return "%s='%s'" % (key, _sugar.trim_str(unicode(val))) 221 return "%s=%s" % (key, val)
222 return u'%s(%s)' % (self.__class__.__name__, 223 ', '.join(pair_as_kwarg_string(key, val) 224 for key, val in self.__dict__.iteritems() 225 if val is not None and 226 type(val) in (str, int, float, bool, unicode) 227 ))
228 229 __str__ = __repr__ 230
231 232 -class TreeMixin(object):
233 """Methods for Tree- and Clade-based classes. 234 235 This lets `Tree` and `Clade` support the same traversal and searching 236 operations without requiring Clade to inherit from Tree, so Clade isn't 237 required to have all of Tree's attributes -- just ``root`` (a Clade 238 instance) and ``is_terminal``. 239 """ 240 # Traversal methods 241
242 - def _filter_search(self, filter_func, order, follow_attrs):
243 """Perform a BFS or DFS traversal through all elements in this tree. 244 245 :returns: generator of all elements for which `filter_func` is True. 246 """ 247 order_opts = {'preorder': _preorder_traverse, 248 'postorder': _postorder_traverse, 249 'level': _level_traverse} 250 try: 251 order_func = order_opts[order] 252 except KeyError: 253 raise ValueError("Invalid order '%s'; must be one of: %s" 254 % (order, tuple(order_opts.keys()))) 255 if follow_attrs: 256 get_children = _sorted_attrs 257 root = self 258 else: 259 get_children = lambda elem: elem.clades 260 root = self.root 261 return itertools.ifilter(filter_func, order_func(root, get_children))
262
263 - def find_any(self, *args, **kwargs):
264 """Return the first element found by find_elements(), or None. 265 266 This is also useful for checking whether any matching element exists in 267 the tree, and can be used in a conditional expression. 268 """ 269 hits = self.find_elements(*args, **kwargs) 270 try: 271 return hits.next() 272 except StopIteration: 273 return None
274
275 - def find_elements(self, target=None, terminal=None, order='preorder', 276 **kwargs):
277 """Find all tree elements matching the given attributes. 278 279 The arbitrary keyword arguments indicate the attribute name of the 280 sub-element and the value to match: string, integer or boolean. Strings 281 are evaluated as regular expression matches; integers are compared 282 directly for equality, and booleans evaluate the attribute's truth value 283 (True or False) before comparing. To handle nonzero floats, search with 284 a boolean argument, then filter the result manually. 285 286 If no keyword arguments are given, then just the class type is used for 287 matching. 288 289 The result is an iterable through all matching objects, by depth-first 290 search. (Not necessarily the same order as the elements appear in the 291 source file!) 292 293 :Parameters: 294 target : TreeElement instance, type, dict, or callable 295 Specifies the characteristics to search for. (The default, 296 TreeElement, matches any standard Bio.Phylo type.) 297 terminal : bool 298 A boolean value to select for or against terminal nodes (a.k.a. 299 leaf nodes). True searches for only terminal nodes, False 300 excludes terminal nodes, and the default, None, searches both 301 terminal and non-terminal nodes, as well as any tree elements 302 lacking the ``is_terminal`` method. 303 order : {'preorder', 'postorder', 'level'} 304 Tree traversal order: 'preorder' (default) is depth-first 305 search, 'postorder' is DFS with child nodes preceding parents, 306 and 'level' is breadth-first search. 307 308 Example 309 ------- 310 311 >>> from Bio.Phylo.IO import PhyloXMIO 312 >>> phx = PhyloXMLIO.read('phyloxml_examples.xml') 313 >>> matches = phx.phylogenies[5].find_elements(code='OCTVU') 314 >>> matches.next() 315 Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris') 316 317 """ 318 if terminal is not None: 319 kwargs['terminal'] = terminal 320 is_matching_elem = _combine_matchers(target, kwargs, False) 321 return self._filter_search(is_matching_elem, order, True)
322
323 - def find_clades(self, target=None, terminal=None, order='preorder', 324 **kwargs):
325 """Find each clade containing a matching element. 326 327 That is, find each element as with find_elements(), but return the 328 corresponding clade object. (This is usually what you want.) 329 330 :returns: an iterable through all matching objects, searching 331 depth-first (preorder) by default. 332 """ 333 def match_attrs(elem): 334 orig_clades = elem.__dict__.pop('clades') 335 found = elem.find_any(target, **kwargs) 336 elem.clades = orig_clades 337 return (found is not None)
338 if terminal is None: 339 is_matching_elem = match_attrs 340 else: 341 def is_matching_elem(elem): 342 return ((elem.is_terminal() == terminal) and 343 match_attrs(elem))
344 return self._filter_search(is_matching_elem, order, False) 345
346 - def get_path(self, target=None, **kwargs):
347 """List the clades directly between this root and the given target. 348 349 :returns: list of all clade objects along this path, ending with the 350 given target, but excluding the root clade. 351 """ 352 # Only one path will work -- ignore weights and visits 353 path = [] 354 match = _combine_matchers(target, kwargs, True) 355 def check_in_path(v): 356 if match(v): 357 path.append(v) 358 return True 359 elif v.is_terminal(): 360 return False 361 for child in v: 362 if check_in_path(child): 363 path.append(v) 364 return True 365 return False
366 if not check_in_path(self.root): 367 return None 368 return path[-2::-1] 369
370 - def get_nonterminals(self, order='preorder'):
371 """Get a list of all of this tree's nonterminal (internal) nodes.""" 372 return list(self.find_clades(terminal=False, order=order))
373
374 - def get_terminals(self, order='preorder'):
375 """Get a list of all of this tree's terminal (leaf) nodes.""" 376 return list(self.find_clades(terminal=True, order=order))
377
378 - def trace(self, start, finish):
379 """List of all clade object between two targets in this tree. 380 381 Excluding `start`, including `finish`. 382 """ 383 mrca = self.common_ancestor(start, finish) 384 fromstart = mrca.get_path(start)[-2::-1] 385 to = mrca.get_path(finish) 386 return fromstart + [mrca] + to
387 388 # Information methods 389
390 - def common_ancestor(self, targets, *more_targets):
391 """Most recent common ancestor (clade) of all the given targets. 392 393 Edge cases: 394 - If no target is given, returns self.root 395 - If 1 target is given, returns the target 396 - If any target is not found in this tree, raises a ValueError 397 """ 398 paths = [self.get_path(t) 399 for t in _combine_args(targets, *more_targets)] 400 # Validation -- otherwise izip throws a spooky error below 401 for p, t in zip(paths, targets): 402 if p is None: 403 raise ValueError("target %s is not in this tree" % repr(t)) 404 mrca = self.root 405 for level in itertools.izip(*paths): 406 ref = level[0] 407 for other in level[1:]: 408 if ref is not other: 409 break 410 else: 411 mrca = ref 412 if ref is not mrca: 413 break 414 return mrca
415
416 - def count_terminals(self):
417 """Counts the number of terminal (leaf) nodes within this tree.""" 418 return _sugar.iterlen(self.find_clades(terminal=True))
419
420 - def depths(self, unit_branch_lengths=False):
421 """Create a mapping of tree clades to depths (by branch length). 422 423 :Parameters: 424 unit_branch_lengths : bool 425 If True, count only the number of branches (levels in the tree). 426 By default the distance is the cumulative branch length leading 427 to the clade. 428 429 :returns: dict of {clade: depth}, where keys are all of the Clade 430 instances in the tree, and values are the distance from the root to 431 each clade (including terminals). 432 """ 433 if unit_branch_lengths: 434 depth_of = lambda c: 1 435 else: 436 depth_of = lambda c: c.branch_length or 0 437 depths = {} 438 def update_depths(node, curr_depth): 439 depths[node] = curr_depth 440 for child in node.clades: 441 new_depth = curr_depth + depth_of(child) 442 update_depths(child, new_depth)
443 update_depths(self.root, 0) 444 return depths 445
446 - def distance(self, target1, target2=None):
447 """Calculate the sum of the branch lengths between two targets. 448 449 If only one target is specified, the other is the root of this tree. 450 """ 451 if target2 is None: 452 return sum(n.branch_length for n in self.get_path(target1) 453 if n.branch_length is not None) 454 mrca = self.common_ancestor(target1, target2) 455 return mrca.distance(target1) + mrca.distance(target2)
456
457 - def is_bifurcating(self):
458 """Return True if tree downstream of node is strictly bifurcating. 459 460 I.e., all nodes have either 2 or 0 children (internal or external, 461 respectively). The root may have 3 descendents and still be considered 462 part of a bifurcating tree, because it has no ancestor. 463 """ 464 # Root can be trifurcating 465 if isinstance(self, Tree) and len(self.root) == 3: 466 return (self.root.clades[0].is_bifurcating() and 467 self.root.clades[1].is_bifurcating() and 468 self.root.clades[2].is_bifurcating()) 469 if len(self.root) == 2: 470 return (self.root.clades[0].is_bifurcating() and 471 self.root.clades[1].is_bifurcating()) 472 if len(self.root) == 0: 473 return True 474 return False
475
476 - def is_monophyletic(self, terminals, *more_terminals):
477 """MRCA of terminals if they comprise a complete subclade, or False. 478 479 I.e., there exists a clade such that its terminals are the same set as 480 the given targets. 481 482 The given targets must be terminals of the tree. 483 484 To match both `Bio.Nexus.Trees` and the other multi-target methods in 485 Bio.Phylo, arguments to this method can be specified either of two ways: 486 (i) as a single list of targets, or (ii) separately specified targets, 487 e.g. is_monophyletic(t1, t2, t3) -- but not both. 488 489 For convenience, this method returns the common ancestor (MCRA) of the 490 targets if they are monophyletic (instead of the value True), and False 491 otherwise. 492 493 :returns: common ancestor if terminals are monophyletic, otherwise False. 494 """ 495 target_set = set(_combine_args(terminals, *more_terminals)) 496 current = self.root 497 while True: 498 if set(current.get_terminals()) == target_set: 499 return current 500 # Try a narrower subclade 501 for subclade in current.clades: 502 if set(subclade.get_terminals()).issuperset(target_set): 503 current = subclade 504 break 505 else: 506 return False
507
508 - def is_parent_of(self, target=None, **kwargs):
509 """True if target is a descendent of this tree. 510 511 Not required to be a direct descendent. 512 513 To check only direct descendents of a clade, simply use list membership 514 testing: ``if subclade in clade: ...`` 515 """ 516 return self.get_path(target, **kwargs) is not None
517
518 - def is_preterminal(self):
519 """True if all direct descendents are terminal.""" 520 if self.root.is_terminal(): 521 return False 522 for clade in self.root.clades: 523 if not clade.is_terminal(): 524 return False 525 return True
526
527 - def total_branch_length(self):
528 """Calculate the sum of all the branch lengths in this tree.""" 529 return sum(node.branch_length 530 for node in self.find_clades(branch_length=True))
531 532 # Tree manipulation methods 533
534 - def collapse(self, target=None, **kwargs):
535 """Deletes target from the tree, relinking its children to its parent. 536 537 :returns: the parent clade. 538 """ 539 path = self.get_path(target, **kwargs) 540 if not path: 541 raise ValueError("couldn't collapse %s in this tree" 542 % (target or kwargs)) 543 if len(path) == 1: 544 parent = self.root 545 else: 546 parent = path[-2] 547 popped = parent.clades.pop(parent.clades.index(path[-1])) 548 extra_length = popped.branch_length or 0 549 for child in popped: 550 child.branch_length += extra_length 551 parent.clades.extend(popped.clades) 552 return parent
553
554 - def collapse_all(self, target=None, **kwargs):
555 """Collapse all the descendents of this tree, leaving only terminals. 556 557 Total branch lengths are preserved, i.e. the distance to each terminal 558 stays the same. 559 560 For example, this will safely collapse nodes with poor bootstrap 561 support: 562 563 >>> tree.collapse_all(lambda c: c.confidence is not None and 564 ... c.confidence < 70) 565 566 This implementation avoids strange side-effects by using level-order 567 traversal and testing all clade properties (versus the target 568 specification) up front. In particular, if a clade meets the target 569 specification in the original tree, it will be collapsed. For example, 570 if the condition is: 571 572 >>> tree.collapse_all(lambda c: c.branch_length < 0.1) 573 574 Collapsing a clade's parent node adds the parent's branch length to the 575 child, so during the execution of collapse_all, a clade's branch_length 576 may increase. In this implementation, clades are collapsed according to 577 their properties in the original tree, not the properties when tree 578 traversal reaches the clade. (It's easier to debug.) If you want the 579 other behavior (incremental testing), modifying the source code of this 580 function is straightforward. 581 """ 582 # Read the iterable into a list to protect against in-place changes 583 matches = list(self.find_clades(target, False, 'level', **kwargs)) 584 if not matches: 585 # No matching nodes to collapse 586 return 587 # Skip the root node -- it can't be collapsed 588 if matches[0] == self.root: 589 matches.pop(0) 590 for clade in matches: 591 self.collapse(clade)
592
593 - def ladderize(self, reverse=False):
594 """Sort clades in-place according to the number of terminal nodes. 595 596 Deepest clades are last by default. Use ``reverse=True`` to sort clades 597 deepest-to-shallowest. 598 """ 599 self.root.clades.sort(key=lambda c: c.count_terminals(), 600 reverse=reverse) 601 for subclade in self.root.clades: 602 subclade.ladderize(reverse=reverse)
603
604 - def prune(self, target=None, **kwargs):
605 """Prunes a terminal clade from the tree. 606 607 If taxon is from a bifurcation, the connecting node will be collapsed 608 and its branch length added to remaining terminal node. This might be no 609 longer be a meaningful value. 610 611 :returns: parent clade of the pruned target 612 """ 613 if 'terminal' in kwargs and kwargs['terminal']: 614 raise ValueError("target must be terminal") 615 path = self.get_path(target, terminal=True, **kwargs) 616 if not path: 617 raise ValueError("can't find a matching target below this root") 618 if len(path) == 1: 619 parent = self.root 620 else: 621 parent = path[-2] 622 parent.clades.remove(path[-1]) 623 if len(parent) == 1: 624 # We deleted a branch from a bifurcation 625 if parent == self.root: 626 # If we're at the root, move the root upwards 627 # NB: This loses the length of the original branch 628 newroot = parent.clades[0] 629 newroot.branch_length = None 630 parent = self.root = newroot 631 else: 632 # If we're not at the root, collapse this parent 633 child = parent.clades[0] 634 if child.branch_length is not None: 635 child.branch_length += (parent.branch_length or 0.0) 636 if len(path) < 3: 637 grandparent = self.root 638 else: 639 grandparent = path[-3] 640 # Replace parent with child at the same place in grandparent 641 index = grandparent.clades.index(parent) 642 grandparent.clades.pop(index) 643 grandparent.clades.insert(index, child) 644 parent = grandparent 645 return parent
646
647 - def split(self, n=2, branch_length=1.0):
648 """Generate n (default 2) new descendants. 649 650 In a species tree, this is a speciation event. 651 652 New clades have the given branch_length and the same name as this 653 clade's root plus an integer suffix (counting from 0). For example, 654 splitting a clade named "A" produces sub-clades named "A0" and "A1". 655 """ 656 clade_cls = type(self.root) 657 base_name = self.root.name or '' 658 for i in range(n): 659 clade = clade_cls(name=base_name+str(i), 660 branch_length=branch_length) 661 self.root.clades.append(clade)
662
663 664 -class Tree(TreeElement, TreeMixin):
665 """A phylogenetic tree, containing global info for the phylogeny. 666 667 The structure and node-specific data is accessible through the 'root' 668 clade attached to the Tree instance. 669 670 :Parameters: 671 root : Clade 672 The starting node of the tree. If the tree is rooted, this will 673 usually be the root node. 674 rooted : bool 675 Whether or not the tree is rooted. By default, a tree is assumed to 676 be rooted. 677 id : str 678 The identifier of the tree, if there is one. 679 name : str 680 The name of the tree, in essence a label. 681 """
682 - def __init__(self, root=None, rooted=True, id=None, name=None):
683 self.root = root or Clade() 684 self.rooted = rooted 685 self.id = id 686 self.name = name
687 688 @classmethod
689 - def from_clade(cls, clade, **kwargs):
690 """Create a new Tree object given a clade. 691 692 Keyword arguments are the usual `Tree` constructor parameters. 693 """ 694 root = copy.deepcopy(clade) 695 return cls(root, **kwargs)
696 697 @classmethod
698 - def randomized(cls, taxa, branch_length=1.0, branch_stdev=None):
699 """Create a randomized bifurcating tree given a list of taxa. 700 701 :param taxa: Either an integer specifying the number of taxa to create 702 (automatically named taxon#), or an iterable of taxon names, as 703 strings. 704 705 :returns: a tree of the same type as this class. 706 """ 707 if isinstance(taxa, int): 708 taxa = ['taxon%s' % (i+1) for i in range(taxa)] 709 elif hasattr(taxa, '__iter__'): 710 taxa = list(taxa) 711 else: 712 raise TypeError("taxa argument must be integer (# taxa) or " 713 "iterable of taxon names.") 714 rtree = cls() 715 terminals = [rtree.root] 716 while len(terminals) < len(taxa): 717 newsplit = random.choice(terminals) 718 newterms = newsplit.split(branch_length=branch_length) 719 if branch_stdev: 720 # Add some noise to the branch lengths 721 for nt in newterms: 722 nt.branch_length = max(0, 723 random.gauss(branch_length, branch_stdev)) 724 terminals.remove(newsplit) 725 terminals.extend(newterms) 726 # Distribute taxon labels randomly 727 random.shuffle(taxa) 728 for node, name in zip(terminals, taxa): 729 node.name = name 730 return rtree
731 732 @property
733 - def clade(self):
734 """The first clade in this tree (not itself).""" 735 return self.root
736
737 - def as_phyloxml(self, **kwargs):
738 """Convert this tree to a PhyloXML-compatible Phylogeny. 739 740 This lets you use the additional annotation types PhyloXML defines, and 741 save this information when you write this tree as 'phyloxml'. 742 """ 743 from Bio.Phylo.PhyloXML import Phylogeny 744 return Phylogeny.from_tree(self, **kwargs)
745
746 - def root_with_outgroup(self, outgroup_targets, *more_targets):
747 """Reroot this tree with the outgroup clade containing outgroup_targets. 748 749 Operates in-place. 750 751 Edge cases: 752 753 - If ``outgroup == self.root``, no change 754 - If outgroup is terminal, create new bifurcating root node with a 755 0-length branch to the outgroup 756 - If outgroup is internal, use the given outgroup node as the new 757 trifurcating root, keeping branches the same 758 - If the original root was bifurcating, drop it from the tree, 759 preserving total branch lengths 760 """ 761 # This raises a ValueError if any target is not in this tree 762 # Otherwise, common_ancestor guarantees outgroup is in this tree 763 outgroup = self.common_ancestor(outgroup_targets, *more_targets) 764 outgroup_path = self.get_path(outgroup) 765 if len(outgroup_path) == 0: 766 # Outgroup is the current root -- no change 767 return 768 769 prev_blen = outgroup.branch_length 770 if outgroup.is_terminal(): 771 # Create a new root with a 0-length branch to the outgroup 772 outgroup.branch_length = 0.0 773 new_root = self.root.__class__( 774 branch_length=self.root.branch_length, clades=[outgroup]) 775 # The first branch reversal (see the upcoming loop) is modified 776 if len(outgroup_path) == 1: 777 # Trivial tree like '(A,B); 778 new_parent = new_root 779 else: 780 parent = outgroup_path.pop(-2) 781 parent.clades.pop(parent.clades.index(outgroup)) 782 prev_blen, parent.branch_length = parent.branch_length, prev_blen 783 new_root.clades.insert(0, parent) 784 new_parent = parent 785 else: 786 # Use the given outgroup node as the new (trifurcating) root 787 new_root = outgroup 788 new_root.branch_length = self.root.branch_length 789 new_parent = new_root 790 791 # Tracing the outgroup lineage backwards, reattach the subclades under a 792 # new root clade. Reverse the branches directly above the outgroup in 793 # the tree, but keep the descendants of those clades as they are. 794 for parent in outgroup_path[-2::-1]: 795 parent.clades.pop(parent.clades.index(new_parent)) 796 prev_blen, parent.branch_length = parent.branch_length, prev_blen 797 new_parent.clades.insert(0, parent) 798 new_parent = parent 799 800 # Finally, handle the original root according to number of descendents 801 old_root = self.root 802 if outgroup in old_root.clades: 803 assert len(outgroup_path) == 1 804 old_root.clades.pop(old_root.clades.index(outgroup)) 805 else: 806 old_root.clades.pop(old_root.clades.index(new_parent)) 807 if len(old_root) == 1: 808 # Delete the old bifurcating root & add branch lengths 809 ingroup = old_root.clades[0] 810 if ingroup.branch_length: 811 ingroup.branch_length += prev_blen 812 else: 813 ingroup.branch_length = prev_blen 814 new_parent.clades.insert(0, ingroup) 815 # ENH: If annotations are attached to old_root, do... something. 816 else: 817 # Keep the old trifurcating/polytomous root as an internal node 818 old_root.branch_length = prev_blen 819 new_parent.clades.insert(0, old_root) 820 821 self.root = new_root 822 self.rooted = True 823 return
824 825 # Method assumed by TreeMixin 826
827 - def is_terminal(self):
828 """True if the root of this tree is terminal.""" 829 return (not self.root.clades)
830 831 # Convention from SeqRecord and Alignment classes 832
833 - def __format__(self, format_spec):
834 """Serialize the tree as a string in the specified file format. 835 836 This method supports the ``format`` built-in function added in Python 837 2.6/3.0. 838 839 :param format_spec: a lower-case string supported by `Bio.Phylo.write` 840 as an output file format. 841 """ 842 if format_spec: 843 from StringIO import StringIO 844 from Bio.Phylo import _io 845 handle = StringIO() 846 _io.write([self], handle, format_spec) 847 return handle.getvalue() 848 else: 849 # Follow python convention and default to using __str__ 850 return str(self)
851
852 - def format(self, format):
853 """Serialize the tree as a string in the specified file format. 854 855 This duplicates the __format__ magic method for pre-2.6 Pythons. 856 """ 857 return self.__format__(format)
858 859 # Pretty-printer for the entire tree hierarchy 860
861 - def __str__(self):
862 """String representation of the entire tree. 863 864 Serializes each sub-clade recursively using ``repr`` to create a summary 865 of the object structure. 866 """ 867 TAB = ' ' 868 textlines = [] 869 def print_tree(obj, indent): 870 """Recursively serialize sub-elements. 871 872 This closes over textlines and modifies it in-place. 873 """ 874 textlines.append(TAB*indent + repr(obj)) 875 indent += 1 876 for attr in obj.__dict__: 877 child = getattr(obj, attr) 878 if isinstance(child, TreeElement): 879 print_tree(child, indent) 880 elif isinstance(child, list): 881 for elem in child: 882 if isinstance(elem, TreeElement): 883 print_tree(elem, indent)
884 print_tree(self, 0) 885 return '\n'.join(textlines)
886
887 888 -class Clade(TreeElement, TreeMixin):
889 """A recursively defined sub-tree. 890 891 :Parameters: 892 branch_length : str 893 The length of the branch leading to the root node of this clade. 894 name : str 895 The clade's name (a label). 896 clades : list 897 Sub-trees rooted directly under this tree's root. 898 confidence : number 899 Support. 900 color : BranchColor 901 The display color of the branch and descendents. 902 width : number 903 The display width of the branch and descendents. 904 """
905 - def __init__(self, branch_length=None, name=None, clades=None, 906 confidence=None, color=None, width=None):
907 self.branch_length = branch_length 908 self.name = name 909 self.clades = clades or [] 910 self.confidence = confidence 911 self.color = color 912 self.width = width
913 914 @property
915 - def root(self):
916 """Allow TreeMixin methods to traverse clades properly.""" 917 return self
918
919 - def is_terminal(self):
920 """True if this is a terminal (leaf) node.""" 921 return (not self.clades)
922 923 # Sequence-type behavior methods 924
925 - def __getitem__(self, index):
926 """Get clades by index (integer or slice).""" 927 if isinstance(index, int) or isinstance(index, slice): 928 return self.clades[index] 929 ref = self 930 for idx in index: 931 ref = ref[idx] 932 return ref
933
934 - def __iter__(self):
935 """Iterate through this tree's direct descendent clades (sub-trees).""" 936 return iter(self.clades)
937
938 - def __len__(self):
939 """Number of clades directy under the root.""" 940 return len(self.clades)
941
942 - def __nonzero__(self):
943 """Boolean value of an instance of this class. 944 945 NB: If this method is not defined, but ``__len__`` is, then the object 946 is considered true if the result of ``__len__()`` is nonzero. We want 947 Clade instances to always be considered True. 948 """ 949 return True
950
951 - def __str__(self):
952 if self.name: 953 return _sugar.trim_str(self.name, maxlen=40) 954 return self.__class__.__name__
955 956 # Syntax sugar for setting the branch color
957 - def _get_color(self):
958 return self._color
959
960 - def _set_color(self, arg):
961 if arg is None or isinstance(arg, BranchColor): 962 self._color = arg 963 elif isinstance(arg, basestring): 964 if arg in BranchColor.color_names: 965 # Known color name 966 self._color = BranchColor.from_name(arg) 967 elif arg.startswith('#') and len(arg) == 7: 968 # HTML-style hex string 969 self._color = BranchColor.from_hex(arg) 970 else: 971 raise ValueError("invalid color string %s" % arg) 972 elif hasattr(arg, '__iter__') and len(arg) == 3: 973 # RGB triplet 974 self._color = BranchColor(*arg) 975 else: 976 raise ValueError("invalid color value %s" % arg)
977 978 color = property(_get_color, _set_color, doc="Branch color.")
979
980 981 -class BranchColor(object):
982 """Indicates the color of a clade when rendered graphically. 983 984 The color should be interpreted by client code (e.g. visualization 985 programs) as applying to the whole clade, unless overwritten by the 986 color(s) of sub-clades. 987 988 Color values must be integers from 0 to 255. 989 """ 990 991 color_names = { 992 'red': (255, 0, 0), 993 'r': (255, 0, 0), 994 'yellow': (255, 255, 0), 995 'y': (255, 255, 0), 996 'green': ( 0, 128, 0), 997 'g': ( 0, 128, 0), 998 'cyan': ( 0, 255, 255), 999 'c': ( 0, 255, 255), 1000 'blue': ( 0, 0, 255), 1001 'b': ( 0, 0, 255), 1002 'magenta': (255, 0, 255), 1003 'm': (255, 0, 255), 1004 'black': ( 0, 0, 0), 1005 'k': ( 0, 0, 0), 1006 'white': (255, 255, 255), 1007 'w': (255, 255, 255), 1008 # Names standardized in HTML/CSS spec 1009 # http://w3schools.com/html/html_colornames.asp 1010 'maroon': (128, 0, 0), 1011 'olive': (128, 128, 0), 1012 'lime': ( 0, 255, 0), 1013 'aqua': ( 0, 255, 255), 1014 'teal': ( 0, 128, 128), 1015 'navy': ( 0, 0, 128), 1016 'fuchsia': (255, 0, 255), 1017 'purple': (128, 0, 128), 1018 'silver': (192, 192, 192), 1019 'gray': (128, 128, 128), 1020 # More definitions from matplotlib/gcolor2 1021 'grey': (128, 128, 128), 1022 'pink': (255, 192, 203), 1023 'salmon': (250, 128, 114), 1024 'orange': (255, 165, 0), 1025 'gold': (255, 215, 0), 1026 'tan': (210, 180, 140), 1027 'brown': (165, 42, 42), 1028 } 1029
1030 - def __init__(self, red, green, blue):
1031 for color in (red, green, blue): 1032 assert (isinstance(color, int) and 1033 0 <= color <= 255 1034 ), "Color values must be integers between 0 and 255." 1035 self.red = red 1036 self.green = green 1037 self.blue = blue
1038 1039 @classmethod
1040 - def from_hex(cls, hexstr):
1041 """Construct a BranchColor object from a hexadecimal string. 1042 1043 The string format is the same style used in HTML and CSS, such as 1044 '#FF8000' for an RGB value of (255, 128, 0). 1045 """ 1046 assert (isinstance(hexstr, basestring) and 1047 hexstr.startswith('#') and 1048 len(hexstr) == 7 1049 ), "need a 24-bit hexadecimal string, e.g. #000000" 1050 def unpack(cc): 1051 return int('0x'+cc, base=16)
1052 RGB = hexstr[1:3], hexstr[3:5], hexstr[5:] 1053 return cls(*map(unpack, RGB))
1054 1055 @classmethod
1056 - def from_name(cls, colorname):
1057 """Construct a BranchColor object by the color's name.""" 1058 return cls(*cls.color_names[colorname])
1059
1060 - def to_hex(self):
1061 """Return a 24-bit hexadecimal RGB representation of this color. 1062 1063 The returned string is suitable for use in HTML/CSS, as a color 1064 parameter in matplotlib, and perhaps other situations. 1065 1066 Example: 1067 1068 >>> bc = BranchColor(12, 200, 100) 1069 >>> bc.to_hex() 1070 '#0cc864' 1071 """ 1072 return '#' + hex( 1073 self.red * (16**4) 1074 + self.green * (16**2) 1075 + self.blue)[2:].zfill(6)
1076
1077 - def to_rgb(self):
1078 """Return a tuple of RGB values (0 to 255) representing this color. 1079 1080 Example: 1081 1082 >>> bc = BranchColor(255, 165, 0) 1083 >>> bc.to_rgb() 1084 (255, 165, 0) 1085 """ 1086 return (self.red, self.green, self.blue)
1087
1088 - def __repr__(self):
1089 """Preserve the standard RGB order when representing this object.""" 1090 return (u'%s(red=%d, green=%d, blue=%d)' 1091 % (self.__class__.__name__, self.red, self.green, self.blue))
1092
1093 - def __str__(self):
1094 """Show the color's RGB values.""" 1095 return "(%d, %d, %d)" % (self.red, self.green, self.blue)
1096