Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bug reports welcome: fkauff@biologie.uni-kl.de or on Biopython's bugzilla. 
  7  """Tree class to handle phylogenetic trees. 
  8   
  9  Provides a set of methods to read and write newick-format tree descriptions, 
 10  get information about trees (monphyly of taxon sets, congruence between trees, 
 11  common ancestors,...) and to manipulate trees (reroot trees, split terminal 
 12  nodes). 
 13  """ 
 14   
 15  from __future__ import print_function 
 16   
 17  import random 
 18  import sys 
 19  from . import Nodes 
 20   
 21  PRECISION_BRANCHLENGTH=6 
 22  PRECISION_SUPPORT=6 
 23  NODECOMMENT_START='[&' 
 24  NODECOMMENT_END=']' 
 25   
 26   
27 -class TreeError(Exception):
28 pass
29 30
31 -class NodeData(object):
32 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
33 - def __init__(self, taxon=None, branchlength=0.0, support=None, comment=None):
34 self.taxon=taxon 35 self.branchlength=branchlength 36 self.support=support 37 self.comment=comment
38 39
40 -class Tree(Nodes.Chain):
41 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 42 and multiple successors (=subclades). 43 """ 44 # A newick tree is parsed into nested list and then converted to a node list in two stages 45 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 46 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 47 # easier when parsing trees. 48 49 # NOTE: Tree should store its data class in something like self.dataclass=data, 50 # so that nodes that are generated have easy access to the data class 51 # Some routines use automatically NodeData, this needs to be more concise 52
53 - def __init__(self, tree=None, weight=1.0, rooted=False, name='', data=NodeData, values_are_support=False, max_support=1.0):
54 """Ntree(self,tree).""" 55 Nodes.Chain.__init__(self) 56 self.dataclass=data 57 self.__values_are_support=values_are_support 58 self.max_support=max_support 59 self.weight=weight 60 self.rooted=rooted 61 self.name=name 62 root=Nodes.Node(data()) 63 self.root = self.add(root) 64 if tree: # use the tree we have 65 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 66 tree=tree.strip().replace('\n', '').replace('\r', '') 67 # there's discrepancy whether newick allows semicolons et the end 68 tree=tree.rstrip(';') 69 subtree_info, base_info = self._parse(tree) 70 root.data = self._add_nodedata(root.data, [[], base_info]) 71 self._add_subtree(parent_id=root.id, tree=subtree_info)
72
73 - def _parse(self, tree):
74 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 75 # Remove any leading/trailing white space - want any string starting 76 # with " (..." should be recognised as a leaf, "(..." 77 tree = tree.strip() 78 if tree.count('(')!=tree.count(')'): 79 raise TreeError('Parentheses do not match in (sub)tree: '+tree) 80 if tree.count('(')==0: # a leaf 81 # check if there's a colon, or a special comment, or both after the taxon name 82 nodecomment=tree.find(NODECOMMENT_START) 83 colon=tree.find(':') 84 if colon==-1 and nodecomment==-1: # none 85 return [tree, [None]] 86 elif colon==-1 and nodecomment>-1: # only special comment 87 return [tree[:nodecomment], self._get_values(tree[nodecomment:])] 88 elif colon>-1 and nodecomment==-1: # only numerical values 89 return [tree[:colon], self._get_values(tree[colon+1:])] 90 elif colon < nodecomment: # taxon name ends at first colon or with special comment 91 return [tree[:colon], self._get_values(tree[colon+1:])] 92 else: 93 return [tree[:nodecomment], self._get_values(tree[nodecomment:])] 94 else: 95 closing=tree.rfind(')') 96 val=self._get_values(tree[closing+1:]) 97 if not val: 98 val=[None] 99 subtrees=[] 100 plevel=0 101 prev=1 102 for p in range(1, closing): 103 if tree[p]=='(': 104 plevel+=1 105 elif tree[p]==')': 106 plevel-=1 107 elif tree[p]==',' and plevel==0: 108 subtrees.append(tree[prev:p]) 109 prev=p+1 110 subtrees.append(tree[prev:closing]) 111 subclades=[self._parse(subtree) for subtree in subtrees] 112 return [subclades, val]
113
114 - def _add_subtree(self, parent_id=None, tree=None):
115 """Adds leaf or tree (in newick format) to a parent_id.""" 116 if parent_id is None: 117 raise TreeError('Need node_id to connect to.') 118 for st in tree: 119 nd=self.dataclass() 120 nd = self._add_nodedata(nd, st) 121 if isinstance(st[0], list): # it's a subtree 122 sn=Nodes.Node(nd) 123 self.add(sn, parent_id) 124 self._add_subtree(sn.id, st[0]) 125 else: # it's a leaf 126 nd.taxon=st[0] 127 leaf=Nodes.Node(nd) 128 self.add(leaf, parent_id)
129
130 - def _add_nodedata(self, nd, st):
131 """Add data to the node parsed from the comments, taxon and support. 132 """ 133 if isinstance(st[1][-1], str) and st[1][-1].startswith(NODECOMMENT_START): 134 nd.comment=st[1].pop(-1) 135 # if the first element is a string, it's the subtree node taxon 136 elif isinstance(st[1][0], str): 137 nd.taxon = st[1][0] 138 st[1] = st[1][1:] 139 if len(st)>1: 140 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 141 nd.support=st[1][0] 142 if st[1][1] is not None: 143 nd.branchlength=st[1][1] 144 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 145 if not self.__values_are_support: # default 146 if st[1][0] is not None: 147 nd.branchlength=st[1][0] 148 else: 149 nd.support=st[1][0] 150 return nd
151
152 - def _get_values(self, text):
153 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 154 155 if text=='': 156 return None 157 nodecomment = None 158 if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out 159 nc_start=text.find(NODECOMMENT_START) 160 nc_end=text.find(NODECOMMENT_END) 161 if nc_end==-1: 162 raise TreeError('Error in tree description: Found %s without matching %s' 163 % (NODECOMMENT_START, NODECOMMENT_END)) 164 nodecomment=text[nc_start:nc_end+1] 165 text=text[:nc_start]+text[nc_end+1:] 166 167 # pase out supports and branchlengths, with internal node taxa info 168 values = [] 169 taxonomy = None 170 for part in [t.strip() for t in text.split(":")]: 171 if part: 172 try: 173 values.append(float(part)) 174 except ValueError: 175 assert taxonomy is None, "Two string taxonomies?" 176 taxonomy = part 177 if taxonomy: 178 values.insert(0, taxonomy) 179 if nodecomment: 180 values.append(nodecomment) 181 return values
182
183 - def _walk(self, node=None):
184 """Return all node_ids downwards from a node.""" 185 186 if node is None: 187 node=self.root 188 for n in self.node(node).succ: 189 yield n 190 for sn in self._walk(n): 191 yield sn
192
193 - def node(self, node_id):
194 """Return the instance of node_id. 195 196 node = node(self,node_id) 197 """ 198 if node_id not in self.chain: 199 raise TreeError('Unknown node_id: %d' % node_id) 200 return self.chain[node_id]
201
202 - def split(self, parent_id=None, n=2, branchlength=1.0):
203 """Speciation: generates n (default two) descendants of a node. 204 205 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 206 """ 207 if parent_id is None: 208 raise TreeError('Missing node_id.') 209 ids=[] 210 parent_data=self.chain[parent_id].data 211 for i in range(n): 212 node=Nodes.Node() 213 if parent_data: 214 node.data=self.dataclass() 215 # each node has taxon and branchlength attribute 216 if parent_data.taxon: 217 node.data.taxon=parent_data.taxon+str(i) 218 node.data.branchlength=branchlength 219 ids.append(self.add(node, parent_id)) 220 return ids
221
222 - def search_taxon(self, taxon):
223 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 224 225 node_id = search_taxon(self,taxon) 226 """ 227 for id, node in self.chain.items(): 228 if node.data.taxon==taxon: 229 return id 230 return None
231
232 - def prune(self, taxon):
233 """Prunes a terminal taxon from the tree. 234 235 id_of_previous_node = prune(self,taxon) 236 If taxon is from a bifurcation, the connectiong node will be collapsed 237 and its branchlength added to remaining terminal node. This might be no 238 longer a meaningful value' 239 """ 240 241 id=self.search_taxon(taxon) 242 if id is None: 243 raise TreeError('Taxon not found: %s' % taxon) 244 elif id not in self.get_terminals(): 245 raise TreeError('Not a terminal taxon: %s' % taxon) 246 else: 247 prev=self.unlink(id) 248 self.kill(id) 249 if len(self.node(prev).succ)==1: 250 if prev==self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 251 self.root=self.node(self.root).succ[0] 252 self.node(self.root).branchlength=0.0 253 self.kill(prev) 254 else: 255 succ=self.node(prev).succ[0] 256 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength 257 self.collapse(prev) 258 self.node(succ).data.branchlength=new_bl 259 return prev
260
261 - def get_taxa(self, node_id=None):
262 """Return a list of all otus downwards from a node. 263 264 nodes = get_taxa(self,node_id=None) 265 """ 266 267 if node_id is None: 268 node_id=self.root 269 if node_id not in self.chain: 270 raise TreeError('Unknown node_id: %d.' % node_id) 271 if self.chain[node_id].succ==[]: 272 if self.chain[node_id].data: 273 return [self.chain[node_id].data.taxon] 274 else: 275 return None 276 else: 277 list=[] 278 for succ in self.chain[node_id].succ: 279 list.extend(self.get_taxa(succ)) 280 return list
281
282 - def get_terminals(self):
283 """Return a list of all terminal nodes.""" 284 return [i for i in self.all_ids() if self.node(i).succ==[]]
285
286 - def is_terminal(self, node):
287 """Returns True if node is a terminal node.""" 288 return self.node(node).succ==[]
289
290 - def is_internal(self, node):
291 """Returns True if node is an internal node.""" 292 return len(self.node(node).succ)>0
293
294 - def is_preterminal(self, node):
295 """Returns True if all successors of a node are terminal ones.""" 296 if self.is_terminal(node): 297 return False not in [self.is_terminal(n) for n in self.node(node).succ] 298 else: 299 return False
300
301 - def count_terminals(self, node=None):
302 """Counts the number of terminal nodes that are attached to a node.""" 303 if node is None: 304 node=self.root 305 return len([n for n in self._walk(node) if self.is_terminal(n)])
306
307 - def collapse_genera(self, space_equals_underscore=True):
308 """Collapses all subtrees which belong to the same genus (i.e share the same first word in their taxon name.)""" 309 310 while True: 311 for n in self._walk(): 312 if self.is_terminal(n): 313 continue 314 taxa=self.get_taxa(n) 315 genera=[] 316 for t in taxa: 317 if space_equals_underscore: 318 t=t.replace(' ', '_') 319 try: 320 genus=t.split('_', 1)[0] 321 except: 322 genus='None' 323 if genus not in genera: 324 genera.append(genus) 325 if len(genera)==1: 326 self.node(n).data.taxon=genera[0]+' <collapsed>' 327 # now we kill all nodes downstream 328 nodes2kill=[kn for kn in self._walk(node=n)] 329 for kn in nodes2kill: 330 self.kill(kn) 331 self.node(n).succ=[] 332 break # break out of for loop because node list from _walk will be inconsistent 333 else: # for loop exhausted: no genera to collapse left 334 break # while
335
336 - def sum_branchlength(self, root=None, node=None):
337 """Adds up the branchlengths from root (default self.root) to node. 338 339 sum = sum_branchlength(self,root=None,node=None) 340 """ 341 342 if root is None: 343 root=self.root 344 if node is None: 345 raise TreeError('Missing node id.') 346 blen=0.0 347 while node is not None and node is not root: 348 blen+=self.node(node).data.branchlength 349 node=self.node(node).prev 350 return blen
351
352 - def set_subtree(self, node):
353 """Return subtree as a set of nested sets. 354 355 sets = set_subtree(self,node) 356 """ 357 358 if self.node(node).succ==[]: 359 return self.node(node).data.taxon 360 else: 361 try: 362 return frozenset(self.set_subtree(n) for n in self.node(node).succ) 363 except: 364 print(node) 365 print(self.node(node).succ) 366 for n in self.node(node).succ: 367 print("%s %s" % (n, self.set_subtree(n))) 368 print([self.set_subtree(n) for n in self.node(node).succ]) 369 raise
370
371 - def is_identical(self, tree2):
372 """Compare tree and tree2 for identity. 373 374 result = is_identical(self,tree2) 375 """ 376 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
377
378 - def is_compatible(self, tree2, threshold, strict=True):
379 """Compares branches with support>threshold for compatibility. 380 381 result = is_compatible(self,tree2,threshold) 382 """ 383 384 # check if both trees have the same set of taxa. strict=True enforces this. 385 missing2=set(self.get_taxa())-set(tree2.get_taxa()) 386 missing1=set(tree2.get_taxa())-set(self.get_taxa()) 387 if strict and (missing1 or missing2): 388 if missing1: 389 print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1), self.name)) 390 if missing2: 391 print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2), tree2.name)) 392 raise TreeError('Can\'t compare trees with different taxon compositions.') 393 t1=[(set(self.get_taxa(n)), self.node(n).data.support) for n in self.all_ids() if 394 self.node(n).succ and 395 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)] 396 t2=[(set(tree2.get_taxa(n)), tree2.node(n).data.support) for n in tree2.all_ids() if 397 tree2.node(n).succ and 398 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)] 399 conflict=[] 400 for (st1, sup1) in t1: 401 for (st2, sup2) in t2: 402 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 403 intersect, notin1, notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets 404 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 405 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 406 conflict.append((st1, sup1, st2, sup2, intersect, notin1, notin2)) 407 return conflict
408
409 - def common_ancestor(self, node1, node2):
410 """Return the common ancestor that connects two nodes. 411 412 node_id = common_ancestor(self,node1,node2) 413 """ 414 415 l1=[self.root]+self.trace(self.root, node1) 416 l2=[self.root]+self.trace(self.root, node2) 417 return [n for n in l1 if n in l2][-1]
418
419 - def distance(self, node1, node2):
420 """Add and return the sum of the branchlengths between two nodes. 421 dist = distance(self,node1,node2) 422 """ 423 424 ca=self.common_ancestor(node1, node2) 425 return self.sum_branchlength(ca, node1)+self.sum_branchlength(ca, node2)
426
427 - def is_monophyletic(self, taxon_list):
428 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 429 430 result = is_monophyletic(self,taxon_list) 431 """ 432 if isinstance(taxon_list, str): 433 taxon_set=set([taxon_list]) 434 else: 435 taxon_set=set(taxon_list) 436 node_id=self.root 437 while True: 438 subclade_taxa=set(self.get_taxa(node_id)) 439 if subclade_taxa==taxon_set: # are we there? 440 return node_id 441 else: # check subnodes 442 for subnode in self.chain[node_id].succ: 443 if set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 444 node_id=subnode 445 break # out of for loop 446 else: 447 return -1 # taxon set was not with successors, for loop exhausted
448
449 - def is_bifurcating(self, node=None):
450 """Return True if tree downstream of node is strictly bifurcating.""" 451 if node is None: 452 node=self.root 453 if node==self.root and len(self.node(node).succ)==3: # root can be trifurcating, because it has no ancestor 454 return self.is_bifurcating(self.node(node).succ[0]) and \ 455 self.is_bifurcating(self.node(node).succ[1]) and \ 456 self.is_bifurcating(self.node(node).succ[2]) 457 if len(self.node(node).succ)==2: 458 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 459 elif len(self.node(node).succ)==0: 460 return True 461 else: 462 return False
463
464 - def branchlength2support(self):
465 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 466 467 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 468 been read in as branchlength. 469 """ 470 471 for n in self.chain: 472 self.node(n).data.support=self.node(n).data.branchlength 473 self.node(n).data.branchlength=0.0
474
475 - def convert_absolute_support(self, nrep):
476 """Convert absolute support (clade-count) to rel. frequencies. 477 478 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 479 calculating relative frequencies.""" 480 481 for n in self._walk(): 482 if self.node(n).data.support: 483 self.node(n).data.support/=float(nrep)
484
485 - def has_support(self, node=None):
486 """Returns True if any of the nodes has data.support != None.""" 487 for n in self._walk(node): 488 if self.node(n).data.support: 489 return True 490 else: 491 return False
492
493 - def randomize(self, ntax=None, taxon_list=None, branchlength=1.0, branchlength_sd=None, bifurcate=True):
494 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 495 496 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 497 Trees are bifurcating by default. (Polytomies not yet supported). 498 """ 499 500 if not ntax and taxon_list: 501 ntax=len(taxon_list) 502 elif not taxon_list and ntax: 503 taxon_list=['taxon'+str(i+1) for i in range(ntax)] 504 elif not ntax and not taxon_list: 505 raise TreeError('Either numer of taxa or list of taxa must be specified.') 506 elif ntax != len(taxon_list): 507 raise TreeError('Length of taxon list must correspond to ntax.') 508 # initiate self with empty root 509 self.__init__() 510 terminals=self.get_terminals() 511 # bifurcate randomly at terminal nodes until ntax is reached 512 while len(terminals)<ntax: 513 newsplit=random.choice(terminals) 514 new_terminals=self.split(parent_id=newsplit, branchlength=branchlength) 515 # if desired, give some variation to the branch length 516 if branchlength_sd: 517 for nt in new_terminals: 518 bl=random.gauss(branchlength, branchlength_sd) 519 if bl<0: 520 bl=0 521 self.node(nt).data.branchlength=bl 522 terminals.extend(new_terminals) 523 terminals.remove(newsplit) 524 # distribute taxon labels randomly 525 random.shuffle(taxon_list) 526 for (node, name) in zip(terminals, taxon_list): 527 self.node(node).data.taxon=name
528
529 - def display(self):
530 """Quick and dirty lists of all nodes.""" 531 table=[('#', 'taxon', 'prev', 'succ', 'brlen', 'blen (sum)', 'support', 'comment')] 532 # Sort this to be consistent across CPython, Jython, etc 533 for i in sorted(self.all_ids()): 534 n=self.node(i) 535 if not n.data: 536 table.append((str(i), '-', str(n.prev), str(n.succ), '-', '-', '-', '-')) 537 else: 538 tx=n.data.taxon 539 if not tx: 540 tx='-' 541 blength="%0.2f" % n.data.branchlength 542 if blength is None: 543 blength='-' 544 sum_blength='-' 545 else: 546 sum_blength="%0.2f" % self.sum_branchlength(node=i) 547 support=n.data.support 548 if support is None: 549 support='-' 550 else: 551 support="%0.2f" % support 552 comment=n.data.comment 553 if comment is None: 554 comment='-' 555 table.append((str(i), tx, str(n.prev), str(n.succ), 556 blength, sum_blength, support, comment)) 557 print('\n'.join('%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table)) 558 print('\nRoot: %s' % self.root)
559
560 - def to_string(self, support_as_branchlengths=False, branchlengths_only=False, plain=True, plain_newick=False, ladderize=None, ignore_comments=True):
561 """Return a paup compatible tree line.""" 562 # if there's a conflict in the arguments, we override plain=True 563 if support_as_branchlengths or branchlengths_only: 564 plain=False 565 self.support_as_branchlengths=support_as_branchlengths 566 self.branchlengths_only=branchlengths_only 567 self.ignore_comments=ignore_comments 568 self.plain=plain 569 570 def make_info_string(data, terminal=False): 571 """Creates nicely formatted support/branchlengths.""" 572 # CHECK FORMATTING 573 if self.plain: # plain tree only. That's easy. 574 info_string= '' 575 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 576 if terminal: # terminal branches have 100% support 577 info_string= ':%1.2f' % self.max_support 578 elif data.support: 579 info_string= ':%1.2f' % (data.support) 580 else: 581 info_string=':0.00' 582 elif self.branchlengths_only: # write only branchlengths, ignore support 583 info_string= ':%1.5f' % (data.branchlength) 584 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 585 if terminal: 586 info_string= ':%1.5f' % (data.branchlength) 587 else: 588 if data.branchlength is not None and data.support is not None: # we have blen and suppport 589 info_string= '%1.2f:%1.5f' % (data.support, data.branchlength) 590 elif data.branchlength is not None: # we have only blen 591 info_string= '0.00000:%1.5f' % (data.branchlength) 592 elif data.support is not None: # we have only support 593 info_string= '%1.2f:0.00000' % (data.support) 594 else: 595 info_string= '0.00:0.00000' 596 if not ignore_comments and hasattr(data, 'nodecomment'): 597 info_string=str(data.nodecomment)+info_string 598 return info_string
599 600 def ladderize_nodes(nodes, ladderize=None): 601 """Sorts node numbers according to the number of terminal nodes.""" 602 if ladderize in ['left', 'LEFT', 'right', 'RIGHT']: 603 succnode_terminals = sorted((self.count_terminals(node=n), n) for n in nodes) 604 if (ladderize=='right' or ladderize=='RIGHT'): 605 succnode_terminals.reverse() 606 if succnode_terminals: 607 succnodes=zip(*succnode_terminals)[1] 608 else: 609 succnodes=[] 610 else: 611 succnodes=nodes 612 return succnodes
613 614 def newickize(node, ladderize=None): 615 """Convert a node tree to a newick tree recursively.""" 616 617 if not self.node(node).succ: # terminal 618 return self.node(node).data.taxon+make_info_string(self.node(node).data, terminal=True) 619 else: 620 succnodes=ladderize_nodes(self.node(node).succ, ladderize=ladderize) 621 subtrees=[newickize(sn, ladderize=ladderize) for sn in succnodes] 622 return '(%s)%s' % (','.join(subtrees), make_info_string(self.node(node).data)) 623 624 treeline=['tree'] 625 if self.name: 626 treeline.append(self.name) 627 else: 628 treeline.append('a_tree') 629 treeline.append('=') 630 if self.weight != 1: 631 treeline.append('[&W%s]' % str(round(float(self.weight), 3))) 632 if self.rooted: 633 treeline.append('[&R]') 634 succnodes=ladderize_nodes(self.node(self.root).succ) 635 subtrees=[newickize(sn, ladderize=ladderize) for sn in succnodes] 636 treeline.append('(%s)' % ','.join(subtrees)) 637 if plain_newick: 638 return treeline[-1] 639 else: 640 return ' '.join(treeline)+';' 641
642 - def __str__(self):
643 """Short version of to_string(), gives plain tree""" 644 return self.to_string(plain=True)
645
646 - def unroot(self):
647 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 648 649 # travel down the rooted tree structure and save all branches and the nodes they connect 650 651 def _get_branches(node): 652 branches=[] 653 for b in self.node(node).succ: 654 branches.append([node, b, self.node(b).data.branchlength, self.node(b).data.support]) 655 branches.extend(_get_branches(b)) 656 return branches
657 658 self.unrooted=_get_branches(self.root) 659 # if root is bifurcating, then it is eliminated 660 if len(self.node(self.root).succ)==2: 661 # find the two branches that connect to root 662 rootbranches=[b for b in self.unrooted if self.root in b[:2]] 663 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0])) 664 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1])) 665 # Connect them two each other. If both have support, it should be identical (or one set to None?). 666 # If both have branchlengths, they will be added 667 newbranch=[b1[1], b2[1], b1[2]+b2[2]] 668 if b1[3] is None: 669 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 670 elif b2[3] is None: 671 newbranch.append(b1[3]) # dito 672 elif b1[3]==b2[3]: 673 newbranch.append(b1[3]) # identical support 674 elif b1[3]==0 or b2[3]==0: 675 newbranch.append(b1[3]+b2[3]) # one is 0, take the other 676 else: 677 raise TreeError('Support mismatch in bifurcating root: %f, %f' 678 % (float(b1[3]), float(b2[3]))) 679 self.unrooted.append(newbranch) 680
681 - def root_with_outgroup(self, outgroup=None):
682 683 def _connect_subtree(parent, child): 684 """Hook subtree starting with node child to parent.""" 685 for i, branch in enumerate(self.unrooted): 686 if parent in branch[:2] and child in branch[:2]: 687 branch=self.unrooted.pop(i) 688 break 689 else: 690 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' 691 % (parent, child)) 692 self.link(parent, child) 693 self.node(child).data.branchlength=branch[2] 694 self.node(child).data.support=branch[3] 695 # now check if there are more branches connected to the child, and if so, connect them 696 child_branches=[b for b in self.unrooted if child in b[:2]] 697 for b in child_branches: 698 if child==b[0]: 699 succ=b[1] 700 else: 701 succ=b[0] 702 _connect_subtree(child, succ)
703 704 # check the outgroup we're supposed to root with 705 if outgroup is None: 706 return self.root 707 outgroup_node=self.is_monophyletic(outgroup) 708 if outgroup_node==-1: 709 return -1 710 # if tree is already rooted with outgroup on a bifurcating root, 711 # or the outgroup includes all taxa on the tree, then we're fine 712 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root: 713 return self.root 714 715 self.unroot() 716 # now we find the branch that connects outgroup and ingroup 717 # print(self.node(outgroup_node).prev) 718 for i, b in enumerate(self.unrooted): 719 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 720 root_branch=self.unrooted.pop(i) 721 break 722 else: 723 raise TreeError('Unrooted and rooted Tree do not match') 724 if outgroup_node==root_branch[1]: 725 ingroup_node=root_branch[0] 726 else: 727 ingroup_node=root_branch[1] 728 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 729 for n in self.all_ids(): 730 self.node(n).prev=None 731 self.node(n).succ=[] 732 # now we just add both subtrees (outgroup and ingroup) branch for branch 733 root=Nodes.Node(data=NodeData()) # new root 734 self.add(root) # add to tree description 735 self.root=root.id # set as root 736 self.unrooted.append([root.id, ingroup_node, root_branch[2], root_branch[3]]) # add branch to ingroup to unrooted tree 737 self.unrooted.append([root.id, outgroup_node, 0.0, 0.0]) # add branch to outgroup to unrooted tree 738 _connect_subtree(root.id, ingroup_node) # add ingroup 739 _connect_subtree(root.id, outgroup_node) # add outgroup 740 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 741 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root] 742 if len(oldroot)>1: 743 raise TreeError('Isolated nodes in tree description: %s' 744 % ','.join(oldroot)) 745 elif len(oldroot)==1: 746 self.kill(oldroot[0]) 747 return self.root 748
749 - def merge_with_support(self, bstrees=None, constree=None, threshold=0.5, outgroup=None):
750 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny. 751 752 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>) 753 or 754 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support) 755 """ 756 757 if bstrees and constree: 758 raise TreeError('Specify either list of bootstrap trees or consensus tree, not both') 759 if not (bstrees or constree): 760 raise TreeError('Specify either list of bootstrap trees or consensus tree.') 761 # no outgroup specified: use the smallest clade of the root 762 if outgroup is None: 763 try: 764 succnodes = self.node(self.root).succ 765 smallest = min((len(self.get_taxa(n)), n) for n in succnodes) 766 outgroup = self.get_taxa(smallest[1]) 767 except: 768 raise TreeError("Error determining outgroup.") 769 else: # root with user specified outgroup 770 self.root_with_outgroup(outgroup) 771 772 if bstrees: # calculate consensus 773 constree=consensus(bstrees, threshold=threshold, outgroup=outgroup) 774 else: 775 if not constree.has_support(): 776 constree.branchlength2support() 777 constree.root_with_outgroup(outgroup) 778 # now we travel all nodes, and add support from consensus, if the clade is present in both 779 for pnode in self._walk(): 780 cnode=constree.is_monophyletic(self.get_taxa(pnode)) 781 if cnode>-1: 782 self.node(pnode).data.support=constree.node(cnode).data.support
783 784
785 -def consensus(trees, threshold=0.5, outgroup=None):
786 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 787 788 total=len(trees) 789 if total==0: 790 return None 791 # shouldn't we make sure that it's NodeData or subclass?? 792 dataclass=trees[0].dataclass 793 max_support=trees[0].max_support 794 clades={} 795 # countclades={} 796 alltaxa=set(trees[0].get_taxa()) 797 # calculate calde frequencies 798 c=0 799 for t in trees: 800 c+=1 801 # if c%100==0: 802 # print(c) 803 if alltaxa!=set(t.get_taxa()): 804 raise TreeError('Trees for consensus must contain the same taxa') 805 t.root_with_outgroup(outgroup=outgroup) 806 for st_node in t._walk(t.root): 807 subclade_taxa=sorted(t.get_taxa(st_node)) 808 subclade_taxa=str(subclade_taxa) # lists are not hashable 809 if subclade_taxa in clades: 810 clades[subclade_taxa]+=float(t.weight)/total 811 else: 812 clades[subclade_taxa]=float(t.weight)/total 813 # if subclade_taxa in countclades: 814 # countclades[subclade_taxa]+=t.weight 815 # else: 816 # countclades[subclade_taxa]=t.weight 817 # weed out clades below threshold 818 delclades=[c for c, p in clades.items() if round(p, 3)<threshold] # round can be necessary 819 for c in delclades: 820 del clades[c] 821 # create a tree with a root node 822 consensus=Tree(name='consensus_%2.1f' % float(threshold), data=dataclass) 823 # each clade needs a node in the new tree, add them as isolated nodes 824 for c, s in clades.items(): 825 node=Nodes.Node(data=dataclass()) 826 node.data.support=s 827 node.data.taxon=set(eval(c)) 828 consensus.add(node) 829 # set root node data 830 consensus.node(consensus.root).data.support=None 831 consensus.node(consensus.root).data.taxon=alltaxa 832 # we sort the nodes by no. of taxa in the clade, so root will be the last 833 consensus_ids=consensus.all_ids() 834 consensus_ids.sort(lambda x, y: len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon)) 835 # now we just have to hook each node to the next smallest node that includes all taxa of the current 836 for i, current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 837 # print('----') 838 # print('current: %s' % consensus.node(current).data.taxon) 839 # search remaining nodes 840 for parent in consensus_ids[i+1:]: 841 # print('parent: %s' % consensus.node(parent).data.taxon) 842 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 843 break 844 else: 845 sys.exit('corrupt tree structure?') 846 # internal nodes don't have taxa 847 if len(consensus.node(current).data.taxon)==1: 848 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop() 849 # reset the support for terminal nodes to maximum 850 # consensus.node(current).data.support=max_support 851 else: 852 consensus.node(current).data.taxon=None 853 consensus.link(parent, current) 854 # eliminate root taxon name 855 consensus.node(consensus_ids[-1]).data.taxon=None 856 if alltaxa != set(consensus.get_taxa()): 857 raise TreeError('FATAL ERROR: consensus tree is corrupt') 858 return consensus
859