Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10  __docformat__ = "restructuredtext en" 
 11   
 12  import math 
 13  import sys 
 14   
 15  from Bio import MissingPythonDependencyError 
 16   
 17   
18 -def to_networkx(tree):
19 """Convert a Tree object to a networkx graph. 20 21 The result is useful for graph-oriented analysis, and also interactive 22 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram 23 is usually not ideal for displaying a phylogeny. 24 25 Requires NetworkX version 0.99 or later. 26 """ 27 try: 28 import networkx 29 except ImportError: 30 raise MissingPythonDependencyError( 31 "Install NetworkX if you want to use to_networkx.") 32 33 # NB (1/2010): the networkx API stabilized at v.1.0 34 # 1.0+: edges accept arbitrary data as kwargs, weights are floats 35 # 0.99: edges accept weight as a string, nothing else 36 # pre-0.99: edges accept no additional data 37 # Ubuntu Lucid LTS uses v0.99, let's support everything 38 if networkx.__version__ >= '1.0': 39 def add_edge(graph, n1, n2): 40 graph.add_edge(n1, n2, weight=n2.branch_length or 1.0) 41 # Copy branch color value as hex, if available 42 if hasattr(n2, 'color') and n2.color is not None: 43 graph[n1][n2]['color'] = n2.color.to_hex() 44 elif hasattr(n1, 'color') and n1.color is not None: 45 # Cascading color attributes 46 graph[n1][n2]['color'] = n1.color.to_hex() 47 n2.color = n1.color 48 # Copy branch weight value (float) if available 49 if hasattr(n2, 'width') and n2.width is not None: 50 graph[n1][n2]['width'] = n2.width 51 elif hasattr(n1, 'width') and n1.width is not None: 52 # Cascading width attributes 53 graph[n1][n2]['width'] = n1.width 54 n2.width = n1.width
55 elif networkx.__version__ >= '0.99': 56 def add_edge(graph, n1, n2): 57 graph.add_edge(n1, n2, (n2.branch_length or 1.0)) 58 else: 59 def add_edge(graph, n1, n2): 60 graph.add_edge(n1, n2) 61 62 def build_subgraph(graph, top): 63 """Walk down the Tree, building graphs, edges and nodes.""" 64 for clade in top: 65 graph.add_node(clade.root) 66 add_edge(graph, top.root, clade.root) 67 build_subgraph(graph, clade) 68 69 if tree.rooted: 70 G = networkx.DiGraph() 71 else: 72 G = networkx.Graph() 73 G.add_node(tree.root) 74 build_subgraph(G, tree.root) 75 return G 76 77
78 -def draw_graphviz(tree, label_func=str, prog='twopi', args='', 79 node_color='#c0deff', **kwargs):
80 """Display a tree or clade as a graph, using the graphviz engine. 81 82 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot. 83 84 The third and fourth parameters apply to Graphviz, and the remaining 85 arbitrary keyword arguments are passed directly to networkx.draw(), which 86 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz 87 and networkx for detailed explanations. 88 89 The NetworkX/matplotlib parameters are described in the docstrings for 90 networkx.draw() and pylab.scatter(), but the most reasonable options to try 91 are: *alpha, node_color, node_size, node_shape, edge_color, style, 92 font_size, font_color, font_weight, font_family* 93 94 :Parameters: 95 96 label_func : callable 97 A function to extract a label from a node. By default this is str(), 98 but you can use a different function to select another string 99 associated with each node. If this function returns None for a node, 100 no label will be shown for that node. 101 102 The label will also be silently skipped if the throws an exception 103 related to ordinary attribute access (LookupError, AttributeError, 104 ValueError); all other exception types will still be raised. This 105 means you can use a lambda expression that simply attempts to look 106 up the desired value without checking if the intermediate attributes 107 are available: 108 109 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code) 110 111 prog : string 112 The Graphviz program to use when rendering the graph. 'twopi' 113 behaves the best for large graphs, reliably avoiding crossing edges, 114 but for moderate graphs 'neato' looks a bit nicer. For small 115 directed graphs, 'dot' may produce a normal-looking cladogram, but 116 will cross and distort edges in larger graphs. (The programs 'circo' 117 and 'fdp' are not recommended.) 118 args : string 119 Options passed to the external graphviz program. Normally not 120 needed, but offered here for completeness. 121 122 Example 123 ------- 124 125 >>> import pylab 126 >>> from Bio import Phylo 127 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml') 128 >>> Phylo.draw_graphviz(tree) 129 >>> pylab.show() 130 >>> pylab.savefig('apaf.png') 131 """ 132 try: 133 import networkx 134 except ImportError: 135 raise MissingPythonDependencyError( 136 "Install NetworkX if you want to use to_networkx.") 137 138 G = to_networkx(tree) 139 try: 140 # NetworkX version 1.8 or later (2013-01-20) 141 Gi = networkx.convert_node_labels_to_integers(G, 142 label_attribute='label') 143 int_labels = {} 144 for integer, nodeattrs in Gi.node.items(): 145 int_labels[nodeattrs['label']] = integer 146 except TypeError: 147 # Older NetworkX versions (before 1.8) 148 Gi = networkx.convert_node_labels_to_integers(G, 149 discard_old_labels=False) 150 int_labels = Gi.node_labels 151 152 try: 153 posi = networkx.graphviz_layout(Gi, prog, args=args) 154 except ImportError: 155 raise MissingPythonDependencyError( 156 "Install PyGraphviz or pydot if you want to use draw_graphviz.") 157 158 def get_label_mapping(G, selection): 159 """Apply the user-specified node relabeling.""" 160 for node in G.nodes(): 161 if (selection is None) or (node in selection): 162 try: 163 label = label_func(node) 164 if label not in (None, node.__class__.__name__): 165 yield (node, label) 166 except (LookupError, AttributeError, ValueError): 167 pass
168 169 if 'nodelist' in kwargs: 170 labels = dict(get_label_mapping(G, set(kwargs['nodelist']))) 171 else: 172 labels = dict(get_label_mapping(G, None)) 173 kwargs['nodelist'] = list(labels.keys()) 174 if 'edge_color' not in kwargs: 175 kwargs['edge_color'] = [isinstance(e[2], dict) and 176 e[2].get('color', 'k') or 'k' 177 for e in G.edges(data=True)] 178 if 'width' not in kwargs: 179 kwargs['width'] = [isinstance(e[2], dict) and 180 e[2].get('width', 1.0) or 1.0 181 for e in G.edges(data=True)] 182 183 posn = dict((n, posi[int_labels[n]]) for n in G) 184 networkx.draw(G, posn, labels=labels, with_labels=True, 185 node_color=node_color, **kwargs) 186 187
188 -def draw_ascii(tree, file=None, column_width=80):
189 """Draw an ascii-art phylogram of the given tree. 190 191 The printed result looks like:: 192 193 _________ Orange 194 ______________| 195 | |______________ Tangerine 196 ______________| 197 | | _________________________ Grapefruit 198 _| |_________| 199 | |______________ Pummelo 200 | 201 |__________________________________ Apple 202 203 204 :Parameters: 205 file : file-like object 206 File handle opened for writing the output drawing. (Default: 207 standard output) 208 column_width : int 209 Total number of text columns used by the drawing. 210 """ 211 if file is None: 212 file = sys.stdout 213 214 taxa = tree.get_terminals() 215 # Some constants for the drawing calculations 216 max_label_width = max(len(str(taxon)) for taxon in taxa) 217 drawing_width = column_width - max_label_width - 1 218 drawing_height = 2 * len(taxa) - 1 219 220 def get_col_positions(tree): 221 """Create a mapping of each clade to its column position.""" 222 depths = tree.depths() 223 # If there are no branch lengths, assume unit branch lengths 224 if not max(depths.values()): 225 depths = tree.depths(unit_branch_lengths=True) 226 # Potential drawing overflow due to rounding -- 1 char per tree layer 227 fudge_margin = int(math.ceil(math.log(len(taxa), 2))) 228 cols_per_branch_unit = ((drawing_width - fudge_margin) 229 / float(max(depths.values()))) 230 return dict((clade, int(blen * cols_per_branch_unit + 1.0)) 231 for clade, blen in depths.items())
232 233 def get_row_positions(tree): 234 positions = dict((taxon, 2 * idx) for idx, taxon in enumerate(taxa)) 235 236 def calc_row(clade): 237 for subclade in clade: 238 if subclade not in positions: 239 calc_row(subclade) 240 positions[clade] = ((positions[clade.clades[0]] + 241 positions[clade.clades[-1]]) // 2) 242 243 calc_row(tree.root) 244 return positions 245 246 col_positions = get_col_positions(tree) 247 row_positions = get_row_positions(tree) 248 char_matrix = [[' ' for x in range(drawing_width)] 249 for y in range(drawing_height)] 250 251 def draw_clade(clade, startcol): 252 thiscol = col_positions[clade] 253 thisrow = row_positions[clade] 254 # Draw a horizontal line 255 for col in range(startcol, thiscol): 256 char_matrix[thisrow][col] = '_' 257 if clade.clades: 258 # Draw a vertical line 259 toprow = row_positions[clade.clades[0]] 260 botrow = row_positions[clade.clades[-1]] 261 for row in range(toprow + 1, botrow + 1): 262 char_matrix[row][thiscol] = '|' 263 # NB: Short terminal branches need something to stop rstrip() 264 if (col_positions[clade.clades[0]] - thiscol) < 2: 265 char_matrix[toprow][thiscol] = ',' 266 # Draw descendents 267 for child in clade: 268 draw_clade(child, thiscol + 1) 269 270 draw_clade(tree.root, 0) 271 # Print the complete drawing 272 for idx, row in enumerate(char_matrix): 273 line = ''.join(row).rstrip() 274 # Add labels for terminal taxa in the right margin 275 if idx % 2 == 0: 276 line += ' ' + str(taxa[idx // 2]) 277 file.write(line + '\n') 278 file.write('\n') 279 280
281 -def draw(tree, label_func=str, do_show=True, show_confidence=True, 282 # For power users 283 axes=None, branch_labels=None, *args, **kwargs):
284 """Plot the given tree using matplotlib (or pylab). 285 286 The graphic is a rooted tree, drawn with roughly the same algorithm as 287 draw_ascii. 288 289 Additional keyword arguments passed into this function are used as pyplot 290 options. The input format should be in the form of: 291 pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict), or 292 pyplot_option_name=(dict). 293 294 Example using the pyplot options 'axhspan' and 'axvline': 295 296 >>> Phylo.draw(tree, axhspan=((0.25, 7.75), {'facecolor':'0.5'}), 297 ... axvline={'x':'0', 'ymin':'0', 'ymax':'1'}) 298 299 Visual aspects of the plot can also be modified using pyplot's own functions 300 and objects (via pylab or matplotlib). In particular, the pyplot.rcParams 301 object can be used to scale the font size (rcParams["font.size"]) and line 302 width (rcParams["lines.linewidth"]). 303 304 :Parameters: 305 label_func : callable 306 A function to extract a label from a node. By default this is str(), 307 but you can use a different function to select another string 308 associated with each node. If this function returns None for a node, 309 no label will be shown for that node. 310 do_show : bool 311 Whether to show() the plot automatically. 312 show_confidence : bool 313 Whether to display confidence values, if present on the tree. 314 axes : matplotlib/pylab axes 315 If a valid matplotlib.axes.Axes instance, the phylogram is plotted 316 in that Axes. By default (None), a new figure is created. 317 branch_labels : dict or callable 318 A mapping of each clade to the label that will be shown along the 319 branch leading to it. By default this is the confidence value(s) of 320 the clade, taken from the ``confidence`` attribute, and can be 321 easily toggled off with this function's ``show_confidence`` option. 322 But if you would like to alter the formatting of confidence values, 323 or label the branches with something other than confidence, then use 324 this option. 325 """ 326 327 try: 328 import matplotlib.pyplot as plt 329 except ImportError: 330 try: 331 import pylab as plt 332 except ImportError: 333 raise MissingPythonDependencyError( 334 "Install matplotlib or pylab if you want to use draw.") 335 336 import matplotlib.collections as mpcollections 337 338 # Arrays that store lines for the plot of clades 339 horizontal_linecollections = [] 340 vertical_linecollections = [] 341 342 # Options for displaying branch labels / confidence 343 def conf2str(conf): 344 if int(conf) == conf: 345 return str(int(conf)) 346 return str(conf)
347 if not branch_labels: 348 if show_confidence: 349 def format_branch_label(clade): 350 if hasattr(clade, 'confidences'): 351 # phyloXML supports multiple confidences 352 return '/'.join(conf2str(cnf.value) 353 for cnf in clade.confidences) 354 if clade.confidence: 355 return conf2str(clade.confidence) 356 return None 357 else: 358 def format_branch_label(clade): 359 return None 360 elif isinstance(branch_labels, dict): 361 def format_branch_label(clade): 362 return branch_labels.get(clade) 363 else: 364 assert callable(branch_labels), \ 365 "branch_labels must be either a dict or a callable (function)" 366 format_branch_label = branch_labels 367 368 # Layout 369 370 def get_x_positions(tree): 371 """Create a mapping of each clade to its horizontal position. 372 373 Dict of {clade: x-coord} 374 """ 375 depths = tree.depths() 376 # If there are no branch lengths, assume unit branch lengths 377 if not max(depths.values()): 378 depths = tree.depths(unit_branch_lengths=True) 379 return depths 380 381 def get_y_positions(tree): 382 """Create a mapping of each clade to its vertical position. 383 384 Dict of {clade: y-coord}. 385 Coordinates are negative, and integers for tips. 386 """ 387 maxheight = tree.count_terminals() 388 # Rows are defined by the tips 389 heights = dict((tip, maxheight - i) 390 for i, tip in enumerate(reversed(tree.get_terminals()))) 391 392 # Internal nodes: place at midpoint of children 393 def calc_row(clade): 394 for subclade in clade: 395 if subclade not in heights: 396 calc_row(subclade) 397 # Closure over heights 398 heights[clade] = (heights[clade.clades[0]] + 399 heights[clade.clades[-1]]) / 2.0 400 401 if tree.root.clades: 402 calc_row(tree.root) 403 return heights 404 405 x_posns = get_x_positions(tree) 406 y_posns = get_y_positions(tree) 407 # The function draw_clade closes over the axes object 408 if axes is None: 409 fig = plt.figure() 410 axes = fig.add_subplot(1, 1, 1) 411 elif not isinstance(axes, plt.matplotlib.axes.Axes): 412 raise ValueError("Invalid argument for axes: %s" % axes) 413 414 def draw_clade_lines(use_linecollection=False, orientation='horizontal', 415 y_here=0, x_start=0, x_here=0, y_bot=0, y_top=0, 416 color='black', lw='.1'): 417 """Create a line with or without a line collection object. 418 419 Graphical formatting of the lines representing clades in the plot can be 420 customized by altering this function. 421 """ 422 if (use_linecollection is False and orientation == 'horizontal'): 423 axes.hlines(y_here, x_start, x_here, color=color, lw=lw) 424 elif (use_linecollection is True and orientation == 'horizontal'): 425 horizontal_linecollections.append(mpcollections.LineCollection( 426 [[(x_start, y_here), (x_here, y_here)]], color=color, lw=lw),) 427 elif (use_linecollection is False and orientation == 'vertical'): 428 axes.vlines(x_here, y_bot, y_top, color=color) 429 elif (use_linecollection is True and orientation == 'vertical'): 430 vertical_linecollections.append(mpcollections.LineCollection( 431 [[(x_here, y_bot), (x_here, y_top)]], color=color, lw=lw),) 432 433 def draw_clade(clade, x_start, color, lw): 434 """Recursively draw a tree, down from the given clade.""" 435 x_here = x_posns[clade] 436 y_here = y_posns[clade] 437 # phyloXML-only graphics annotations 438 if hasattr(clade, 'color') and clade.color is not None: 439 color = clade.color.to_hex() 440 if hasattr(clade, 'width') and clade.width is not None: 441 lw = clade.width * plt.rcParams['lines.linewidth'] 442 # Draw a horizontal line from start to here 443 draw_clade_lines(use_linecollection=True, orientation='horizontal', 444 y_here=y_here, x_start=x_start, x_here=x_here, color=color, lw=lw) 445 # Add node/taxon labels 446 label = label_func(clade) 447 if label not in (None, clade.__class__.__name__): 448 axes.text(x_here, y_here, ' %s' % 449 label, verticalalignment='center') 450 # Add label above the branch (optional) 451 conf_label = format_branch_label(clade) 452 if conf_label: 453 axes.text(0.5 * (x_start + x_here), y_here, conf_label, 454 fontsize='small', horizontalalignment='center') 455 if clade.clades: 456 # Draw a vertical line connecting all children 457 y_top = y_posns[clade.clades[0]] 458 y_bot = y_posns[clade.clades[-1]] 459 # Only apply widths to horizontal lines, like Archaeopteryx 460 draw_clade_lines(use_linecollection=True, orientation='vertical', 461 x_here=x_here, y_bot=y_bot, y_top=y_top, color=color, lw=lw) 462 # Draw descendents 463 for child in clade: 464 draw_clade(child, x_here, color, lw) 465 466 draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth']) 467 468 # If line collections were used to create clade lines, here they are added 469 # to the pyplot plot. 470 for i in horizontal_linecollections: 471 axes.add_collection(i) 472 for i in vertical_linecollections: 473 axes.add_collection(i) 474 475 # Aesthetics 476 477 if hasattr(tree, 'name') and tree.name: 478 axes.set_title(tree.name) 479 axes.set_xlabel('branch length') 480 axes.set_ylabel('taxa') 481 # Add margins around the tree to prevent overlapping the axes 482 xmax = max(x_posns.values()) 483 axes.set_xlim(-0.05 * xmax, 1.25 * xmax) 484 # Also invert the y-axis (origin at the top) 485 # Add a small vertical margin, but avoid including 0 and N+1 on the y axis 486 axes.set_ylim(max(y_posns.values()) + 0.8, 0.2) 487 488 # Parse and process key word arguments as pyplot options 489 for key, value in kwargs.items(): 490 try: 491 # Check that the pyplot option input is iterable, as required 492 [i for i in value] 493 except TypeError: 494 raise ValueError('Keyword argument "%s=%s" is not in the format ' 495 'pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict),' 496 ' or pyplot_option_name=(dict) ' 497 % (key, value)) 498 if isinstance(value, dict): 499 getattr(plt, str(key))(**dict(value)) 500 elif not (isinstance(value[0], tuple)): 501 getattr(plt, str(key))(*value) 502 elif (isinstance(value[0], tuple)): 503 getattr(plt, str(key))(*value[0], **dict(value[1])) 504 505 if do_show: 506 plt.show() 507