Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10  __docformat__ = "restructuredtext en" 
 11   
 12  import math 
 13  import sys 
 14   
 15   
16 -def to_networkx(tree):
17 """Convert a Tree object to a networkx graph. 18 19 The result is useful for graph-oriented analysis, and also interactive 20 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram 21 is usually not ideal for displaying a phylogeny. 22 23 Requires NetworkX version 0.99 or later. 24 """ 25 try: 26 import networkx 27 except ImportError: 28 from Bio import MissingPythonDependencyError 29 raise MissingPythonDependencyError( 30 "Install NetworkX if you want to use to_networkx.") 31 32 # NB (1/2010): the networkx API stabilized at v.1.0 33 # 1.0+: edges accept arbitrary data as kwargs, weights are floats 34 # 0.99: edges accept weight as a string, nothing else 35 # pre-0.99: edges accept no additional data 36 # Ubuntu Lucid LTS uses v0.99, let's support everything 37 if networkx.__version__ >= '1.0': 38 def add_edge(graph, n1, n2): 39 graph.add_edge(n1, n2, weight=n2.branch_length or 1.0) 40 # Copy branch color value as hex, if available 41 if hasattr(n2, 'color') and n2.color is not None: 42 graph[n1][n2]['color'] = n2.color.to_hex() 43 elif hasattr(n1, 'color') and n1.color is not None: 44 # Cascading color attributes 45 graph[n1][n2]['color'] = n1.color.to_hex() 46 n2.color = n1.color 47 # Copy branch weight value (float) if available 48 if hasattr(n2, 'width') and n2.width is not None: 49 graph[n1][n2]['width'] = n2.width 50 elif hasattr(n1, 'width') and n1.width is not None: 51 # Cascading width attributes 52 graph[n1][n2]['width'] = n1.width 53 n2.width = n1.width
54 elif networkx.__version__ >= '0.99': 55 def add_edge(graph, n1, n2): 56 graph.add_edge(n1, n2, (n2.branch_length or 1.0)) 57 else: 58 def add_edge(graph, n1, n2): 59 graph.add_edge(n1, n2) 60 61 def build_subgraph(graph, top): 62 """Walk down the Tree, building graphs, edges and nodes.""" 63 for clade in top: 64 graph.add_node(clade.root) 65 add_edge(graph, top.root, clade.root) 66 build_subgraph(graph, clade) 67 68 if tree.rooted: 69 G = networkx.DiGraph() 70 else: 71 G = networkx.Graph() 72 G.add_node(tree.root) 73 build_subgraph(G, tree.root) 74 return G 75 76
77 -def draw_graphviz(tree, label_func=str, prog='twopi', args='', 78 node_color='#c0deff', **kwargs):
79 """Display a tree or clade as a graph, using the graphviz engine. 80 81 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot. 82 83 The third and fourth parameters apply to Graphviz, and the remaining 84 arbitrary keyword arguments are passed directly to networkx.draw(), which 85 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz 86 and networkx for detailed explanations. 87 88 The NetworkX/matplotlib parameters are described in the docstrings for 89 networkx.draw() and pylab.scatter(), but the most reasonable options to try 90 are: *alpha, node_color, node_size, node_shape, edge_color, style, 91 font_size, font_color, font_weight, font_family* 92 93 :Parameters: 94 95 label_func : callable 96 A function to extract a label from a node. By default this is str(), 97 but you can use a different function to select another string 98 associated with each node. If this function returns None for a node, 99 no label will be shown for that node. 100 101 The label will also be silently skipped if the throws an exception 102 related to ordinary attribute access (LookupError, AttributeError, 103 ValueError); all other exception types will still be raised. This 104 means you can use a lambda expression that simply attempts to look 105 up the desired value without checking if the intermediate attributes 106 are available: 107 108 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code) 109 110 prog : string 111 The Graphviz program to use when rendering the graph. 'twopi' 112 behaves the best for large graphs, reliably avoiding crossing edges, 113 but for moderate graphs 'neato' looks a bit nicer. For small 114 directed graphs, 'dot' may produce a normal-looking cladogram, but 115 will cross and distort edges in larger graphs. (The programs 'circo' 116 and 'fdp' are not recommended.) 117 args : string 118 Options passed to the external graphviz program. Normally not 119 needed, but offered here for completeness. 120 121 Example 122 ------- 123 124 >>> import pylab 125 >>> from Bio import Phylo 126 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml') 127 >>> Phylo.draw_graphviz(tree) 128 >>> pylab.show() 129 >>> pylab.savefig('apaf.png') 130 """ 131 try: 132 import networkx 133 except ImportError: 134 from Bio import MissingPythonDependencyError 135 raise MissingPythonDependencyError( 136 "Install NetworkX if you want to use to_networkx.") 137 138 G = to_networkx(tree) 139 Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False) 140 try: 141 posi = networkx.graphviz_layout(Gi, prog, args=args) 142 except ImportError: 143 raise MissingPythonDependencyError( 144 "Install PyGraphviz or pydot if you want to use draw_graphviz.") 145 146 def get_label_mapping(G, selection): 147 for node in G.nodes(): 148 if (selection is None) or (node in selection): 149 try: 150 label = label_func(node) 151 if label not in (None, node.__class__.__name__): 152 yield (node, label) 153 except (LookupError, AttributeError, ValueError): 154 pass
155 156 if 'nodelist' in kwargs: 157 labels = dict(get_label_mapping(G, set(kwargs['nodelist']))) 158 else: 159 labels = dict(get_label_mapping(G, None)) 160 kwargs['nodelist'] = labels.keys() 161 if 'edge_color' not in kwargs: 162 kwargs['edge_color'] = [isinstance(e[2], dict) and 163 e[2].get('color', 'k') or 'k' 164 for e in G.edges(data=True)] 165 if 'width' not in kwargs: 166 kwargs['width'] = [isinstance(e[2], dict) and 167 e[2].get('width', 1.0) or 1.0 168 for e in G.edges(data=True)] 169 170 posn = dict((n, posi[Gi.node_labels[n]]) for n in G) 171 networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs) 172 173
174 -def draw_ascii(tree, file=sys.stdout, column_width=80):
175 """Draw an ascii-art phylogram of the given tree. 176 177 The printed result looks like:: 178 179 _________ Orange 180 ______________| 181 | |______________ Tangerine 182 ______________| 183 | | _________________________ Grapefruit 184 _| |_________| 185 | |______________ Pummelo 186 | 187 |__________________________________ Apple 188 189 190 :Parameters: 191 file : file-like object 192 File handle opened for writing the output drawing. 193 column_width : int 194 Total number of text columns used by the drawing. 195 """ 196 taxa = tree.get_terminals() 197 # Some constants for the drawing calculations 198 max_label_width = max(len(str(taxon)) for taxon in taxa) 199 drawing_width = column_width - max_label_width - 1 200 drawing_height = 2 * len(taxa) - 1 201 202 def get_col_positions(tree): 203 """Create a mapping of each clade to its column position.""" 204 depths = tree.depths() 205 # If there are no branch lengths, assume unit branch lengths 206 if not max(depths.itervalues()): 207 depths = tree.depths(unit_branch_lengths=True) 208 # Potential drawing overflow due to rounding -- 1 char per tree layer 209 fudge_margin = int(math.ceil(math.log(len(taxa), 2))) 210 cols_per_branch_unit = ((drawing_width - fudge_margin) 211 / float(max(depths.itervalues()))) 212 return dict((clade, int(round(blen*cols_per_branch_unit + 0.5))) 213 for clade, blen in depths.iteritems())
214 215 def get_row_positions(tree): 216 positions = dict((taxon, 2*idx) for idx, taxon in enumerate(taxa)) 217 218 def calc_row(clade): 219 for subclade in clade: 220 if subclade not in positions: 221 calc_row(subclade) 222 positions[clade] = (positions[clade.clades[0]] + 223 positions[clade.clades[-1]]) / 2 224 225 calc_row(tree.root) 226 return positions 227 228 col_positions = get_col_positions(tree) 229 row_positions = get_row_positions(tree) 230 char_matrix = [[' ' for x in range(drawing_width)] 231 for y in range(drawing_height)] 232 233 def draw_clade(clade, startcol): 234 thiscol = col_positions[clade] 235 thisrow = row_positions[clade] 236 # Draw a horizontal line 237 for col in range(startcol, thiscol): 238 char_matrix[thisrow][col] = '_' 239 if clade.clades: 240 # Draw a vertical line 241 toprow = row_positions[clade.clades[0]] 242 botrow = row_positions[clade.clades[-1]] 243 for row in range(toprow+1, botrow+1): 244 char_matrix[row][thiscol] = '|' 245 # NB: Short terminal branches need something to stop rstrip() 246 if (col_positions[clade.clades[0]] - thiscol) < 2: 247 char_matrix[toprow][thiscol] = ',' 248 # Draw descendents 249 for child in clade: 250 draw_clade(child, thiscol+1) 251 252 draw_clade(tree.root, 0) 253 # Print the complete drawing 254 for idx, row in enumerate(char_matrix): 255 line = ''.join(row).rstrip() 256 # Add labels for terminal taxa in the right margin 257 if idx % 2 == 0: 258 line += ' ' + str(taxa[idx/2]) 259 file.write(line + '\n') 260 file.write('\n') 261 262
263 -def draw(tree, label_func=str, do_show=True, show_confidence=True, 264 # For power users 265 axes=None, branch_labels=None):
266 """Plot the given tree using matplotlib (or pylab). 267 268 The graphic is a rooted tree, drawn with roughly the same algorithm as 269 draw_ascii. 270 271 Visual aspects of the plot can be modified using pyplot's own functions and 272 objects (via pylab or matplotlib). In particular, the pyplot.rcParams 273 object can be used to scale the font size (rcParams["font.size"]) and line 274 width (rcParams["lines.linewidth"]). 275 276 :Parameters: 277 label_func : callable 278 A function to extract a label from a node. By default this is str(), 279 but you can use a different function to select another string 280 associated with each node. If this function returns None for a node, 281 no label will be shown for that node. 282 do_show : bool 283 Whether to show() the plot automatically. 284 show_confidence : bool 285 Whether to display confidence values, if present on the tree. 286 axes : matplotlib/pylab axes 287 If a valid matplotlib.axes.Axes instance, the phylogram is plotted 288 in that Axes. By default (None), a new figure is created. 289 branch_labels : dict or callable 290 A mapping of each clade to the label that will be shown along the 291 branch leading to it. By default this is the confidence value(s) of 292 the clade, taken from the ``confidence`` attribute, and can be 293 easily toggled off with this function's ``show_confidence`` option. 294 But if you would like to alter the formatting of confidence values, 295 or label the branches with something other than confidence, then use 296 this option. 297 """ 298 try: 299 import matplotlib.pyplot as plt 300 except ImportError: 301 try: 302 import pylab as plt 303 except ImportError: 304 from Bio import MissingPythonDependencyError 305 raise MissingPythonDependencyError( 306 "Install matplotlib or pylab if you want to use draw.") 307 308 # Options for displaying branch labels / confidence 309 def conf2str(conf): 310 if int(conf) == conf: 311 return str(int(conf)) 312 return str(conf)
313 if not branch_labels: 314 if show_confidence: 315 def format_branch_label(clade): 316 if hasattr(clade, 'confidences'): 317 # phyloXML supports multiple confidences 318 return '/'.join(conf2str(cnf.value) 319 for cnf in clade.confidences) 320 if clade.confidence: 321 return conf2str(clade.confidence) 322 return None 323 else: 324 def format_branch_label(clade): 325 return None 326 elif isinstance(branch_labels, dict): 327 def format_branch_label(clade): 328 return branch_labels.get(clade) 329 else: 330 assert callable(branch_labels), \ 331 "branch_labels must be either a dict or a callable (function)" 332 format_branch_label = branch_labels 333 334 # Layout 335 336 def get_x_positions(tree): 337 """Create a mapping of each clade to its horizontal position. 338 339 Dict of {clade: x-coord} 340 """ 341 depths = tree.depths() 342 # If there are no branch lengths, assume unit branch lengths 343 if not max(depths.itervalues()): 344 depths = tree.depths(unit_branch_lengths=True) 345 return depths 346 347 def get_y_positions(tree): 348 """Create a mapping of each clade to its vertical position. 349 350 Dict of {clade: y-coord}. 351 Coordinates are negative, and integers for tips. 352 """ 353 maxheight = tree.count_terminals() 354 # Rows are defined by the tips 355 heights = dict((tip, maxheight - i) 356 for i, tip in enumerate(reversed(tree.get_terminals()))) 357 358 # Internal nodes: place at midpoint of children 359 def calc_row(clade): 360 for subclade in clade: 361 if subclade not in heights: 362 calc_row(subclade) 363 # Closure over heights 364 heights[clade] = (heights[clade.clades[0]] + 365 heights[clade.clades[-1]]) / 2.0 366 367 if tree.root.clades: 368 calc_row(tree.root) 369 return heights 370 371 x_posns = get_x_positions(tree) 372 y_posns = get_y_positions(tree) 373 # The function draw_clade closes over the axes object 374 if axes is None: 375 fig = plt.figure() 376 axes = fig.add_subplot(1, 1, 1) 377 elif not isinstance(axes, plt.matplotlib.axes.Axes): 378 raise ValueError("Invalid argument for axes: %s" % axes) 379 380 def draw_clade(clade, x_start, color, lw): 381 """Recursively draw a tree, down from the given clade.""" 382 x_here = x_posns[clade] 383 y_here = y_posns[clade] 384 # phyloXML-only graphics annotations 385 if hasattr(clade, 'color') and clade.color is not None: 386 color = clade.color.to_hex() 387 if hasattr(clade, 'width') and clade.width is not None: 388 lw = clade.width * plt.rcParams['lines.linewidth'] 389 # Draw a horizontal line from start to here 390 axes.hlines(y_here, x_start, x_here, color=color, lw=lw) 391 # Add node/taxon labels 392 label = label_func(clade) 393 if label not in (None, clade.__class__.__name__): 394 axes.text(x_here, y_here, ' %s' % label, verticalalignment='center') 395 # Add label above the branch (optional) 396 conf_label = format_branch_label(clade) 397 if conf_label: 398 axes.text(0.5*(x_start + x_here), y_here, conf_label, 399 fontsize='small', horizontalalignment='center') 400 if clade.clades: 401 # Draw a vertical line connecting all children 402 y_top = y_posns[clade.clades[0]] 403 y_bot = y_posns[clade.clades[-1]] 404 # Only apply widths to horizontal lines, like Archaeopteryx 405 axes.vlines(x_here, y_bot, y_top, color=color) 406 # Draw descendents 407 for child in clade: 408 draw_clade(child, x_here, color, lw) 409 410 draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth']) 411 412 # Aesthetics 413 414 if hasattr(tree, 'name') and tree.name: 415 axes.set_title(tree.name) 416 axes.set_xlabel('branch length') 417 axes.set_ylabel('taxa') 418 # Add margins around the tree to prevent overlapping the axes 419 xmax = max(x_posns.itervalues()) 420 axes.set_xlim(-0.05 * xmax, 1.25 * xmax) 421 # Also invert the y-axis (origin at the top) 422 # Add a small vertical margin, but avoid including 0 and N+1 on the y axis 423 axes.set_ylim(max(y_posns.itervalues()) + 0.8, 0.2) 424 if do_show: 425 plt.show() 426