Package Bio :: Package Phylo :: Module NeXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.NeXMLIO

  1  # Copyright (C) 2013 by Ben Morris (ben@bendmorris.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox 
  3  # and Bio.Phylo.Newick, copyright 2009 by Eric Talevich. 
  4  # All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license. Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8   
  9  """I/O function wrappers for the NeXML file format. 
 10   
 11  See: http://www.nexml.org 
 12  """ 
 13   
 14  __docformat__ = "restructuredtext en" 
 15   
 16  from Bio._py3k import StringIO 
 17   
 18  from Bio.Phylo import NeXML 
 19  from xml.dom import minidom 
 20  import sys 
 21  from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri 
 22   
 23   
 24  # For speed try to use cElementTree rather than ElementTree 
 25  try: 
 26      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 27          # Workaround for bug in python 3.0 and 3.1, 
 28          # see http://bugs.python.org/issue9257 
 29          from xml.etree import ElementTree as ElementTree 
 30      else: 
 31          from xml.etree import cElementTree as ElementTree 
 32  except ImportError: 
 33      from xml.etree import ElementTree as ElementTree 
 34   
 35  NAMESPACES = { 
 36      'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 
 37      'xml': 'http://www.w3.org/XML/1998/namespace', 
 38      'nex': 'http://www.nexml.org/2009', 
 39      'xsd': 'http://www.w3.org/2001/XMLSchema#', 
 40  } 
 41  NAMESPACES.update(cdao_namespaces) 
 42  DEFAULT_NAMESPACE = NAMESPACES['nex'] 
 43  VERSION = '0.9' 
 44  SCHEMA = 'http://www.nexml.org/2009/nexml/xsd/nexml.xsd' 
 45   
 46   
 47  try: 
 48      register_namespace = ElementTree.register_namespace 
 49  except AttributeError: 
 50      if not hasattr(ElementTree, '_namespace_map'): 
 51          # cElementTree needs the pure-Python xml.etree.ElementTree 
 52          from xml.etree import ElementTree as ET_py 
 53          ElementTree._namespace_map = ET_py._namespace_map 
54 55 - def register_namespace(prefix, uri):
56 ElementTree._namespace_map[uri] = prefix
57 58 for prefix, uri in NAMESPACES.items(): 59 register_namespace(prefix, uri)
def qUri(s):
    """Given a prefixed URI, return the full URI.

    Delegates to ``resolve_uri`` using the module-level ``NAMESPACES``
    mapping and ``xml_style=True``.
    """
    full_uri = resolve_uri(s, namespaces=NAMESPACES, xml_style=True)
    return full_uri
65
def cdao_to_obo(s):
    """Convert a CDAO-prefixed URI into an OBO-prefixed URI.

    The leading ``len('cdao:')`` characters of ``s`` are dropped and the
    remainder is looked up in ``cdao_elements``; a missing term raises
    ``KeyError``.
    """
    term = s[len('cdao:'):]
    obo_id = cdao_elements[term]
    return 'obo:%s' % obo_id
70
def matches(s):
    """Check for matches in both CDAO and OBO namespaces.

    Returns a tuple of acceptable spellings of ``s``: just ``(s,)`` for a
    non-CDAO name, or ``(s, <obo equivalent>)`` when ``s`` starts with
    ``'cdao:'``.
    """
    if not s.startswith('cdao:'):
        return (s,)
    return (s, cdao_to_obo(s))
78
class NeXMLError(Exception):
    """Exception raised when NeXML object construction cannot continue."""
83
# ---------------------------------------------------------
# Public API

def parse(handle, **kwargs):
    """Iterate over the trees in a NeXML file handle.

    Any keyword arguments are passed through to ``Parser.parse``.

    :returns: generator of Bio.Phylo.NeXML.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse(**kwargs)
94
def write(trees, handle, plain=False, **kwargs):
    """Write trees in NeXML format to the given file handle.

    ``plain`` and any extra keyword arguments are forwarded to
    ``Writer.write``.

    :returns: number of trees written.
    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
102
# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse a NeXML tree given a file handle.

    Based on the parser in `Bio.Nexus.Trees`.
    """

    def __init__(self, handle):
        """Store the file handle that ``parse`` will read from."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Build a Parser reading from an in-memory string of NeXML text."""
        handle = StringIO(treetext)
        return cls(handle)

    def add_annotation(self, node_dict, meta_node):
        """Copy one ``meta`` element into a node's attribute dict.

        The value of the element's ``property`` attribute is used as the
        key (``'meta'`` when the attribute is absent).  A support-value
        property is stored as a float under ``'confidence'``; anything else
        is stored as the element's raw text.
        """
        prop = meta_node.attrib.get('property', 'meta')

        if prop in matches('cdao:has_Support_Value'):
            node_dict['confidence'] = float(meta_node.text)
        else:
            node_dict[prop] = meta_node.text

    def parse(self, values_are_confidence=False, rooted=False):
        """Parse the text stream this object was initialized with.

        This is a generator yielding one ``NeXML.Tree`` per ``tree``
        element found in the stream.

        :param values_are_confidence: accepted for API compatibility; not
            used by this parser.
        :param rooted: accepted for API compatibility; the value is
            overwritten for every tree, which is reported as rooted exactly
            when one of its nodes carries ``root="true"``.
        :raises NeXMLError: if a tree has no explicit root and no node
            without an incoming edge can be found.
        """
        # Resolve the qualified tag names once instead of once per element.
        tree_tag = qUri('nex:tree')
        node_tag = qUri('nex:node')
        edge_tag = qUri('nex:edge')
        meta_tag = qUri('nex:meta')
        support_props = matches('cdao:has_Support_Value')

        nexml_doc = ElementTree.iterparse(self.handle, events=('end',))

        for event, tree_elem in nexml_doc:
            if tree_elem.tag != tree_tag:
                continue

            node_dict = {}
            node_children = {}
            root = None

            # Iterate the element directly: Element.getchildren() was
            # removed in Python 3.9.
            nodes = [child for child in tree_elem if child.tag == node_tag]
            edges = [child for child in tree_elem if child.tag == edge_tag]

            for node_elem in nodes:
                node_id = node_elem.attrib['id']
                this_node = node_dict[node_id] = {}
                if node_elem.attrib.get('otu'):
                    this_node['name'] = node_elem.attrib['otu']
                if node_elem.attrib.get('root') == 'true':
                    root = node_id

                for child in node_elem:
                    if child.tag == meta_tag:
                        self.add_annotation(this_node, child)

            srcs = set()
            tars = set()
            for edge in edges:
                src, tar = edge.attrib['source'], edge.attrib['target']
                srcs.add(src)
                tars.add(tar)

                # Keep children in document order (a set would shuffle
                # siblings between runs) while still ignoring duplicates.
                siblings = node_children.setdefault(src, [])
                if tar not in siblings:
                    siblings.append(tar)

                if 'length' in edge.attrib:
                    node_dict[tar]['branch_length'] = float(edge.attrib['length'])
                if edge.attrib.get('property') in support_props:
                    node_dict[tar]['confidence'] = float(edge.attrib['content'])

                for child in edge:
                    if child.tag == meta_tag:
                        self.add_annotation(node_dict[tar], child)

            if root is None:
                # If no root is specified, start the recursive tree creation
                # with the first node that has children and is not a child
                # of any other node.
                rooted = False
                node_ids = [node_elem.attrib['id'] for node_elem in nodes]
                root = next((node_id for node_id in node_ids
                             if node_id in srcs and node_id not in tars),
                            None)
                if root is None:
                    # A tree without edges (e.g. a single node) has no
                    # source nodes; accept any node that is not a target.
                    root = next((node_id for node_id in node_ids
                                 if node_id not in tars), None)
                if root is None:
                    # Raise explicitly: a bare StopIteration escaping a
                    # generator turns into RuntimeError (PEP 479).
                    raise NeXMLError(
                        "Could not determine the root of a NeXML tree: "
                        "no node is free of incoming edges.")
            else:
                rooted = True

            yield NeXML.Tree(root=self._make_tree(root, node_dict, node_children),
                             rooted=rooted)

    @classmethod
    def _make_tree(cls, node, node_dict, children):
        """Traverse the tree creating a nested clade structure.

        Return a NeXML.Clade, and calls itself recursively for each child,
        traversing the entire tree and creating a nested structure of
        NeXML.Clade objects.

        :param node: id of the node to build a clade for.
        :param node_dict: maps node id to the keyword arguments for its clade.
        :param children: maps node id to an iterable of its child node ids.
        """
        this_node = node_dict[node]
        clade = NeXML.Clade(**this_node)

        if node in children:
            clade.clades = [cls._make_tree(child, node_dict, children)
                            for child in children[node]]

        return clade
213
# ---------------------------------------------------------
# Output


class Writer(object):
    """Serialize trees as NeXML.

    Based on the writer in Bio.Nexus.Trees (str, to_string).
    """

    def __init__(self, trees):
        """Store the trees to write and reset the id counters."""
        self.trees = trees

        # Counters backing new_label(); one per labelled object type.
        self.node_counter = 0
        self.edge_counter = 0
        self.tree_counter = 0

    def new_label(self, obj_type):
        """Return the next unused id for ``obj_type``.

        Increments the ``<obj_type>_counter`` attribute and returns the
        type name followed by the new count, e.g. ``'node1'``.
        """
        counter = '%s_counter' % obj_type
        setattr(self, counter, getattr(self, counter) + 1)
        return '%s%s' % (obj_type, getattr(self, counter))

    def write(self, handle, cdao_to_obo=True, **kwargs):
        """Write this instance's trees to a file handle.

        :param handle: object with a ``write`` method; text is written
            first, and UTF-8 bytes are written if that raises TypeError.
        :param cdao_to_obo: when true, CDAO-prefixed URIs are converted to
            OBO-prefixed URIs in the output.
        :param kwargs: accepted and ignored.
        :returns: number of trees written.
        """
        self.cdao_to_obo = cdao_to_obo

        # set XML namespaces
        root_node = ElementTree.Element('nex:nexml')
        root_node.set('version', VERSION)
        root_node.set('xmlns', DEFAULT_NAMESPACE)
        root_node.set('xsi:schemaLocation', SCHEMA)

        for prefix, uri in NAMESPACES.items():
            root_node.set('xmlns:%s' % prefix, uri)

        otus = ElementTree.SubElement(root_node, 'otus',
                                      **{'id': 'tax', 'label': 'RootTaxaBlock'})

        # create trees
        trees = ElementTree.SubElement(root_node, 'trees',
                                       **{'id': 'Trees',
                                          'label': 'TreesBlockFromXML',
                                          'otus': 'tax'})
        count = 0
        tus = set()
        for tree in self.trees:
            this_tree = ElementTree.SubElement(trees, 'tree',
                                               **{'id': self.new_label('tree')})
            tus.update(self._write_tree(tree.clade, this_tree,
                                        rooted=tree.rooted))
            count += 1

        # create OTUs; sorted so the output does not depend on set order
        for tu in sorted(tus):
            ElementTree.SubElement(otus, 'otu', **{'id': tu})

        # use xml.dom.minidom for pretty printing
        rough_string = ElementTree.tostring(root_node, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent=" ")
        try:
            handle.write(pretty_xml)
        except TypeError:
            # for compatibility with Python 3 (handle opened in binary mode)
            handle.write(bytes(pretty_xml, 'utf8'))

        return count

    def _write_tree(self, clade, tree, parent=None, rooted=False):
        """Recursively process tree, adding nodes and edges to Tree object.

        :param clade: clade to serialize, together with its descendants.
        :param tree: XML ``tree`` element that receives the ``node`` and
            ``edge`` sub-elements.
        :param parent: parent clade, or None for the top-level clade.
        :param rooted: whether the top-level clade is marked ``root="true"``.
        :returns: a set of all OTUs encountered.
        """
        tus = set()

        convert_uri = cdao_to_obo if self.cdao_to_obo else (lambda s: s)

        node_id = self.new_label('node')
        # Temporarily tag the clade so child edges can look up their source.
        clade.node_id = node_id
        try:
            attrib = {'id': node_id, 'label': node_id}
            if rooted and parent is None:
                attrib['root'] = 'true'
            if clade.name:
                tus.add(clade.name)
                attrib['otu'] = clade.name
            ElementTree.SubElement(tree, 'node', **attrib)

            if parent is not None:
                edge_id = self.new_label('edge')
                attrib = {
                    'id': edge_id,
                    'source': parent.node_id,
                    'target': node_id,
                }
                # Omit the attribute for an unset branch length; writing the
                # string "None" cannot be converted back to a float on read.
                if clade.branch_length is not None:
                    attrib['length'] = str(clade.branch_length)
                attrib['typeof'] = convert_uri('cdao:Edge')

                confidence = getattr(clade, 'confidence', None)
                if confidence is not None:
                    attrib.update({
                        'property': convert_uri('cdao:has_Support_Value'),
                        'datatype': 'xsd:float',
                        'content': '%1.2f' % confidence,
                    })
                ElementTree.SubElement(tree, 'edge', **attrib)

            if not clade.is_terminal():
                for new_clade in clade.clades:
                    tus.update(self._write_tree(new_clade, tree, parent=clade))
        finally:
            # Always remove the temporary marker, even if serialization fails.
            del clade.node_id

        return tus
327