Package Bio :: Package Phylo :: Module PhyloXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PhyloXMLIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """PhyloXML reader/parser, writer, and associated functions. 
  7   
  8  Instantiates tree elements from a parsed PhyloXML file, and constructs an XML 
  9  file from a `Bio.Phylo.PhyloXML` object. 
 10   
 11  About capitalization: 
 12   
 13  - phyloXML means the file format specification 
 14  - PhyloXML means the Biopython module `Bio.Phylo.PhyloXML` and its classes 
 15  - Phyloxml means the top-level class used by `PhyloXMLIO.read` (but not 
 16    `Bio.Phylo.read`!), containing a list of Phylogenies (objects derived from 
 17    `BaseTree.Tree`) 
 18  """ 
 19  __docformat__ = "restructuredtext en" 
 20   
 21  import sys 
 22   
 23  from Bio.Phylo import PhyloXML as PX 
 24   
 25  #For speed try to use cElementTree rather than ElementTree 
 26  try: 
 27      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 28          # Workaround for bug in python 3.0 and 3.1, 
 29          # see http://bugs.python.org/issue9257 
 30          from xml.etree import ElementTree as ElementTree 
 31      else: 
 32          from xml.etree import cElementTree as ElementTree 
 33  except ImportError: 
 34      from xml.etree import ElementTree as ElementTree 
 35   
 36  # Recognize the phyloXML namespace when parsing 
 37  # See http://effbot.org/zone/element-namespaces.htm 
 38  NAMESPACES = { 
 39          'phy':  'http://www.phyloxml.org', 
 40          } 
 41   
 42  try: 
 43      register_namespace = ElementTree.register_namespace 
 44  except AttributeError: 
 45      if not hasattr(ElementTree, '_namespace_map'): 
 46          # cElementTree needs the pure-Python xml.etree.ElementTree 
 47          from xml.etree import ElementTree as ET_py 
 48          ElementTree._namespace_map = ET_py._namespace_map 
 49   
50 - def register_namespace(prefix, uri):
51 ElementTree._namespace_map[uri] = prefix
52 53 for prefix, uri in NAMESPACES.iteritems(): 54 register_namespace(prefix, uri) 55 56
class PhyloXMLError(Exception):
    """Signal that a PhyloXML object could not be built from the input.

    Malformed XML is reported by the underlying ElementTree module itself.
    This exception covers the other case: XML that is well-formed but does
    not conform to the phyloXML specification.
    """
65 66 67 # --------------------------------------------------------- 68 # Public API 69
def read(file):
    """Parse a whole phyloXML file or stream into one Biopython object.

    The root node's children are phylogenies, plus possibly arbitrary
    non-phyloXML ("other") objects; all of them are kept.

    :returns: a single `Bio.Phylo.PhyloXML.Phyloxml` object.
    """
    parser = Parser(file)
    return parser.read()
79 80
def parse(file):
    """Lazily yield each phylogenetic tree found in a phyloXML file.

    Extra data stored at the top level of the file is skipped, which may make
    this more memory-efficient than the `read` function.

    :returns: a generator of `Bio.Phylo.PhyloXML.Phylogeny` objects.
    """
    parser = Parser(file)
    return parser.parse()
90 91
def write(obj, file, encoding='utf-8', indent=True):
    """Serialize trees to a phyloXML file.

    :Parameters:
        obj
            an instance of `Phyloxml`, `Phylogeny` or `BaseTree.Tree`, or an
            iterable of either of the latter two.  Anything that is not
            already a `Phyloxml` is converted to one before serialization.
        file
            either an open handle or a file name.
        encoding
            passed through to `Writer.write`.
        indent
            passed through to `Writer.write`; if true the XML is
            pretty-printed.

    :returns: the value returned by `Writer.write`.
    """
    def to_phylogeny(tree):
        # Most specific PhyloXML types are tested first, generic BaseTree
        # types last.
        if isinstance(tree, PX.Phylogeny):
            return tree
        if isinstance(tree, PX.Clade):
            return tree.to_phylogeny()
        if isinstance(tree, PX.BaseTree.Tree):
            return PX.Phylogeny.from_tree(tree)
        if isinstance(tree, PX.BaseTree.Clade):
            wrapper = PX.BaseTree.Tree(root=tree)
            return PX.Phylogeny.from_tree(wrapper)
        raise ValueError("iterable must contain Tree or Clade types")

    if not isinstance(obj, PX.Phyloxml):
        if isinstance(obj, (PX.BaseTree.Tree, PX.BaseTree.Clade)):
            obj = to_phylogeny(obj).to_phyloxml()
        elif hasattr(obj, '__iter__'):
            obj = PX.Phyloxml({},
                              phylogenies=(to_phylogeny(t) for t in obj))
        else:
            raise ValueError("First argument must be a Phyloxml, Phylogeny, "
                             "Tree, or iterable of Trees or Phylogenies.")
    writer = Writer(obj)
    return writer.write(file, encoding=encoding, indent=indent)


# ---------------------------------------------------------
# Functions I wish ElementTree had

131 -def _local(tag):
132 """Extract the local tag from a namespaced tag name.""" 133 if tag[0] == '{': 134 return tag[tag.index('}')+1:] 135 return tag
136 137
138 -def _split_namespace(tag):
139 """Split a tag into namespace and local tag strings.""" 140 try: 141 return tag[1:].split('}', 1) 142 except: 143 return ('', tag)
144 145
def _ns(tag, namespace=NAMESPACES['phy']):
    """Qualify an XML tag with a namespace, in ``{namespace}tag`` form.

    The namespace defaults to the phyloXML one.
    """
    template = '{%s}%s'
    return template % (namespace, tag)
149 150
def _get_child_as(parent, tag, construct):
    """Look up one phyloXML child of `parent` and convert it.

    The first child whose tag matches `tag` (in the phyloXML namespace) is
    passed to `construct` and the result returned.  Returns None if no
    matching child is found.
    """
    node = parent.find(_ns(tag))
    if node is None:
        return None
    return construct(node)
159 160
def _get_child_text(parent, tag, construct=unicode):
    """Look up one phyloXML child of `parent` and convert its text.

    The text of the first child matching `tag` is passed to `construct`.
    Returns None if there is no such child or its text is empty.
    """
    node = parent.find(_ns(tag))
    if node is None or not node.text:
        return None
    return construct(node.text)
169 170
def _get_children_as(parent, tag, construct):
    """Convert every phyloXML child of `parent` that matches `tag`.

    Each matching child node is passed to `construct`, in document order.
    Returns an empty list if no matching child is found.
    """
    converted = []
    for node in parent.findall(_ns(tag)):
        converted.append(construct(node))
    return converted
178 179
def _get_children_text(parent, tag, construct=unicode):
    """Convert the text of every phyloXML child of `parent` matching `tag`.

    Children with empty or missing text are skipped.  Returns an empty list
    if no matching child is found.
    """
    converted = []
    for node in parent.findall(_ns(tag)):
        if not node.text:
            continue
        converted.append(construct(node.text))
    return converted
188 189
190 -def _indent(elem, level=0):
191 """Add line breaks and indentation to ElementTree in-place. 192 193 Sources: 194 195 - http://effbot.org/zone/element-lib.htm#prettyprint 196 - http://infix.se/2007/02/06/gentlemen-indent-your-xml 197 """ 198 i = "\n" + level*" " 199 if len(elem): 200 if not elem.text or not elem.text.strip(): 201 elem.text = i + " " 202 for e in elem: 203 _indent(e, level+1) 204 if not e.tail or not e.tail.strip(): 205 e.tail = i + " " 206 if not e.tail or not e.tail.strip(): 207 e.tail = i 208 else: 209 if level and (not elem.tail or not elem.tail.strip()): 210 elem.tail = i
211 212 # --------------------------------------------------------- 213 # INPUT 214 # --------------------------------------------------------- 215 216
217 -def _str2bool(text):
218 if text == 'true': 219 return True 220 if text == 'false': 221 return False 222 raise ValueError('String could not be converted to boolean: ' + text)
223 224
225 -def _dict_str2bool(dct, keys):
226 out = dct.copy() 227 for key in keys: 228 if key in out: 229 out[key] = _str2bool(out[key]) 230 return out
231 232
233 -def _int(text):
234 if text is not None: 235 try: 236 return int(text) 237 except Exception: 238 return None
239 240
241 -def _float(text):
242 if text is not None: 243 try: 244 return float(text) 245 except Exception: 246 return None
247 248
249 -def _collapse_wspace(text):
250 """Replace all spans of whitespace with a single space character. 251 252 Also remove leading and trailing whitespace. See "Collapse Whitespace 253 Policy" in the phyloXML spec glossary: 254 http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary 255 """ 256 if text is not None: 257 return ' '.join(text.split())
258 259 260 # NB: Not currently used
261 -def _replace_wspace(text):
262 """Replace tab, LF and CR characters with spaces, but don't collapse. 263 264 See "Replace Whitespace Policy" in the phyloXML spec glossary: 265 http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary 266 """ 267 for char in ('\t', '\n', '\r'): 268 if char in text: 269 text = text.replace(char, ' ') 270 return text
271 272
class Parser(object):
    """Methods for parsing all phyloXML nodes from an XML stream.

    To minimize memory use, the tree of ElementTree parsing events is cleared
    after completing each phylogeny, clade, and top-level 'other' element.
    Elements below the clade level are kept in memory until parsing of the
    current clade is finished -- this shouldn't be a problem because clade is
    the only recursive element, and non-clade nodes below this level are of
    bounded size.

    The incremental ``_parse_*`` methods all read from the shared event
    stream `self.context`.  Each one counts nesting depth so that only the
    direct children of the element being parsed are dispatched; deeper
    descendants are read from the finished child element by that child's own
    handler.
    """

    def __init__(self, file):
        """Start parsing `file` and remember the root element.

        `file` is handed to ``ElementTree.iterparse`` unchanged.
        """
        # Get an iterable context for XML parsing events
        context = iter(ElementTree.iterparse(file, events=('start', 'end')))
        # Built-in next() rather than the Python 2-only .next() method
        event, root = next(context)
        self.root = root
        self.context = context

    def read(self):
        """Parse the phyloXML file and create a single Phyloxml object."""
        phyloxml = PX.Phyloxml(dict((_local(key), val)
                                    for key, val in self.root.items()))
        other_depth = 0
        for event, elem in self.context:
            namespace, localtag = _split_namespace(elem.tag)
            if event == 'start':
                if namespace != NAMESPACES['phy']:
                    other_depth += 1
                    continue
                if localtag == 'phylogeny':
                    phylogeny = self._parse_phylogeny(elem)
                    phyloxml.phylogenies.append(phylogeny)
            if event == 'end' and namespace != NAMESPACES['phy']:
                # Deal with items not specified by phyloXML
                other_depth -= 1
                if other_depth == 0:
                    # We're directly under the root node -- evaluate
                    otr = self.other(elem, namespace, localtag)
                    phyloxml.other.append(otr)
                    self.root.clear()
        return phyloxml

    def parse(self):
        """Parse the phyloXML file incrementally and return each phylogeny."""
        phytag = _ns('phylogeny')
        for event, elem in self.context:
            if event == 'start' and elem.tag == phytag:
                yield self._parse_phylogeny(elem)

    # Special parsing cases -- incremental, using self.context

    def _parse_phylogeny(self, parent):
        """Parse a single phylogeny within the phyloXML tree.

        Recursively builds a phylogenetic tree with help from parse_clade,
        then clears the XML event history for the phylogeny element and
        returns control to the top-level parsing function.

        Raises PhyloXMLError if the phylogeny has more than one root clade or
        an unrecognized phyloXML child element.
        """
        phylogeny = PX.Phylogeny(**_dict_str2bool(parent.attrib,
                                                  ['rooted', 'rerootable']))
        list_types = {
                # XML tag, plural attribute
                'confidence':   'confidences',
                'property':     'properties',
                'clade_relation': 'clade_relations',
                'sequence_relation': 'sequence_relations',
                }
        # Number of currently open elements below this phylogeny
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                if depth == 0 and tag == 'clade':
                    # Raise rather than assert: asserts vanish under -O
                    if phylogeny.root is not None:
                        raise PhyloXMLError(
                                "Phylogeny object should only have 1 clade")
                    # _parse_clade consumes events through the clade's end
                    phylogeny.root = self._parse_clade(elem)
                else:
                    depth += 1
                continue
            if event != 'end':
                continue
            if depth == 0:
                # This is the closing tag of the phylogeny itself
                parent.clear()
                break
            depth -= 1
            if depth:
                # Nested below a direct child (e.g. the <desc> of a <date>);
                # the direct child's handler reads it from the element tree.
                continue
            # Handle the other non-recursive children
            if tag in list_types:
                getattr(phylogeny, list_types[tag]).append(
                        getattr(self, tag)(elem))
            # Complex types
            elif tag in ('date', 'id'):
                setattr(phylogeny, tag, getattr(self, tag)(elem))
            # Simple types
            elif tag in ('name', 'description'):
                setattr(phylogeny, tag, _collapse_wspace(elem.text))
            # Unknown tags
            elif namespace != NAMESPACES['phy']:
                phylogeny.other.append(self.other(elem, namespace, tag))
                parent.clear()
            else:
                # NB: This shouldn't happen in valid files
                raise PhyloXMLError('Misidentified tag: ' + tag)
        return phylogeny

    _clade_complex_types = ['color', 'events', 'binary_characters', 'date']
    _clade_list_types = {
            'confidence':   'confidences',
            'distribution': 'distributions',
            'reference':    'references',
            'property':     'properties',
            }
    # No longer consulted by _parse_clade (which tracks depth instead) but
    # kept so that existing references to the attribute keep working.
    # list(dict) instead of dict.keys() + [...], which fails on Python 3.
    _clade_tracked_tags = set(_clade_complex_types + list(_clade_list_types)
                              + ['branch_length', 'name', 'node_id', 'width'])

    def _parse_clade(self, parent):
        """Parse a Clade node and its children, recursively.

        Raises PhyloXMLError if branch_length is given both as an attribute
        and as a child element, or if an unrecognized phyloXML child element
        is found.
        """
        clade = PX.Clade(**parent.attrib)
        if clade.branch_length is not None:
            clade.branch_length = float(clade.branch_length)
        # NB: Only evaluate nodes at the current level
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                if depth == 0:
                    # These sub-parsers consume events through their own
                    # closing tag, so the depth counter is left alone.
                    if tag == 'clade':
                        clade.clades.append(self._parse_clade(elem))
                        continue
                    if tag == 'taxonomy':
                        clade.taxonomies.append(self._parse_taxonomy(elem))
                        continue
                    if tag == 'sequence':
                        clade.sequences.append(self._parse_sequence(elem))
                        continue
                depth += 1
                continue
            if event != 'end':
                continue
            if depth == 0:
                # This is the closing tag of the clade itself
                elem.clear()
                break
            depth -= 1
            if depth:
                # Grandchild (e.g. the <confidence> inside <events>): it
                # belongs to its own parent element, not to this clade.
                continue
            # Handle the other non-recursive children
            if tag in self._clade_list_types:
                getattr(clade, self._clade_list_types[tag]).append(
                        getattr(self, tag)(elem))
            elif tag in self._clade_complex_types:
                setattr(clade, tag, getattr(self, tag)(elem))
            elif tag == 'branch_length':
                # NB: possible collision with the attribute
                if clade.branch_length is not None:
                    raise PhyloXMLError(
                            'Attribute branch_length was already set '
                            'for this Clade.')
                clade.branch_length = _float(elem.text)
            elif tag == 'width':
                clade.width = _float(elem.text)
            elif tag == 'name':
                clade.name = _collapse_wspace(elem.text)
            elif tag == 'node_id':
                clade.node_id = PX.Id(elem.text.strip(),
                                      elem.attrib.get('provider'))
            elif namespace != NAMESPACES['phy']:
                clade.other.append(self.other(elem, namespace, tag))
                elem.clear()
            else:
                raise PhyloXMLError('Misidentified tag: ' + tag)
        return clade

    def _parse_sequence(self, parent):
        """Parse a sequence node and its direct children."""
        sequence = PX.Sequence(**parent.attrib)
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                depth += 1
                continue
            if event != 'end':
                continue
            if depth == 0:
                # This is the closing tag of the sequence itself
                parent.clear()
                break
            depth -= 1
            if depth:
                # E.g. the <uri> of an <annotation>: not the sequence's own
                continue
            if tag in ('accession', 'mol_seq', 'uri',
                       'domain_architecture'):
                setattr(sequence, tag, getattr(self, tag)(elem))
            elif tag == 'annotation':
                sequence.annotations.append(self.annotation(elem))
            elif tag == 'name':
                sequence.name = _collapse_wspace(elem.text)
            elif tag in ('symbol', 'location'):
                setattr(sequence, tag, elem.text)
            elif namespace != NAMESPACES['phy']:
                sequence.other.append(self.other(elem, namespace, tag))
                parent.clear()
        return sequence

    def _parse_taxonomy(self, parent):
        """Parse a taxonomy node and its direct children."""
        taxonomy = PX.Taxonomy(**parent.attrib)
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                depth += 1
                continue
            if event != 'end':
                continue
            if depth == 0:
                # This is the closing tag of the taxonomy itself
                parent.clear()
                break
            depth -= 1
            if depth:
                continue
            if tag in ('id', 'uri'):
                setattr(taxonomy, tag, getattr(self, tag)(elem))
            elif tag == 'common_name':
                taxonomy.common_names.append(_collapse_wspace(elem.text))
            elif tag == 'synonym':
                taxonomy.synonyms.append(elem.text)
            elif tag in ('code', 'scientific_name', 'authority', 'rank'):
                # ENH: check_str on rank
                setattr(taxonomy, tag, elem.text)
            elif namespace != NAMESPACES['phy']:
                taxonomy.other.append(self.other(elem, namespace, tag))
                parent.clear()
        return taxonomy

    def other(self, elem, namespace, localtag):
        """Wrap a non-phyloXML element, and its children, in PX.Other."""
        if elem.text:
            # Whitespace-only text counts as no value at all
            value = elem.text.strip() or None
        else:
            value = None
        return PX.Other(localtag, namespace, elem.attrib,
                  value=value,
                  children=[self.other(child, *_split_namespace(child.tag))
                            for child in elem])

    # Complex types

    def accession(self, elem):
        """Build a PX.Accession from an accession element."""
        return PX.Accession(elem.text.strip(), elem.get('source'))

    def annotation(self, elem):
        """Build a PX.Annotation from an annotation element."""
        return PX.Annotation(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                confidence=_get_child_as(elem, 'confidence', self.confidence),
                properties=_get_children_as(elem, 'property', self.property),
                uri=_get_child_as(elem, 'uri', self.uri),
                **elem.attrib)

    def binary_characters(self, elem):
        """Build a PX.BinaryCharacters from a binary_characters element."""
        def bc_getter(elem):
            return _get_children_text(elem, 'bc')
        return PX.BinaryCharacters(
                type=elem.get('type'),
                gained_count=_int(elem.get('gained_count')),
                lost_count=_int(elem.get('lost_count')),
                present_count=_int(elem.get('present_count')),
                absent_count=_int(elem.get('absent_count')),
                # Flatten BinaryCharacterList sub-nodes into lists of strings
                gained=_get_child_as(elem, 'gained', bc_getter),
                lost=_get_child_as(elem, 'lost', bc_getter),
                present=_get_child_as(elem, 'present', bc_getter),
                absent=_get_child_as(elem, 'absent', bc_getter))

    def clade_relation(self, elem):
        """Build a PX.CladeRelation from a clade_relation element."""
        return PX.CladeRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                # Parsed as a number, the same way sequence_relation does it
                distance=_float(elem.get('distance')),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def color(self, elem):
        """Build a PX.BranchColor from a color element."""
        red, green, blue = (_get_child_text(elem, color, int) for color in
                            ('red', 'green', 'blue'))
        return PX.BranchColor(red, green, blue)

    def confidence(self, elem):
        """Build a PX.Confidence from a confidence element."""
        return PX.Confidence(
                _float(elem.text),
                elem.get('type'))

    def date(self, elem):
        """Build a PX.Date from a date element."""
        return PX.Date(
                unit=elem.get('unit'),
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                value=_get_child_text(elem, 'value', float),
                minimum=_get_child_text(elem, 'minimum', float),
                maximum=_get_child_text(elem, 'maximum', float),
                )

    def distribution(self, elem):
        """Build a PX.Distribution from a distribution element."""
        return PX.Distribution(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                points=_get_children_as(elem, 'point', self.point),
                polygons=_get_children_as(elem, 'polygon', self.polygon))

    def domain(self, elem):
        """Build a PX.ProteinDomain from a domain element.

        The 'from' attribute is decremented by one before being passed on;
        'to' is passed through unchanged.
        """
        return PX.ProteinDomain(elem.text.strip(),
                int(elem.get('from')) - 1,
                int(elem.get('to')),
                confidence=_float(elem.get('confidence')),
                id=elem.get('id'))

    def domain_architecture(self, elem):
        """Build a PX.DomainArchitecture from a domain_architecture element."""
        return PX.DomainArchitecture(
                length=int(elem.get('length')),
                domains=_get_children_as(elem, 'domain', self.domain))

    def events(self, elem):
        """Build a PX.Events from an events element."""
        return PX.Events(
                type=_get_child_text(elem, 'type'),
                duplications=_get_child_text(elem, 'duplications', int),
                speciations=_get_child_text(elem, 'speciations', int),
                losses=_get_child_text(elem, 'losses', int),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def id(self, elem):
        """Build a PX.Id; the provider falls back to the 'type' attribute."""
        provider = elem.get('provider') or elem.get('type')
        return PX.Id(elem.text.strip(), provider)

    def mol_seq(self, elem):
        """Build a PX.MolSeq from a mol_seq element."""
        is_aligned = elem.get('is_aligned')
        if is_aligned is not None:
            is_aligned = _str2bool(is_aligned)
        return PX.MolSeq(elem.text.strip(), is_aligned=is_aligned)

    def point(self, elem):
        """Build a PX.Point from a point element."""
        return PX.Point(
                elem.get('geodetic_datum'),
                _get_child_text(elem, 'lat', float),
                _get_child_text(elem, 'long', float),
                alt=_get_child_text(elem, 'alt', float),
                alt_unit=elem.get('alt_unit'))

    def polygon(self, elem):
        """Build a PX.Polygon from a polygon element."""
        return PX.Polygon(
                points=_get_children_as(elem, 'point', self.point))

    def property(self, elem):
        """Build a PX.Property from a property element."""
        return PX.Property(elem.text.strip(),
                elem.get('ref'), elem.get('applies_to'), elem.get('datatype'),
                unit=elem.get('unit'),
                id_ref=elem.get('id_ref'))

    def reference(self, elem):
        """Build a PX.Reference from a reference element."""
        return PX.Reference(
                doi=elem.get('doi'),
                desc=_get_child_text(elem, 'desc'))

    def sequence_relation(self, elem):
        """Build a PX.SequenceRelation from a sequence_relation element."""
        return PX.SequenceRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=_float(elem.get('distance')),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def uri(self, elem):
        """Build a PX.Uri from a uri element."""
        return PX.Uri(elem.text.strip(),
                desc=_collapse_wspace(elem.get('desc')),
                type=elem.get('type'))


# ---------------------------------------------------------
# OUTPUT
# ---------------------------------------------------------

613 -def _serialize(value):
614 """Convert a Python primitive to a phyloXML-compatible Unicode string.""" 615 if isinstance(value, float): 616 return unicode(value).upper() 617 elif isinstance(value, bool): 618 return unicode(value).lower() 619 return unicode(value)
620 621
622 -def _clean_attrib(obj, attrs):
623 """Create a dictionary from an object's specified, non-None attributes.""" 624 out = {} 625 for key in attrs: 626 val = getattr(obj, key) 627 if val is not None: 628 out[key] = _serialize(val) 629 return out
630 631
632 -def _handle_complex(tag, attribs, subnodes, has_text=False):
633 def wrapped(self, obj): 634 elem = ElementTree.Element(tag, _clean_attrib(obj, attribs)) 635 for subn in subnodes: 636 if isinstance(subn, basestring): 637 # singular object: method and attribute names are the same 638 if getattr(obj, subn) is not None: 639 elem.append(getattr(self, subn)(getattr(obj, subn))) 640 else: 641 # list: singular method, pluralized attribute name 642 method, plural = subn 643 for item in getattr(obj, plural): 644 elem.append(getattr(self, method)(item)) 645 if has_text: 646 elem.text = _serialize(obj.value) 647 return elem
648 wrapped.__doc__ = "Serialize a %s and its subnodes, in order." % tag 649 return wrapped 650 651
652 -def _handle_simple(tag):
653 def wrapped(self, obj): 654 elem = ElementTree.Element(tag) 655 elem.text = _serialize(obj) 656 return elem
657 wrapped.__doc__ = "Serialize a simple %s node." % tag 658 return wrapped 659 660
class Writer(object):
    """Serialize a PhyloXML object to XML.

    Every phyloXML element type has a method of the same name that turns the
    matching object into an ElementTree element.  Most of those methods are
    generated by `_handle_complex` and `_handle_simple`; a few that need
    special treatment are written out by hand.
    """

    def __init__(self, phyloxml):
        """Build an ElementTree from a PhyloXML object."""
        assert isinstance(phyloxml, PX.Phyloxml), "Not a Phyloxml object"
        root = self.phyloxml(phyloxml)
        self._tree = ElementTree.ElementTree(root)

    def write(self, file, encoding='utf-8', indent=True):
        """Write the tree to `file`; return the root's number of children.

        If `indent` is true the tree is pretty-printed in place first.
        """
        if indent:
            _indent(self._tree.getroot())
        self._tree.write(file, encoding)
        root = self._tree.getroot()
        return len(root)

    # Convert classes to ETree elements

    def phyloxml(self, obj):
        """Serialize the top-level phyloxml node and everything below it."""
        root = ElementTree.Element('phyloxml', obj.attributes) # Namespaces
        for phylogeny in obj.phylogenies:
            root.append(self.phylogeny(phylogeny))
        for extra in obj.other:
            root.append(self.other(extra))
        return root

    def other(self, obj):
        """Serialize a non-phyloXML node and its children, recursively."""
        qualified_tag = _ns(obj.tag, obj.namespace)
        node = ElementTree.Element(qualified_tag, obj.attributes)
        node.text = obj.value
        for sub in obj.children:
            node.append(self.other(sub))
        return node

    # Complex types: (tag, XML attributes, child nodes in output order)

    phylogeny = _handle_complex(
            'phylogeny',
            ('rooted', 'rerootable', 'branch_length_unit', 'type'),
            (
                'name',
                'id',
                'description',
                'date',
                ('confidence', 'confidences'),
                'clade',
                ('clade_relation', 'clade_relations'),
                ('sequence_relation', 'sequence_relations'),
                ('property', 'properties'),
                ('other', 'other'),
            ))

    clade = _handle_complex(
            'clade',
            ('id_source',),
            (
                'name',
                'branch_length',
                ('confidence', 'confidences'),
                'width',
                'color',
                'node_id',
                ('taxonomy', 'taxonomies'),
                ('sequence', 'sequences'),
                'events',
                'binary_characters',
                ('distribution', 'distributions'),
                'date',
                ('reference', 'references'),
                ('property', 'properties'),
                ('clade', 'clades'),
                ('other', 'other'),
            ))

    accession = _handle_complex(
            'accession',
            ('source',),
            (),
            has_text=True)

    annotation = _handle_complex(
            'annotation',
            ('ref', 'source', 'evidence', 'type'),
            (
                'desc',
                'confidence',
                ('property', 'properties'),
                'uri',
            ))

    def binary_characters(self, obj):
        """Serialize a binary_characters node and its subnodes."""
        attribs = _clean_attrib(obj,
                                ('type', 'gained_count', 'lost_count',
                                 'present_count', 'absent_count'))
        node = ElementTree.Element('binary_characters', attribs)
        # One child per character list, each holding a <bc> per token
        for list_name in ('gained', 'lost', 'present', 'absent'):
            list_node = ElementTree.Element(list_name)
            for token in getattr(obj, list_name):
                list_node.append(self.bc(token))
            node.append(list_node)
        return node

    clade_relation = _handle_complex(
            'clade_relation',
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    color = _handle_complex(
            'color',
            (),
            ('red', 'green', 'blue'))

    confidence = _handle_complex(
            'confidence',
            ('type',),
            (),
            has_text=True)

    date = _handle_complex(
            'date',
            ('unit',),
            ('desc', 'value', 'minimum', 'maximum'))

    distribution = _handle_complex(
            'distribution',
            (),
            (
                'desc',
                ('point', 'points'),
                ('polygon', 'polygons'),
            ))

    def domain(self, obj):
        """Serialize a domain node."""
        # 'from' is written as start + 1; 'to' is written as end, unchanged
        attribs = {'from': str(obj.start + 1), 'to': str(obj.end)}
        node = ElementTree.Element('domain', attribs)
        if obj.confidence is not None:
            node.set('confidence', _serialize(obj.confidence))
        if obj.id is not None:
            node.set('id', obj.id)
        node.text = _serialize(obj.value)
        return node

    domain_architecture = _handle_complex(
            'domain_architecture',
            ('length',),
            (('domain', 'domains'),))

    events = _handle_complex(
            'events',
            (),
            (
                'type',
                'duplications',
                'speciations',
                'losses',
                'confidence',
            ))

    id = _handle_complex(
            'id',
            ('provider',),
            (),
            has_text=True)

    mol_seq = _handle_complex(
            'mol_seq',
            ('is_aligned',),
            (),
            has_text=True)

    node_id = _handle_complex(
            'node_id',
            ('provider',),
            (),
            has_text=True)

    point = _handle_complex(
            'point',
            ('geodetic_datum', 'alt_unit'),
            ('lat', 'long', 'alt'))

    polygon = _handle_complex(
            'polygon',
            (),
            (('point', 'points'),))

    property = _handle_complex(
            'property',
            ('ref', 'unit', 'datatype', 'applies_to', 'id_ref'),
            (),
            has_text=True)

    reference = _handle_complex(
            'reference',
            ('doi',),
            ('desc',))

    sequence = _handle_complex(
            'sequence',
            ('type', 'id_ref', 'id_source'),
            (
                'symbol',
                'accession',
                'name',
                'location',
                'mol_seq',
                'uri',
                ('annotation', 'annotations'),
                'domain_architecture',
                ('other', 'other'),
            ))

    sequence_relation = _handle_complex(
            'sequence_relation',
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    taxonomy = _handle_complex(
            'taxonomy',
            ('id_source',),
            (
                'id',
                'code',
                'scientific_name',
                'authority',
                ('common_name', 'common_names'),
                ('synonym', 'synonyms'),
                'rank',
                'uri',
                ('other', 'other'),
            ))

    uri = _handle_complex(
            'uri',
            ('desc', 'type'),
            (),
            has_text=True)

    # Primitive types: elements that carry nothing but a text value

    # Floating point
    alt = _handle_simple('alt')
    branch_length = _handle_simple('branch_length')
    lat = _handle_simple('lat')
    long = _handle_simple('long')
    maximum = _handle_simple('maximum')
    minimum = _handle_simple('minimum')
    value = _handle_simple('value')
    width = _handle_simple('width')

    # Integers
    blue = _handle_simple('blue')
    duplications = _handle_simple('duplications')
    green = _handle_simple('green')
    losses = _handle_simple('losses')
    red = _handle_simple('red')
    speciations = _handle_simple('speciations')

    # Strings
    bc = _handle_simple('bc')
    code = _handle_simple('code')
    common_name = _handle_simple('common_name')
    desc = _handle_simple('desc')
    description = _handle_simple('description')
    location = _handle_simple('location')
    name = _handle_simple('name')
    rank = _handle_simple('rank')
    scientific_name = _handle_simple('scientific_name')
    symbol = _handle_simple('symbol')
    synonym = _handle_simple('synonym')
    type = _handle_simple('type')
873