1
2
3
4
5
6 """Classes corresponding to phyloXML elements.
7
8 See Also
9 --------
10 Official specification:
11 http://phyloxml.org/
12 Journal article:
13 Han and Zmasek (2009), doi:10.1186/1471-2105-10-356
14 """
15 __docformat__ = "restructuredtext en"
16
17 import re
18 import warnings
19
20 from Bio import Alphabet
21 from Bio.Align import MultipleSeqAlignment
22 from Bio.Seq import Seq
23 from Bio.SeqFeature import SeqFeature, FeatureLocation
24 from Bio.SeqRecord import SeqRecord
25 from Bio import BiopythonWarning
26
27 from Bio.Phylo import BaseTree
31 """Warning for non-compliance with the phyloXML specification."""
32 pass
33
36 """Check a string using testfunc, and warn if there's no match."""
37 if text is not None and not testfunc(text):
38 warnings.warn("String %s doesn't match the given regexp" % text,
39 PhyloXMLWarning, stacklevel=2)
40
45 """Base class for all PhyloXML objects."""
46
49 """Root node of the PhyloXML document.
50
51 Contains an arbitrary number of Phylogeny elements, possibly followed by
52 elements from other namespaces.
53
54 :Parameters:
55 attributes
56 (XML namespace definitions)
57 phylogenies
58 list of phylogenetic trees
59 other
60 list of arbitrary non-phyloXML elements, if any
61 """
def __init__(self, attributes, phylogenies=None, other=None):
    """Set up the document root.

    ``attributes`` is stored exactly as given.  ``phylogenies`` and
    ``other`` are replaced by a new empty list when omitted or empty.
    """
    self.attributes = attributes
    self.phylogenies = phylogenies if phylogenies else []
    self.other = other if other else []
66
68 """Get a phylogeny by index or name."""
69 if isinstance(index, int) or isinstance(index, slice):
70 return self.phylogenies[index]
71 if not isinstance(index, basestring):
72 raise KeyError("can't use %s as an index" % type(index))
73 for tree in self.phylogenies:
74 if tree.name == index:
75 return tree
76 else:
77 raise KeyError("no phylogeny found with name " + repr(index))
78
80 """Iterate through the phylogenetic trees in this object."""
81 return iter(self.phylogenies)
82
84 """Number of phylogenetic trees in this object."""
85 return len(self.phylogenies)
86
88 return '%s([%s])' % (self.__class__.__name__,
89 ',\n'.join(map(str, self.phylogenies)))
90
91
92 -class Other(PhyloElement):
93 """Container for non-phyloXML elements in the tree.
94
95 Usually, an Other object will have either a 'value' or a non-empty list
96 of 'children', but not both. This is not enforced here, though.
97
98 :Parameters:
99 tag : string
100 local tag for the XML node
101 namespace : string
102 XML namespace for the node -- should not be the default phyloXML
103 namespace.
104 attributes : dict of strings
105 attributes on the XML node
106 value : string
107 text contained directly within this XML node
108 children : list
109 child nodes, if any (also `Other` instances)
110 """
def __init__(self, tag, namespace=None, attributes=None, value=None,
             children=None):
    """Initialize a container for a non-phyloXML element.

    :Parameters:
        tag : string
            local tag for the XML node
        namespace : string
            XML namespace for the node
        attributes : dict of strings
            attributes on the XML node; an empty dict when omitted
        value : string
            text contained directly within this XML node
        children : list
            child nodes, if any; an empty list when omitted
    """
    self.tag = tag
    self.namespace = namespace
    # Default to an empty dict (mirroring ``children``) so the attributes
    # can always be iterated or indexed without a None check.
    self.attributes = attributes or {}
    self.value = value
    self.children = children or []
118
120 """Iterate through the children of this object (if any)."""
121 return iter(self.children)
122
123
124 -class Phylogeny(PhyloElement, BaseTree.Tree):
125 """A phylogenetic tree.
126
127 :Parameters:
128 root : Clade
129 the root node/clade of this tree
130 rooted : bool
131 True if this tree is rooted
132 rerootable : bool
133 True if this tree is rerootable
134 branch_length_unit : string
135 unit for branch_length values on clades
136 name : string
137 identifier for this tree, not required to be unique
138 id : Id
139 unique identifier for this tree
140 description : string
141 plain-text description
142 date : Date
143 date for the root node of this tree
144 confidences : list
145 Confidence objects for this tree
146 clade_relations : list
147 CladeRelation objects
148 sequence_relations : list
149 SequenceRelation objects
150 properties : list
151 Property objects
152 other : list
153 non-phyloXML elements (type `Other`)
154 """
def __init__(self, root=None, rooted=True,
             rerootable=None, branch_length_unit=None, type=None,
             name=None, id=None, description=None, date=None,
             confidences=None, clade_relations=None, sequence_relations=None,
             properties=None, other=None,
             ):
    """Populate a phylogeny from its XML attributes and child elements.

    ``rooted`` must be a real ``bool``.  The single-valued arguments are
    stored unchanged; each list-valued argument is replaced by a new empty
    list when it is omitted or empty.
    """
    assert isinstance(rooted, bool)
    # Single-valued items, assigned as given.
    for attr, given in (
            ('root', root),
            ('rooted', rooted),
            ('rerootable', rerootable),
            ('branch_length_unit', branch_length_unit),
            ('type', type),
            ('name', name),
            ('id', id),
            ('description', description),
            ('date', date),
    ):
        setattr(self, attr, given)
    # Collections, each defaulting to its own empty list.
    for attr, given in (
            ('confidences', confidences),
            ('clade_relations', clade_relations),
            ('sequence_relations', sequence_relations),
            ('properties', properties),
            ('other', other),
    ):
        setattr(self, attr, given or [])
178
179 @classmethod
181 """Create a new Phylogeny given a Tree (from Newick/Nexus or BaseTree).
182
183 Keyword arguments are the usual `Phylogeny` constructor parameters.
184 """
185 phy = cls(
186 root=Clade.from_clade(tree.root),
187 rooted=tree.rooted,
188 name=tree.name,
189 id=(tree.id is not None) and Id(str(tree.id)) or None)
190 phy.__dict__.update(kwargs)
191 return phy
192
193 @classmethod
195 """Create a new Phylogeny given a Newick or BaseTree Clade object.
196
197 Keyword arguments are the usual `PhyloXML.Clade` constructor parameters.
198 """
199 return Clade.from_clade(clade).to_phylogeny(**kwargs)
200
202 """Return this tree, a PhyloXML-compatible Phylogeny object.
203
204 Overrides the `BaseTree` method.
205 """
206 return self
207
209 """Create a new Phyloxml object containing just this phylogeny."""
210 return Phyloxml(kwargs, phylogenies=[self])
211
213 """Construct an alignment from the aligned sequences in this tree."""
214 def is_aligned_seq(elem):
215 if isinstance(elem, Sequence) and elem.mol_seq.is_aligned:
216 return True
217 return False
218 seqs = self._filter_search(is_aligned_seq, 'preorder', True)
219 try:
220 first_seq = seqs.next()
221 except StopIteration:
222
223 return MultipleSeqAlignment([])
224 msa = MultipleSeqAlignment([first_seq.to_seqrecord()],
225 first_seq.get_alphabet())
226 msa.extend(seq.to_seqrecord() for seq in seqs)
227 return msa
228
229
231 """Equivalent to self.confidences[0] if there is only 1 value.
232
233 See also: `Clade.confidence`, `Clade.taxonomy`
234 """
235 if len(self.confidences) == 0:
236 return None
237 if len(self.confidences) > 1:
238 raise AttributeError("more than 1 confidence value available; "
239 "use Phylogeny.confidences")
240 return self.confidences[0]
241
243 if value is None:
244
245 self.confidences = []
246 return
247 if isinstance(value, float) or isinstance(value, int):
248 value = Confidence(value)
249 elif not isinstance(value, Confidence):
250 raise ValueError("value must be a number or Confidence instance")
251 if len(self.confidences) == 0:
252 self.confidences.append(value)
253 elif len(self.confidences) == 1:
254 self.confidences[0] = value
255 else:
256 raise ValueError("multiple confidence values already exist; "
257 "use Phylogeny.confidences instead")
258
260 self.confidences = []
261
262 confidence = property(_get_confidence, _set_confidence, _del_confidence)
263
264
265 -class Clade(PhyloElement, BaseTree.Clade):
266 """Describes a branch of the current phylogenetic tree.
267
268 Used recursively, describes the topology of a phylogenetic tree.
269
270 Both ``color`` and ``width`` elements should be interpreted by client code
271 as applying to the whole clade, including all descendants, unless
272 overridden in sub-clades. This module doesn't automatically assign these
273 attributes to sub-clades to achieve this cascade -- and neither should you.
274
275 :Parameters:
276 branch_length
277 parent branch length of this clade
278 id_source
279 link other elements to a clade (on the xml-level)
280 name : string
281 short label for this clade
282 confidences : list of Confidence objects
283 used to indicate the support for a clade/parent branch.
284 width : float
285 branch width for this clade (including branch from parent)
286 color : BranchColor
287 color used for graphical display of this clade
288 node_id
289 unique identifier for the root node of this clade
290 taxonomies : list
291 Taxonomy objects
292 sequences : list
293 Sequence objects
294 events : Events
295 describe such events as gene-duplications at the root node/parent
296 branch of this clade
297 binary_characters : BinaryCharacters
298 binary characters
299 distributions : list of Distribution objects
300 distribution(s) of this clade
301 date : Date
302 a date for the root node of this clade
303 references : list
304 Reference objects
305 properties : list
306 Property objects
307 clades : list Clade objects
308 Sub-clades
309 other : list of Other objects
310 non-phyloXML objects
311 """
312 - def __init__(self,
313
314 branch_length=None, id_source=None,
315
316 name=None, width=None, color=None, node_id=None, events=None,
317 binary_characters=None, date=None,
318
319 confidences=None, taxonomies=None, sequences=None,
320 distributions=None, references=None, properties=None, clades=None,
321 other=None,
322 ):
340
341 @classmethod
357
359 """Create a new phylogeny containing just this clade."""
360 phy = Phylogeny(root=self, date=self.date)
361 phy.__dict__.update(kwargs)
362 return phy
363
364
365
367 if len(self.confidences) == 0:
368 return None
369 if len(self.confidences) > 1:
370 raise AttributeError("more than 1 confidence value available; "
371 "use Clade.confidences")
372 return self.confidences[0]
373
375 if value is None:
376
377 self.confidences = []
378 return
379 if isinstance(value, float) or isinstance(value, int):
380 value = Confidence(value)
381 elif not isinstance(value, Confidence):
382 raise ValueError("value must be a number or Confidence instance")
383 if len(self.confidences) == 0:
384 self.confidences.append(value)
385 elif len(self.confidences) == 1:
386 self.confidences[0] = value
387 else:
388 raise ValueError("multiple confidence values already exist; "
389 "use Phylogeny.confidences instead")
390
392 self.confidences = []
393
394 confidence = property(_get_confidence, _set_confidence, _del_confidence)
395
397 if len(self.taxonomies) == 0:
398 return None
399 if len(self.taxonomies) > 1:
400 raise AttributeError("more than 1 taxonomy value available; "
401 "use Clade.taxonomies")
402 return self.taxonomies[0]
403
405 if not isinstance(value, Taxonomy):
406 raise ValueError("assigned value must be a Taxonomy instance")
407 if len(self.taxonomies) == 0:
408 self.taxonomies.append(value)
409 elif len(self.taxonomies) == 1:
410 self.taxonomies[0] = value
411 else:
412 raise ValueError("multiple taxonomy values already exist; "
413 "use Phylogeny.taxonomies instead")
414
415 taxonomy = property(_get_taxonomy, _set_taxonomy)
416
417
418
419
420 -class BranchColor(PhyloElement, BaseTree.BranchColor):
423
428 """Captures the local part in a sequence identifier.
429
430 Example: In ``UniProtKB:P17304``, the Accession instance attribute ``value``
431 is 'P17304' and the ``source`` attribute is 'UniProtKB'.
432 """
436
438 """Show the accession as 'source:value'."""
439 return '%s:%s' % (self.source, self.value)
440
443 """The annotation of a molecular sequence.
444
445 It is recommended to annotate by using the optional 'ref' attribute.
446
447 :Parameters:
448 ref : string
449 reference string, e.g. 'GO:0008270',
450 'KEGG:Tetrachloroethene degradation', 'EC:1.1.1.1'
451 source : string
452 plain-text source for this annotation
453 evidence : str
454 describe evidence as free text (e.g. 'experimental')
455 desc : string
456 free text description
457 confidence : Confidence
458 state the type and value of support (type Confidence)
459 properties : list
460 typed and referenced annotations from external resources
461 uri : Uri
462 link
463 """
464 re_ref = re.compile(r'[a-zA-Z0-9_]+:[a-zA-Z0-9_\.\-\s]+')
465
466 - def __init__(self,
467
468 ref=None, source=None, evidence=None, type=None,
469
470 desc=None, confidence=None, uri=None,
471
472 properties=None):
482
485 """The names and/or counts of binary characters present, gained, and lost
486 at the root of a clade.
487 """
def __init__(self,
             type=None, gained_count=None, lost_count=None, present_count=None,
             absent_count=None,
             gained=None, lost=None, present=None, absent=None):
    """Record the character type, the four counts and the four name lists.

    The type and the counts are stored as given; each of ``gained``,
    ``lost``, ``present`` and ``absent`` becomes a new empty list when
    omitted or empty.
    """
    # Attributes taken verbatim.
    for attr, given in (
            ('type', type),
            ('gained_count', gained_count),
            ('lost_count', lost_count),
            ('present_count', present_count),
            ('absent_count', absent_count),
    ):
        setattr(self, attr, given)
    # Lists of character names.
    for attr, given in (
            ('gained', gained),
            ('lost', lost),
            ('present', present),
            ('absent', absent),
    ):
        setattr(self, attr, given or [])
503
507 """Expresses a typed relationship between two clades.
508
509 For example, this could be used to describe multiple parents of a clade.
510
511 @type id_ref_0: str
512 @type id_ref_1: str
513 @type distance: str
514 @type type: str
515
516 @type confidence: Confidence
517 """
518 - def __init__(self, type, id_ref_0, id_ref_1,
519 distance=None, confidence=None):
525
528 """A general purpose confidence element.
529
530 For example, this can be used to express the bootstrap support value of a
531 clade (in which case the `type` attribute is 'bootstrap').
532
533 :Parameters:
534 value : float
535 confidence value
536 type : string
537 label for the type of confidence, e.g. 'bootstrap'
538 """
539 - def __init__(self, value, type='unknown'):
542
543
544
546 """Return the hash value of the object.
547
548 Hash values are integers. They are used to quickly compare dictionary
549 keys during a dictionary lookup. Numeric values that compare equal have
550 the same hash value (even if they are of different types, as is the
551 case for 1 and 1.0).
552 """
553 return id(self)
554
559
564
565
566
571
574
576 return not (self <= other)
577
580
581
582
585
588
591
594
597
600
603
606
608 """Rational-style division in Py3.0+.
609
610 Also active in Py2.5+ with __future__.division import.
611 """
612 return self.value / other
613
616
618 """C-style and old-style division in Py3.0+.
619
620 Also active in Py2.5+ with __future__.division import.
621 """
622 return self.value.__floordiv__(other)
623
626
629
632
635
638
def __pow__(self, other, modulo=None):
    """Raise the stored value to the power ``other``.

    When ``modulo`` is given, the three-argument form of ``pow`` is used.
    """
    if modulo is None:
        return pow(self.value, other)
    return pow(self.value, other, modulo)
643
646
647
648
651
654
657
658
659
661 return float(self.value)
662
664 return int(self.value)
665
668
669
670 -class Date(PhyloElement):
671 """A date associated with a clade/node.
672
673 Its value can be numerical by using the 'value' element and/or free text
674 with the 'desc' element (e.g. 'Silurian'). If a numerical value is used, it
675 is recommended to employ the 'unit' attribute.
676
677 :Parameters:
678 unit : string
679 type of numerical value (e.g. 'mya' for 'million years ago')
680 value : float
681 the date value
682 desc : string
683 plain-text description of the date
684 minimum : float
685 lower bound on the date value
686 maximum : float
687 upper bound on the date value
688 """
689 - def __init__(self, value=None, unit=None, desc=None,
690 minimum=None, maximum=None):
696
698 """Show the class name and the human-readable date."""
699 if self.unit and self.value is not None:
700 return '%s %s' % (self.value, self.unit)
701 if self.desc is not None:
702 return self.desc
703 return self.__class__.__name__
704
707 """Geographic distribution of the items of a clade (species, sequences).
708
709 Intended for phylogeographic applications.
710
711 :Parameters:
712 desc : string
713 free-text description of the location
714 points : list of `Point` objects
715 coordinates (similar to the 'Point' element in Google's KML format)
716 polygons : list of `Polygon` objects
717 coordinate sets defining geographic regions
718 """
def __init__(self, desc=None, points=None, polygons=None):
    """Store the description plus the point and polygon lists.

    ``points`` and ``polygons`` become new empty lists when omitted or
    empty.
    """
    self.desc = desc
    self.points = points if points else []
    self.polygons = polygons if polygons else []
723
724
class DomainArchitecture(PhyloElement):
    """Domain architecture of a protein.

    :Parameters:
        length : int
            total length of the protein sequence
        domains : list ProteinDomain objects
            the domains within this protein
    """
    def __init__(self, length=None, domains=None):
        """Store the sequence length and the list of domains.

        ``domains`` becomes a new empty list when omitted or empty.
        """
        self.length = length
        # Never leave this as None: code that walks the domains of an
        # architecture can then iterate without a special case.
        self.domains = domains or []
737
738
739 -class Events(PhyloElement):
740 """Events at the root node of a clade (e.g. one gene duplication).
741
742 All attributes are set to None by default, but this object can also be
743 treated as a dictionary, in which case None values are treated as missing
744 keys and deleting a key resets that attribute's value back to None.
745 """
746 ok_type = set(('transfer', 'fusion', 'speciation_or_duplication', 'other',
747 'mixed', 'unassigned'))
748
749 - def __init__(self, type=None, duplications=None, speciations=None,
750 losses=None, confidence=None):
757
759 return [(k, v) for k, v in self.__dict__.iteritems() if v is not None]
760
762 return [k for k, v in self.__dict__.iteritems() if v is not None]
763
765 return [v for v in self.__dict__.itervalues() if v is not None]
766
769
771 if not hasattr(self, key):
772 raise KeyError(key)
773 val = getattr(self, key)
774 if val is None:
775 raise KeyError("%s has not been set in this object" % repr(key))
776 return val
777
779 setattr(self, key, val)
780
782 setattr(self, key, None)
783
785 return iter(self.keys())
786
788 return (hasattr(self, key) and getattr(self, key) is not None)
789
790
791 -class Id(PhyloElement):
792 """A general-purpose identifier element.
793
794 Indicates the provider (or authority) of an identifier, e.g. NCBI,
795 along with the value itself.
796 """
def __init__(self, value, provider=None):
    """Keep the identifier ``value`` and, optionally, its ``provider``."""
    self.value, self.provider = value, provider
800
802 if self.provider is not None:
803 return '%s:%s' % (self.provider, self.value)
804 return self.value
805
806
807 -class MolSeq(PhyloElement):
808 """Store a molecular sequence.
809
810 :Parameters:
811 value : string
812 the sequence itself
813 is_aligned : bool
814 True if this sequence is aligned with the others (usually meaning
815 all aligned seqs are the same length and gaps may be present)
816 """
817 re_value = re.compile(r'[a-zA-Z\.\-\?\*_]+')
818
819 - def __init__(self, value, is_aligned=None):
823
826
827
class Point(PhyloElement):
    """A geographic point: latitude, longitude and an optional altitude.

    Used by element 'Distribution'.

    :Parameters:
        geodetic_datum : string, required
            the geodetic datum (also called 'map datum'). For example,
            Google's KML uses 'WGS84'.
        lat : numeric
            latitude
        long : numeric
            longitude
        alt : numeric
            altitude
        alt_unit : string
            unit for the altitude (e.g. 'meter')
    """
    def __init__(self, geodetic_datum, lat, long, alt=None, alt_unit=None):
        """Record the datum, the coordinates and the optional altitude."""
        self.geodetic_datum = geodetic_datum
        self.lat, self.long = lat, long
        self.alt, self.alt_unit = alt, alt_unit
852
855 """A polygon defined by a list of 'Points' (used by element 'Distribution').
856
857 :param points: list of 3 or more points representing vertices.
858 """
860 self.points = points or []
861
863 return '%s([%s])' % (self.__class__.__name__,
864 ',\n'.join(map(str, self.points)))
865
868 """A typed and referenced property from an external resource.
869
870 Can be attached to `Phylogeny`, `Clade`, and `Annotation` objects.
871
872 :Parameters:
873 value : string
874 the value of the property
875 ref : string
876 reference to an external resource, e.g. "NOAA:depth"
877 applies_to : string
878 indicates the item to which a property applies (e.g. 'node' for
879 the parent node of a clade, 'parent_branch' for the parent branch of
880 a clade, or just 'clade').
881 datatype : string
882 the type of a property; limited to xsd-datatypes
883 (e.g. 'xsd:string', 'xsd:boolean', 'xsd:integer', 'xsd:decimal',
884 'xsd:float', 'xsd:double', 'xsd:date', 'xsd:anyURI').
885 unit : string (optional)
886 the unit of the property, e.g. "METRIC:m"
887 id_ref : Id (optional)
888 allows attaching a property specifically to one element (on the
889 xml-level)
890 """
891 re_ref = re.compile(r'[a-zA-Z0-9_]+:[a-zA-Z0-9_\.\-\s]+')
892 ok_applies_to = set(('phylogeny', 'clade', 'node', 'annotation',
893 'parent_branch', 'other'))
894 ok_datatype = set(('xsd:string', 'xsd:boolean', 'xsd:decimal', 'xsd:float',
895 'xsd:double', 'xsd:duration', 'xsd:dateTime', 'xsd:time', 'xsd:date',
896 'xsd:gYearMonth', 'xsd:gYear', 'xsd:gMonthDay', 'xsd:gDay',
897 'xsd:gMonth', 'xsd:hexBinary', 'xsd:base64Binary', 'xsd:anyURI',
898 'xsd:normalizedString', 'xsd:token', 'xsd:integer',
899 'xsd:nonPositiveInteger', 'xsd:negativeInteger', 'xsd:long', 'xsd:int',
900 'xsd:short', 'xsd:byte', 'xsd:nonNegativeInteger', 'xsd:unsignedLong',
901 'xsd:unsignedInt', 'xsd:unsignedShort', 'xsd:unsignedByte',
902 'xsd:positiveInteger'))
903
904 - def __init__(self, value, ref, applies_to, datatype,
905 unit=None, id_ref=None):
916
917
class ProteinDomain(PhyloElement):
    """Represents an individual domain in a domain architecture.

    The locations use 0-based indexing, as most Python objects including
    SeqFeature do, rather than the usual biological convention starting at 1.
    This means the start and end attributes can be used directly as slice
    indexes on Seq objects.

    :Parameters:
        value : string
            content of the domain element; used as the ``id`` of the
            SeqFeature built by `to_seqfeature`
        start : non-negative integer
            start of the domain on the sequence, using 0-based indexing
        end : non-negative integer
            end of the domain on the sequence
        confidence : float
            can be used to store e.g. E-values
        id : string
            unique identifier/name
    """

    def __init__(self, value, start, end, confidence=None, id=None):
        """Store the domain's value, its location and optional metadata."""
        self.value = value
        self.start = start
        self.end = end
        self.confidence = confidence
        self.id = id

    @classmethod
    def from_seqfeature(cls, feat):
        """Create a domain from a SeqFeature.

        The feature's ``id`` becomes ``value``, the location's
        ``nofuzzy_start``/``nofuzzy_end`` become ``start``/``end``, and the
        'confidence' qualifier is carried over (None if it is absent).
        """
        # Build through ``cls`` so that subclasses get instances of
        # themselves rather than of the base class.
        return cls(feat.id,
                   feat.location.nofuzzy_start,
                   feat.location.nofuzzy_end,
                   confidence=feat.qualifiers.get('confidence'))

    def to_seqfeature(self):
        """Create a SeqFeature spanning ``start``..``end`` with ``value`` as id.

        The confidence is stored under the 'confidence' qualifier.
        """
        feat = SeqFeature(location=FeatureLocation(self.start, self.end),
                          id=self.value)
        # __init__ always sets the attribute, so this guard only skips
        # instances from which it was removed; a None confidence is copied.
        if hasattr(self, 'confidence'):
            feat.qualifiers['confidence'] = self.confidence
        return feat
957
960 """Literature reference for a clade.
961
962 NB: Whenever possible, use the ``doi`` attribute instead of the free-text
963 ``desc`` element.
964 """
965 re_doi = re.compile(r'[a-zA-Z0-9_\.]+/[a-zA-Z0-9_\.]+')
966
967 - def __init__(self, doi=None, desc=None):
971
974 """A molecular sequence (Protein, DNA, RNA) associated with a node.
975
976 One intended use for ``id_ref`` is to link a sequence to a taxonomy (via the
977 taxonomy's ``id_source``) in case of multiple sequences and taxonomies per
978 node.
979
980 :Parameters:
981 type : {'dna', 'rna', 'protein'}
982 type of molecule this sequence represents
983 id_ref : string
984 reference to another resource
985 id_source : string
986 source for the reference
987 symbol : string
988 short symbol of the sequence, e.g. 'ACTM' (max. 10 chars)
989 accession : Accession
990 accession code for this sequence.
991 name : string
992 full name of the sequence, e.g. 'muscle Actin'
993 location
994 location of a sequence on a genome/chromosome.
995 mol_seq : MolSeq
996 the molecular sequence itself
997 uri : Uri
998 link
999 annotations : list of Annotation objects
1000 annotations on this sequence
1001 domain_architecture : DomainArchitecture
1002 protein domains on this sequence
1003 other : list of Other objects
1004 non-phyloXML elements
1005 """
1006 alphabets = {'dna': Alphabet.generic_dna,
1007 'rna': Alphabet.generic_rna,
1008 'protein': Alphabet.generic_protein}
1009 re_symbol = re.compile(r'\S{1,10}')
1010
1011 - def __init__(self,
1012
1013 type=None, id_ref=None, id_source=None,
1014
1015 symbol=None, accession=None, name=None, location=None,
1016 mol_seq=None, uri=None, domain_architecture=None,
1017
1018 annotations=None, other=None,
1019 ):
1034
1035 @classmethod
1037 """Create a new PhyloXML Sequence from a SeqRecord object."""
1038 if is_aligned == None:
1039 is_aligned = isinstance(record.seq.alphabet, Alphabet.Gapped)
1040 params = {
1041 'accession': Accession(record.id, ''),
1042 'symbol': record.name,
1043 'name': record.description,
1044 'mol_seq': MolSeq(str(record.seq), is_aligned),
1045 }
1046 if isinstance(record.seq.alphabet, Alphabet.DNAAlphabet):
1047 params['type'] = 'dna'
1048 elif isinstance(record.seq.alphabet, Alphabet.RNAAlphabet):
1049 params['type'] = 'rna'
1050 elif isinstance(record.seq.alphabet, Alphabet.ProteinAlphabet):
1051 params['type'] = 'protein'
1052
1053
1054 for key in ('id_ref', 'id_source', 'location'):
1055 if key in record.annotations:
1056 params[key] = record.annotations[key]
1057 if isinstance(record.annotations.get('uri'), dict):
1058 params['uri'] = Uri(**record.annotations['uri'])
1059
1060 if record.annotations.get('annotations'):
1061 params['annotations'] = []
1062 for annot in record.annotations['annotations']:
1063 ann_args = {}
1064 for key in ('ref', 'source', 'evidence', 'type', 'desc'):
1065 if key in annot:
1066 ann_args[key] = annot[key]
1067 if isinstance(annot.get('confidence'), list):
1068 ann_args['confidence'] = Confidence(
1069 *annot['confidence'])
1070 if isinstance(annot.get('properties'), list):
1071 ann_args['properties'] = [Property(**prop)
1072 for prop in annot['properties']
1073 if isinstance(prop, dict)]
1074 params['annotations'].append(Annotation(**ann_args))
1075
1076
1077 if record.features:
1078 params['domain_architecture'] = DomainArchitecture(
1079 length=len(record.seq),
1080 domains=[ProteinDomain.from_seqfeature(feat)
1081 for feat in record.features])
1082
1083 return Sequence(**params)
1084
1086 """Create a SeqRecord object from this Sequence instance.
1087
1088 The seqrecord.annotations dictionary is packed like so::
1089
1090 { # Sequence attributes with no SeqRecord equivalent:
1091 'id_ref': self.id_ref,
1092 'id_source': self.id_source,
1093 'location': self.location,
1094 'uri': { 'value': self.uri.value,
1095 'desc': self.uri.desc,
1096 'type': self.uri.type },
1097 # Sequence.annotations attribute (list of Annotations)
1098 'annotations': [{ 'ref': ann.ref,
1099 'source': ann.source,
1100 'evidence': ann.evidence,
1101 'type': ann.type,
1102 'confidence': [ ann.confidence.value,
1103 ann.confidence.type ],
1104 'properties': [{ 'value': prop.value,
1105 'ref': prop.ref,
1106 'applies_to': prop.applies_to,
1107 'datatype': prop.datatype,
1108 'unit': prop.unit,
1109 'id_ref': prop.id_ref }
1110 for prop in ann.properties],
1111 } for ann in self.annotations],
1112 }
1113 """
1114 def clean_dict(dct):
1115 """Remove None-valued items from a dictionary."""
1116 return dict((key, val) for key, val in dct.iteritems()
1117 if val is not None)
1118
1119 seqrec = SeqRecord(Seq(self.mol_seq.value, self.get_alphabet()),
1120 **clean_dict({
1121 'id': str(self.accession),
1122 'name': self.symbol,
1123 'description': self.name,
1124
1125 }))
1126 if self.domain_architecture:
1127 seqrec.features = [dom.to_seqfeature()
1128 for dom in self.domain_architecture.domains]
1129
1130 seqrec.annotations = clean_dict({
1131 'id_ref': self.id_ref,
1132 'id_source': self.id_source,
1133 'location': self.location,
1134 'uri': self.uri and clean_dict({
1135 'value': self.uri.value,
1136 'desc': self.uri.desc,
1137 'type': self.uri.type,
1138 }),
1139 'annotations': self.annotations and [
1140 clean_dict({
1141 'ref': ann.ref,
1142 'source': ann.source,
1143 'evidence': ann.evidence,
1144 'type': ann.type,
1145 'confidence': ann.confidence and [
1146 ann.confidence.value,
1147 ann.confidence.type],
1148 'properties': [clean_dict({
1149 'value': prop.value,
1150 'ref': prop.ref,
1151 'applies_to': prop.applies_to,
1152 'datatype': prop.datatype,
1153 'unit': prop.unit,
1154 'id_ref': prop.id_ref })
1155 for prop in ann.properties],
1156 }) for ann in self.annotations],
1157 })
1158 return seqrec
1159
1165
1168 """Express a typed relationship between two sequences.
1169
1170 For example, this could be used to describe an orthology (in which case
1171 attribute 'type' is 'orthology').
1172
1173 :Parameters:
1174 id_ref_0 : Id
1175 first sequence reference identifier
1176 id_ref_1 : Id
1177 second sequence reference identifier
1178 distance : float
1179 distance between the two sequences
1180 type : restricted string
1181 describe the type of relationship
1182 confidence : Confidence
1183 confidence value for this relation
1184 """
1185 ok_type = set(('orthology', 'one_to_one_orthology', 'super_orthology',
1186 'paralogy', 'ultra_paralogy', 'xenology', 'unknown', 'other'))
1187
1188 - def __init__(self, type, id_ref_0, id_ref_1,
1189 distance=None, confidence=None):
1196
1199 """Describe taxonomic information for a clade.
1200
1201 :Parameters:
1202 id_source : Id
1203 link other elements to a taxonomy (on the XML level)
1204 id : Id
1205 unique identifier of a taxon, e.g. Id('6500',
1206 provider='ncbi_taxonomy') for the California sea hare
1207 code : restricted string
1208 store UniProt/Swiss-Prot style organism codes, e.g. 'APLCA' for the
1209 California sea hare 'Aplysia californica'
1210 scientific_name : string
1211 the standard scientific name for this organism, e.g. 'Aplysia
1212 californica' for the California sea hare
1213 authority : string
1214 keep the authority, such as 'J. G. Cooper, 1863', associated with
1215 the 'scientific_name'
1216 common_names : list of strings
1217 common names for this organism
1218 synonyms : list of strings
1219 synonyms for this taxon?
1220 rank : restricted string
1221 taxonomic rank
1222 uri : Uri
1223 link
1224 other : list of Other objects
1225 non-phyloXML elements
1226 """
1227 re_code = re.compile(r'[a-zA-Z0-9_]{2,10}')
1228 ok_rank = set(('domain', 'kingdom', 'subkingdom', 'branch', 'infrakingdom',
1229 'superphylum', 'phylum', 'subphylum', 'infraphylum', 'microphylum',
1230 'superdivision', 'division', 'subdivision', 'infradivision',
1231 'superclass', 'class', 'subclass', 'infraclass', 'superlegion',
1232 'legion', 'sublegion', 'infralegion', 'supercohort', 'cohort',
1233 'subcohort', 'infracohort', 'superorder', 'order', 'suborder',
1234 'superfamily', 'family', 'subfamily', 'supertribe', 'tribe', 'subtribe',
1235 'infratribe', 'genus', 'subgenus', 'superspecies', 'species',
1236 'subspecies', 'variety', 'subvariety', 'form', 'subform', 'cultivar',
1237 'unknown', 'other'))
1238
1239 - def __init__(self,
1240
1241 id_source=None,
1242
1243 id=None, code=None, scientific_name=None, authority=None,
1244 rank=None, uri=None,
1245
1246 common_names=None, synonyms=None, other=None,
1247 ):
1260
1262 """Show the class name and an identifying attribute."""
1263 if self.code is not None:
1264 return self.code
1265 if self.scientific_name is not None:
1266 return self.scientific_name
1267 if self.rank is not None:
1268 return self.rank
1269 if self.id is not None:
1270 return str(self.id)
1271 return self.__class__.__name__
1272
1273
1274 -class Uri(PhyloElement):
1275 """A uniform resource identifier.
1276
1277 In general, this is expected to be an URL (for example, to link to an image
1278 on a website, in which case the ``type`` attribute might be 'image' and
1279 ``desc`` might be 'image of a California sea hare').
1280 """
1281 - def __init__(self, value, desc=None, type=None):
1285
1287 if self.value:
1288 return self.value
1289 return repr(self)
1290