Package Bio :: Package Alphabet :: Module IUPAC
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.IUPAC

  1  # Copyright 2000-2001 by Andrew Dalke. 
  2  # Revisions copyright 2008 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Standard nucleotide and protein alphabets defined by IUPAC.""" 
  9   
 10  from Bio import Alphabet 
 11  from Bio.Data import IUPACData 
 12   
 13  __docformat__ = "restructuredtext en" 
 14   
 15   
 16  # #################### Protein 
 17   
 18  # From the IUPAC definition at: 
 19  #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 
 20   
 21  assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper() 
 22   
 23   
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    - B = "Asx"; Aspartic acid (D) or Asparagine (N)
    - X = "Xxx"; Unknown or 'other' amino acid
    - Z = "Glx"; Glutamic acid (E) or Glutamine (Q)
    - J = "Xle"; Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    - U = "Sec"; Selenocysteine
    - O = "Pyl"; Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    # The letter set is taken verbatim from the IUPACData table.
    letters = IUPACData.extended_protein_letters


# Ready-made module-level instance of the extended protein alphabet.
extended_protein = ExtendedIUPACProtein()

# IUPACProtein is documented as an uppercase alphabet, so the letter table
# it takes from IUPACData must already be uppercase.
assert (
    IUPACData.protein_letters.upper()
    == IUPACData.protein_letters
)


class IUPACProtein(ExtendedIUPACProtein):
    """IUPAC protein alphabet limited to the 20 standard amino acids.

    Uses uppercase single letter codes; the letter set comes from
    ``IUPACData.protein_letters``.
    """

    letters = IUPACData.protein_letters


# Ready-made module-level instance of the standard protein alphabet.
protein = IUPACProtein()

# #################### DNA


# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """IUPAC DNA alphabet including the ambiguity codes (uppercase).

    The letter set comes from ``IUPACData.ambiguous_dna_letters``.
    """

    letters = IUPACData.ambiguous_dna_letters


# Ready-made module-level instance of the ambiguous DNA alphabet.
ambiguous_dna = IUPACAmbiguousDNA()


class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """IUPAC DNA alphabet of the four uppercase letters G, A, T and C only.

    The letter set comes from ``IUPACData.unambiguous_dna_letters``.
    """

    letters = IUPACData.unambiguous_dna_letters


# Ready-made module-level instance of the unambiguous DNA alphabet.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """DNA alphabet extending GATC with letters for modified bases.

    Besides the standard letter codes G, A, T and C, the following
    letters are included:

    - B = 5-bromouridine
    - D = 5,6-dihydrouridine
    - S = thiouridine
    - W = wyosine

    The letter set comes from ``IUPACData.extended_dna_letters``.
    """

    letters = IUPACData.extended_dna_letters


# Ready-made module-level instance of the extended DNA alphabet.
extended_dna = ExtendedIUPACDNA()

# #################### RNA


class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """IUPAC RNA alphabet including the ambiguity codes (uppercase).

    The letter set comes from ``IUPACData.ambiguous_rna_letters``.
    """

    letters = IUPACData.ambiguous_rna_letters


# Ready-made module-level instance of the ambiguous RNA alphabet.
ambiguous_rna = IUPACAmbiguousRNA()


class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """IUPAC RNA alphabet of the four uppercase letters G, A, U and C only.

    The letter set comes from ``IUPACData.unambiguous_rna_letters``.
    """

    letters = IUPACData.unambiguous_rna_letters


# Ready-made module-level instance of the unambiguous RNA alphabet.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
# class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#     letters = extended_rna_letters
#     #   B == 5-bromouridine
#     #   D == 5,6-dihydrouridine
#     #   S == thiouridine
#     #   W == wyosine