Package Bio :: Package PDB :: Module StructureAlignment'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.StructureAlignment'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Map the residues of two structures to each other based on a FASTA alignment 
  7  file. 
  8  """ 
  9   
 10  from __future__ import print_function 
 11   
 12  from Bio.Data import SCOPData 
 13   
 14  from Bio.PDB import Selection 
 15  from Bio.PDB.Polypeptide import is_aa 
 16   
 17  __docformat__ = "restructuredtext en" 
 18   
19 -class StructureAlignment(object):
20 """ 21 This class aligns two structures based on an alignment of their 22 sequences. 23 """
24 - def __init__(self, fasta_align, m1, m2, si=0, sj=1):
25 """ 26 Attributes: 27 28 - fasta_align --- Alignment object 29 - m1, m2 --- two models 30 - si, sj --- the sequences in the Alignment object that 31 correspond to the structures 32 """ 33 l=fasta_align.get_alignment_length() 34 # Get the residues in the models 35 rl1=Selection.unfold_entities(m1, 'R') 36 rl2=Selection.unfold_entities(m2, 'R') 37 # Residue positions 38 p1=0 39 p2=0 40 # Map equivalent residues to each other 41 map12={} 42 map21={} 43 # List of residue pairs (None if -) 44 duos=[] 45 for i in range(0, l): 46 column=fasta_align.get_column(i) 47 aa1=column[si] 48 aa2=column[sj] 49 if aa1!="-": 50 # Position in seq1 is not - 51 while True: 52 # Loop until an aa is found 53 r1=rl1[p1] 54 p1=p1+1 55 if is_aa(r1): 56 break 57 self._test_equivalence(r1, aa1) 58 else: 59 r1=None 60 if aa2!="-": 61 # Position in seq2 is not - 62 while True: 63 # Loop until an aa is found 64 r2=rl2[p2] 65 p2=p2+1 66 if is_aa(r2): 67 break 68 self._test_equivalence(r2, aa2) 69 else: 70 r2=None 71 if r1: 72 # Map residue in seq1 to its equivalent in seq2 73 map12[r1]=r2 74 if r2: 75 # Map residue in seq2 to its equivalent in seq1 76 map21[r2]=r1 77 # Append aligned pair (r is None if gap) 78 duos.append((r1, r2)) 79 self.map12=map12 80 self.map21=map21 81 self.duos=duos
82
83 - def _test_equivalence(self, r1, aa1):
84 "Test if aa in sequence fits aa in structure." 85 resname=r1.get_resname() 86 resname=SCOPData.protein_letters_3to1[resname] 87 assert(aa1==resname)
88
89 - def get_maps(self):
90 """ 91 Return two dictionaries that map a residue in one structure to 92 the equivealent residue in the other structure. 93 """ 94 return self.map12, self.map21
95
96 - def get_iterator(self):
97 """ 98 Iterator over all residue pairs. 99 """ 100 for i in range(0, len(self.duos)): 101 yield self.duos[i]
102 103 104 if __name__=="__main__": 105 import sys 106 from Bio.Alphabet import generic_protein 107 from Bio import AlignIO 108 from Bio.PDB import PDBParser 109 110 if len(sys.argv) != 4: 111 print("Expects three arguments,") 112 print(" - FASTA alignment filename (expect two sequences)") 113 print(" - PDB file one") 114 print(" - PDB file two") 115 sys.exit() 116 117 # The alignment 118 fa=AlignIO.read(open(sys.argv[1]), "fasta", generic_protein) 119 120 pdb_file1=sys.argv[2] 121 pdb_file2=sys.argv[3] 122 123 # The structures 124 p=PDBParser() 125 s1=p.get_structure('1', pdb_file1) 126 p=PDBParser() 127 s2=p.get_structure('2', pdb_file2) 128 129 # Get the models 130 m1=s1[0] 131 m2=s2[0] 132 133 al=StructureAlignment(fa, m1, m2) 134 135 # Print aligned pairs (r is None if gap) 136 for (r1, r2) in al.get_iterator(): 137 print("%s %s" % (r1, r2)) 138