Package Bio :: Package PDB :: Module StructureAlignment'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.StructureAlignment'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Map the residues of two structures to each other based on a FASTA alignment 
  7  file. 
  8  """ 
  9   
 10  from __future__ import print_function 
 11   
 12  from Bio.Data import SCOPData 
 13   
 14  from Bio.PDB import Selection 
 15  from Bio.PDB.Polypeptide import is_aa 
 16   
 17   
18 -class StructureAlignment(object):
19 """ 20 This class aligns two structures based on an alignment of their 21 sequences. 22 """
23 - def __init__(self, fasta_align, m1, m2, si=0, sj=1):
24 """ 25 fasta_align --- Alignment object 26 m1, m2 --- two models 27 si, sj --- the sequences in the Alignment object that 28 correspond to the structures 29 """ 30 l=fasta_align.get_alignment_length() 31 # Get the residues in the models 32 rl1=Selection.unfold_entities(m1, 'R') 33 rl2=Selection.unfold_entities(m2, 'R') 34 # Residue positions 35 p1=0 36 p2=0 37 # Map equivalent residues to each other 38 map12={} 39 map21={} 40 # List of residue pairs (None if -) 41 duos=[] 42 for i in range(0, l): 43 column=fasta_align.get_column(i) 44 aa1=column[si] 45 aa2=column[sj] 46 if aa1!="-": 47 # Position in seq1 is not - 48 while True: 49 # Loop until an aa is found 50 r1=rl1[p1] 51 p1=p1+1 52 if is_aa(r1): 53 break 54 self._test_equivalence(r1, aa1) 55 else: 56 r1=None 57 if aa2!="-": 58 # Position in seq2 is not - 59 while True: 60 # Loop until an aa is found 61 r2=rl2[p2] 62 p2=p2+1 63 if is_aa(r2): 64 break 65 self._test_equivalence(r2, aa2) 66 else: 67 r2=None 68 if r1: 69 # Map residue in seq1 to its equivalent in seq2 70 map12[r1]=r2 71 if r2: 72 # Map residue in seq2 to its equivalent in seq1 73 map21[r2]=r1 74 # Append aligned pair (r is None if gap) 75 duos.append((r1, r2)) 76 self.map12=map12 77 self.map21=map21 78 self.duos=duos
79
80 - def _test_equivalence(self, r1, aa1):
81 "Test if aa in sequence fits aa in structure." 82 resname=r1.get_resname() 83 resname=SCOPData.protein_letters_3to1[resname] 84 assert(aa1==resname)
85
86 - def get_maps(self):
87 """ 88 Return two dictionaries that map a residue in one structure to 89 the equivealent residue in the other structure. 90 """ 91 return self.map12, self.map21
92
93 - def get_iterator(self):
94 """ 95 Iterator over all residue pairs. 96 """ 97 for i in range(0, len(self.duos)): 98 yield self.duos[i]
99 100 101 if __name__=="__main__": 102 import sys 103 from Bio.Alphabet import generic_protein 104 from Bio import AlignIO 105 from Bio.PDB import PDBParser 106 107 if len(sys.argv) != 4: 108 print("Expects three arguments,") 109 print(" - FASTA alignment filename (expect two sequences)") 110 print(" - PDB file one") 111 print(" - PDB file two") 112 sys.exit() 113 114 # The alignment 115 fa=AlignIO.read(open(sys.argv[1]), "fasta", generic_protein) 116 117 pdb_file1=sys.argv[2] 118 pdb_file2=sys.argv[3] 119 120 # The structures 121 p=PDBParser() 122 s1=p.get_structure('1', pdb_file1) 123 p=PDBParser() 124 s2=p.get_structure('2', pdb_file2) 125 126 # Get the models 127 m1=s1[0] 128 m2=s2[0] 129 130 al=StructureAlignment(fa, m1, m2) 131 132 # Print aligned pairs (r is None if gap) 133 for (r1, r2) in al.get_iterator(): 134 print("%s %s" % (r1, r2)) 135