Package Bio :: Package Restriction :: Package _Update :: Module RestrictionCompiler
[hide private]
[frames | no frames]

Source Code for Module Bio.Restriction._Update.RestrictionCompiler

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
   10  #   This script is used to produce the dictionary which will contain the data 
   11  #   about the restriction enzymes from the Emboss/Rebase data files, 
  12  #   namely 
  13  #   emboss_e.### (description of the sites), 
  14  #   emboss_r.### (origin, methylation, references) 
  15  #   emboss_s.### (suppliers) 
   16  #   where ### is a three-digit number: one digit for the year, two for the month 
  17  # 
  18  #   very dirty implementation but it does the job, so... 
  19  #   Not very quick either but you are not supposed to use it frequently. 
  20  # 
  21  #   The results are stored in 
  22  #   path/to/site-packages/Bio/Restriction/Restriction_Dictionary.py 
   23  #   the file contains two dictionaries: 
  24  #   'rest_dict' which contains the data for the enzymes 
  25  #   and 
   26  #   'suppliers' which maps the names of the suppliers to their abbreviations. 
  27  # 
  28   
   29  """Convert a series of Rebase files into a Restriction_Dictionary.py module. 
  30   
  31  The Rebase files are in the emboss format: 
  32   
  33      emboss_e.###    -> contains information about the restriction sites. 
  34      emboss_r.###    -> contains general information about the enzymes. 
  35      emboss_s.###    -> contains information about the suppliers. 
  36   
   37  ### is a 3-digit number. The first digit is the year and the last two are the month. 
  38  """ 
  39   
  40  import os 
  41  import itertools 
  42  import time 
  43  import sys 
  44  import shutil 
  45   
  46  from Bio.Seq import Seq 
  47   
  48  import Bio.Restriction.Restriction 
  49  from Bio.Restriction.Restriction import AbstractCut, RestrictionType, NoCut, OneCut 
  50  from Bio.Restriction.Restriction import TwoCuts, Meth_Dep, Meth_Undep, Palindromic 
  51  from Bio.Restriction.Restriction import NonPalindromic, Unknown, Blunt, Ov5, Ov3 
  52  from Bio.Restriction.Restriction import NotDefined, Defined, Ambiguous 
  53  from Bio.Restriction.Restriction import Commercially_available, Not_available 
  54   
  55  import Bio.Restriction.RanaConfig as config 
  56  from Bio.Restriction._Update.Update import RebaseUpdate 
  57  from Bio.Restriction.Restriction import * 
  58  from Bio.Restriction.DNAUtils import antiparallel 
  59   
  60  DNA=Seq 
  61  dna_alphabet = {'A':'A', 'C':'C', 'G':'G', 'T':'T', 
  62                  'R':'AG', 'Y':'CT', 'W':'AT', 'S':'CG', 'M':'AC', 'K':'GT', 
  63                  'H':'ACT', 'B':'CGT', 'V':'ACG', 'D':'AGT', 
  64                  'N':'ACGT', 
  65                  'a': 'a', 'c': 'c', 'g': 'g', 't': 't', 
  66                  'r':'ag', 'y':'ct', 'w':'at', 's':'cg', 'm':'ac', 'k':'gt', 
  67                  'h':'act', 'b':'cgt', 'v':'acg', 'd':'agt', 
  68                  'n':'acgt'} 
  69   
  70   
  71  complement_alphabet = {'A':'T', 'T':'A', 'C':'G', 'G':'C','R':'Y', 'Y':'R', 
  72                         'W':'W', 'S':'S', 'M':'K', 'K':'M', 'H':'D', 'D':'H', 
  73                         'B':'V', 'V':'B', 'N':'N','a':'t', 'c':'g', 'g':'c', 
  74                         't':'a', 'r':'y', 'y':'r', 'w':'w', 's':'s','m':'k', 
  75                         'k':'m', 'h':'d', 'd':'h', 'b':'v', 'v':'b', 'n':'n'} 
  76  enzymedict = {} 
  77  suppliersdict = {} 
  78  classdict = {} 
  79  typedict = {} 
  80   
  81   
class OverhangError(ValueError):
    """Error raised for problems with an enzyme overhang.

    Subclasses ValueError, so code catching ValueError also catches it.
    """
85 86
def BaseExpand(base):
    """BaseExpand(base) -> string.

    Look up the bases a (possibly degenerate) nucleotide code stands for.
    The code is upper-cased before the lookup in ``dna_alphabet``, so the
    result is always in upper case:

        'A' -> 'A'
        'n' -> 'ACGT'

    Raises KeyError when the upper-cased code is not in ``dna_alphabet``.
    """
    return dna_alphabet[base.upper()]
98 99
def regex(site):
    """regex(site) -> string.

    Construct a regular expression from a DNA sequence, e.g.:

        site = 'ABCGN' -> 'A[CGT]CG.'

    Translation rules, applied to each character of site in one pass:

    - 'N' and 'n' become '.' (match any base);
    - an upper-case ambiguity code (R, Y, W, M, S, K, H, D, B, V) becomes
      a character class built from BaseExpand(), e.g. 'R' -> '[AG]';
    - every other character is copied unchanged.  This includes
      lower-case ambiguity codes other than 'n', which the original
      implementation did not expand either.

    The result is always built with str.join, so it is a plain string.
    """
    ambiguous = ('R', 'Y', 'W', 'M', 'S', 'K', 'H', 'D', 'B', 'V')
    pieces = []
    for base in site:
        if base in ('N', 'n'):
            pieces.append('.')
        elif base in ambiguous:
            pieces.append('[' + str(BaseExpand(base)) + ']')
        else:
            pieces.append(base)
    return ''.join(pieces)
117 118
def Antiparallel(sequence):
    """Antiparallel(sequence) -> string.

    Convert sequence to a plain string and pass it to antiparallel(),
    returning that function's result: the reverse complementary strand
    of the DNA sequence.
    """
    as_text = str(sequence)
    return antiparallel(as_text)
125 126
def is_palindrom(sequence):
    """is_palindrom(sequence) -> bool.

    True if the sequence is a palindrome, i.e. it compares equal to its
    own reverse complement wrapped in a DNA object.
    sequence is a DNA object.
    """
    reverse_complement = DNA(Antiparallel(sequence))
    return sequence == reverse_complement
133 134
def LocalTime():
    """LocalTime() -> string.

    Build the three-character extension used by the emboss files for the
    current date: the last digit of the year followed by the month as two
    digits (zero padded).

    Despite the name, the date is taken from time.gmtime(), i.e. UTC.
    """
    now = time.gmtime()
    # Only the final digit of the year is kept.
    year_digit = str(now.tm_year)[-1]
    return '%s%02d' % (year_digit, now.tm_mon)
146 147
148 -class newenzyme(object):
149 """construct the attributes of the enzyme corresponding to 'name'."""
150 - def __init__(cls, name):
151 cls.opt_temp = 37 152 cls.inact_temp = 65 153 cls.substrat = 'DNA' 154 target = enzymedict[name] 155 cls.site = target[0] 156 cls.size = target[1] 157 cls.suppl = tuple(target[9]) 158 cls.freq = target[11] 159 cls.ovhg = target[13] 160 cls.