1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
"""Convert a series of Rebase files into a Restriction_Dictionary.py module.

The Rebase files are in the emboss format:

emboss_e.###    -> contains information about the restriction sites.
emboss_r.###    -> contains general information about the enzymes.
emboss_s.###    -> contains information about the suppliers.

### is a 3-digit number. The first digit is the year and the last two
digits are the month.
"""
39
40 import os
41 import itertools
42 import time
43 import sys
44 import shutil
45
46 from Bio.Seq import Seq
47
48 import Bio.Restriction.Restriction
49 from Bio.Restriction.Restriction import AbstractCut, RestrictionType, NoCut, OneCut
50 from Bio.Restriction.Restriction import TwoCuts, Meth_Dep, Meth_Undep, Palindromic
51 from Bio.Restriction.Restriction import NonPalindromic, Unknown, Blunt, Ov5, Ov3
52 from Bio.Restriction.Restriction import NotDefined, Defined, Ambiguous
53 from Bio.Restriction.Restriction import Commercially_available, Not_available
54
55 import Bio.Restriction.RanaConfig as config
56 from Bio.Restriction._Update.Update import RebaseUpdate
57 from Bio.Restriction.Restriction import *
58 from Bio.Restriction.DNAUtils import antiparallel
59
# Short alias for Bio.Seq.Seq (imported above).
DNA = Seq
# IUPAC nucleotide codes mapped to the concrete bases each code stands for.
# Upper-case codes expand to upper-case bases, lower-case to lower-case.
dna_alphabet = {
    'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T',
    'R': 'AG', 'Y': 'CT', 'W': 'AT', 'S': 'CG', 'M': 'AC', 'K': 'GT',
    'H': 'ACT', 'B': 'CGT', 'V': 'ACG', 'D': 'AGT',
    'N': 'ACGT',
    'a': 'a', 'c': 'c', 'g': 'g', 't': 't',
    'r': 'ag', 'y': 'ct', 'w': 'at', 's': 'cg', 'm': 'ac', 'k': 'gt',
    'h': 'act', 'b': 'cgt', 'v': 'acg', 'd': 'agt',
    'n': 'acgt',
}


# Complement of every IUPAC code (case is preserved). The mapping is its own
# inverse: complementing twice gives back the original code.
complement_alphabet = {
    'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'R': 'Y', 'Y': 'R',
    'W': 'W', 'S': 'S', 'M': 'K', 'K': 'M', 'H': 'D', 'D': 'H',
    'B': 'V', 'V': 'B', 'N': 'N', 'a': 't', 'c': 'g', 'g': 'c',
    't': 'a', 'r': 'y', 'y': 'r', 'w': 'w', 's': 's', 'm': 'k',
    'k': 'm', 'h': 'd', 'd': 'h', 'b': 'v', 'v': 'b', 'n': 'n',
}

# Module-level registries; all start out empty here.
# NOTE(review): presumably filled by code further down the file -- confirm.
enzymedict = {}
suppliersdict = {}
classdict = {}
typedict = {}
80
81
83 """Exception for dealing with overhang."""
84 pass
85
86
def BaseExpand(base):
    """BaseExpand(base) -> string.

    Given a degenerate base, return its meaning in the IUPAC alphabet.

    The base is upper-cased before the lookup in dna_alphabet, so the result
    is always the upper-case expansion. A code that is not a key of
    dna_alphabet raises KeyError.

    i.e:
        b= 'A'  -> 'A'
        b= 'N'  -> 'ACGT'
        etc...
    """
    base = base.upper()
    return dna_alphabet[base]
98
99
def regex(site):
    """regex(site) -> string.

    Construct a regular expression from a DNA sequence.

    Every 'N' or 'n' becomes '.', every upper-case degenerate IUPAC code
    (R, Y, W, M, S, K, H, D, B, V) becomes a character class built from
    BaseExpand(), and every other character is copied unchanged.

    i.e.:
        site = 'ABCGN' -> 'A[CGT]CG.'
    """
    degenerate = ('R', 'Y', 'W', 'M', 'S', 'K', 'H', 'D', 'B', 'V')
    pieces = []
    # One pass over the site: each character is translated independently.
    for base in site:
        if base in ('N', 'n'):
            pieces.append('.')
        elif base in degenerate:
            pieces.append('[' + str(BaseExpand(base)) + ']')
        else:
            # Plain bases pass through; lower-case degenerate codes are
            # not expanded and are copied as they are.
            pieces.append(base)
    return ''.join(pieces)
117
118
def Antiparallel(sequence):
    """Antiparallel(sequence) -> string.

    Return a string which represents the reverse complementary strand of
    a DNA sequence.

    The sequence is converted with str() and handed to
    Bio.Restriction.DNAUtils.antiparallel, which does the actual work.
    """
    return antiparallel(str(sequence))
125
126
def is_palindrom(sequence):
    """is_palindrom(sequence) -> bool.

    True if the sequence is a palindrome, i.e. it compares equal to its own
    reverse complement.

    sequence is a DNA object; its reverse complement is wrapped in DNA()
    again before the comparison.
    """
    return sequence == DNA(Antiparallel(sequence))
133
134
def LocalTime():
    """LocalTime() -> string.

    Calculate the extension of the emboss files for the current year and
    month, using UTC (time.gmtime()).

    The result has three characters: the last digit of the year followed by
    the month zero-padded to two digits.
    """
    now = time.gmtime()
    # tm_year % 10 is the last digit of the year; %02d pads the month.
    return '%d%02d' % (now.tm_year % 10, now.tm_mon)
146
147
149 """construct the attributes of the enzyme corresponding to 'name'."""