1
2
3
4
5
6
7
8
9
10
11 """ Notes about the various classes of the restriction enzyme implementation.
12
13 RestrictionType is the type of all restriction enzymes.
14 ----------------------------------------------------------------------------
15 AbstractCut implements some methods that are common to all enzymes.
16 ----------------------------------------------------------------------------
17 NoCut, OneCut,TwoCuts represent the number of double strand cuts
18 produced by the enzyme.
19 they correspond to the 4th field of the rebase
20 record emboss_e.NNN.
21 0->NoCut : the enzyme is not characterised.
22 2->OneCut : the enzyme produces one double strand cut.
23 4->TwoCuts : two double strand cuts.
24 ----------------------------------------------------------------------------
25 Meth_Dep, Meth_Undep represent the methylation susceptibility to
26 the enzyme.
27 Not implemented yet.
28 ----------------------------------------------------------------------------
29 Palindromic, if the site is palindromic or not.
30 NotPalindromic allows some optimisations of the code.
31 No need to check the reverse strand
32 with palindromic sites.
33 ----------------------------------------------------------------------------
34 Unknown, Blunt, represent the overhang.
35 Ov5, Ov3 Unknown is here for symmetry reasons and
36 corresponds to enzymes that are not characterised
37 in rebase.
38 ----------------------------------------------------------------------------
39 Defined, Ambiguous, represent the sequence of the overhang.
40 NotDefined
41 NotDefined is for enzymes not characterised in
42 rebase.
43
44 Defined corresponds to enzymes that display a
45 constant overhang whatever the sequence.
46 ex : EcoRI. G^AATTC -> overhang :AATT
47 CTTAA^G
48
49 Ambiguous : the overhang varies with the
50 sequence restricted.
51 Typically enzymes which cut outside their
52 restriction site or (but not always)
53 inside an ambiguous site.
54 ex:
55 AcuI CTGAAG(22/20) -> overhang : NN
56 AasI GACNNN^NNNGTC -> overhang : NN
57 CTGN^NNNNNCAG
58
59 note : these 3 classes refer to the overhang not the site.
60 So the enzyme ApoI (RAATTY) is defined even if its restriction
61 site is ambiguous.
62
63 ApoI R^AATTY -> overhang : AATT -> Defined
64 YTTAA^R
65 Accordingly, blunt enzymes are always Defined even
66 when they cut outside their restriction site.
67 ----------------------------------------------------------------------------
68 Not_available, as found in rebase file emboss_r.NNN files.
69 Commercially_available
70 allows the selection of the enzymes according to
71 their suppliers to reduce the quantity
72 of results.
73 Also will allow the implementation of buffer
74 compatibility tables. Not implemented yet.
75
76 the list of suppliers is extracted from
77 emboss_s.NNN
78 ----------------------------------------------------------------------------
79 """
80
81 import re
82 import itertools
83
84 from Bio.Seq import Seq, MutableSeq
85 from Bio.Alphabet import IUPAC
86
87 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
88 from Bio.Restriction.Restriction_Dictionary import typedict
89 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
90 from Bio.Restriction.RanaConfig import *
91 from Bio.Restriction.PrintFormat import PrintFormat
92
93
94
96 """Check characters in a string (PRIVATE).
97
98 Remove digits and white space present in string. Allows any valid ambiguous
99 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
100
101 Other characters (e.g. symbols) trigger a TypeError.
102
103 Returns the string WITH A LEADING SPACE (!). This is for backwards
104 compatibility, and may in part be explained by the fact that
105 Bio.Restriction doesn't use zero based counting.
106 """
107
108 seq_string = "".join(seq_string.split()).upper()
109
110 for c in "0123456789" : seq_string = seq_string.replace(c,"")
111
112 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) :
113 raise TypeError("Invalid character found in %s" % repr(seq_string))
114 return " " + seq_string
115
116
# IUPAC nucleotide compatibility table.
# Each key is a single-letter IUPAC DNA code; its value is the string of all
# IUPAC codes that share at least one concrete base with it (e.g. 'R' = A/G
# is listed against every code except C, T and Y).  'N' is compatible with
# every code.
# NOTE(review): how this table is consumed is not visible in this part of the
# file - presumably for comparing ambiguous sites/overhangs; confirm at callers.
117 matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
118 'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
119 'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
120 'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
121 'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
122 'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}
123
# Module-level alias: DNA refers to the Seq class imported from Bio.Seq.
124 DNA = Seq
125
224
225
227 """RestrictionType. Type from which derives all enzyme classes.
228
229 Implement the operator methods."""
230
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Metaclass initialiser run once for every enzyme class created.
    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    Arguments:
     - name  - name of the class being created; must not contain a hyphen.
     - bases - tuple of base classes of the new class.
     - dct   - namespace dictionary of the new class.  The default dict
       is only forwarded, never mutated here.

    On success ``cls.compsite`` (a pattern string on entry) is replaced by
    the compiled regular expression object.

    Raises ValueError if ``name`` contains a hyphen or if ``cls.compsite``
    cannot be compiled by ``re.compile``.
    """
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    # The super() proxy is already bound to cls, so cls must not be passed
    # a second time: type.__init__ accepts only (name, bases, dct) after it.
    super(RestrictionType, cls).__init__(name, bases, dct)
    try:
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # Any compilation failure is reported uniformly as a ValueError
        # quoting the offending pattern.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
247
259
261 """RE.__div__(other) -> list.
262
263 RE/other
264 returns RE.search(other)."""
265 return cls.search(other)
266
268 """RE.__rdiv__(other) -> list.
269
270 other/RE
271 returns RE.search(other)."""
272 return cls.search(other)
273
275 """RE.__truediv__(other) -> list.
276
277 RE/other
278 returns RE.search(other)."""
279 return cls.search(other)
280
282 """RE.__rtruediv__(other) -> list.
283
284 other/RE
285 returns RE.search(other)."""
286 return cls.search(other)
287
289 """RE.__floordiv__(other) -> list.
290
291 RE//other
292 returns RE.catalyse(other)."""
293 return cls.catalyse(other)
294
296 """RE.__rfloordiv__(other) -> list.
297
298 other//RE
299 returns RE.catalyse(other)."""
300 return cls.catalyse(other)
301
303 """RE.__str__() -> str.
304
305 return the name of the enzyme."""
306 return cls.__name__
307
309 """RE.__repr__() -> str.
310
311 used with eval or exec will instantiate the enzyme."""
312 return "%s" % cls.__name__
313
315 """RE.__len__() -> int.
316
317 length of the recognition site."""
318 return cls.size
319
321
322
323 return id(cls)
324
326 """RE == other -> bool
327
328 True if RE and other are the same enzyme.
329
330 Specifically this checks they are the same Python object.
331 """
332
333 return id(cls)==id(other)
334
336 """RE != other -> bool.
337 isoschizomer strict, same recognition site, same restriction -> False
338 all the other-> True
339
340 WARNING - This is not the inverse of the __eq__ method.
341 """
342 if not isinstance(other, RestrictionType):
343 return True
344 elif cls.