1
2
3
4
5
6 """This module deals with CAPS markers.
7
A CAPS marker is a location (a DifferentialCutsite, as described below)
together with a set of primers that can be used to visualize it. More
information can be found in the paper located at:
11
12 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8106085&dopt=Abstract
13
14 Copyright Jonathan Taylor 2005
15 """
16
17
19 """A differential cutsite is a location in an alignment where an enzyme cuts
20 at least one sequence and also cannot cut at least one other sequence.
21
22 Members:
23 start Where it lives in the alignment.
24 enzyme The enzyme that causes this.
25 cuts_in A list of sequences (as indexes into the alignment) the
26 enzyme cuts in.
27 blocked_in A list of sequences (as indexes into the alignment) the
28 enzyme is blocked in.
29
30 """
31
33 """Initialize a DifferentialCutsite.
34
35 Each member (as listed in the class description) should be included as a
36 keyword.
37 """
38
39 self.start = int(kwds["start"])
40 self.enzyme = kwds["enzyme"]
41 self.cuts_in = kwds["cuts_in"]
42 self.blocked_in = kwds["blocked_in"]
43
44
47
48
50 """A map of an alignment showing all possible dcuts.
51
52 Members:
53 alignment The alignment that is mapped.
54 dcuts A list of possible CAPS markers in the form of
55 DifferentialCutsites.
56 """
57
def __init__(self, alignment, enzymes=None):
    """Initialize the CAPSMap.

    Required:
    alignment    The alignment to be mapped. It is iterated once and the
                 ``seq`` attribute of every record is collected.

    Optional:
    enzymes      The enzymes to be used to create the map. When omitted
                 (or None), a new empty list is used for this instance.

    Raises:
    AlignmentHasDifferentLengthsError if the sequences in the alignment
    do not all have the same length.
    """
    # A literal ``[]`` default is evaluated once at definition time and
    # would be shared (and stored on ``self.enzymes``) by every map built
    # without an explicit enzyme list; create a fresh list per instance.
    if enzymes is None:
        enzymes = []

    self.sequences = [rec.seq for rec in alignment]
    self.size = len(self.sequences)
    # The first sequence sets the reference length; an alignment without
    # any records raises IndexError here, exactly as before.
    self.length = len(self.sequences[0])
    for seq in self.sequences:
        if len(seq) != self.length:
            raise AlignmentHasDifferentLengthsError

    self.alignment = alignment
    self.enzymes = enzymes

    # Run the digest now that sequences and enzymes are in place.
    self._digest()
80
82 cuts = {}
83 all = []
84
85
86 for seq in self.sequences:
87
88
89 cuts[seq] = [cut - enzyme.fst5 for cut in enzyme.search(seq)]
90
91
92 all.extend(cuts[seq])
93
94
95 all.sort()
96
97 last = -999
98 new = []
99 for cut in all:
100 if cut != last:
101 new.append(cut)
102 last = cut
103
104 all = new
105
106
107 for cut in all:
108
109
110 cuts_in = []
111 blocked_in = []
112
113 for i in range(0, self.size):
114 seq = self.sequences[i]
115 if cut in cuts[seq]:
116 cuts_in.append(i)
117 else:
118 blocked_in.append(i)
119
120 if cuts_in != [] and blocked_in != []:
121 self.dcuts.append(DifferentialCutsite(start = cut, enzyme = enzyme, cuts_in = cuts_in, blocked_in = blocked_in))
122
124 self.dcuts = []
125
126 for enzyme in self.enzymes:
127 self._digest_with(enzyme)
128