1
2
3
4
5
6
7
8
9
10
11
12 """ Graph module
13
14 Provides:
15
16 o GraphData - Contains data from which a graph will be drawn, and
17 information about its presentation
18
19 For drawing capabilities, this module uses reportlab to draw and write
20 the diagram:
21
22 http://www.reportlab.com
23
24 For dealing with biological information, the package expects BioPython
25 objects:
26
27 http://www.biopython.org
28 """
29
30
31 from reportlab.lib import colors
32
33 from math import sqrt
37 """ GraphData
38
39 Provides:
40
41 Methods:
42
43 o __init__(self, id=None, data=None, name=None, style='bar',
44 color=colors.lightgreen, altcolor=colors.darkseagreen)
45 Called on instantiation
46
47 o set_data(self, data) Load the object with data to be plotted
48
49 o get_data(self) Returns the data to be plotted as a list of
50 (position, value) tuples
51
52 o add_point(self, point) Add a single point to the data set
53
54 o quartiles(self) Returns a tuple of the data quartiles
55
56 o range(self) Returns a tuple of the base range covered by the graph
57 data
58
59 o mean(self) Returns a float of the mean data point value
60
61 o stdev(self) Returns the sample standard deviation of the data values
62
o __len__(self) Returns the number of data points held by the graph
64
65 o __getitem__(self, index) Returns the value at the base specified,
66 or graph data in the base range
67
68 o __str__(self) Returns a formatted string describing the graph data
69
70 Attributes:
71
72 o id Unique identifier for the data
73
o data Dictionary describing the data, keyed by position
75
76 o name String describing the data
77
78 o style String ('bar', 'heat', 'line') describing how to draw the data
79
80 o poscolor colors.Color for drawing high (some styles) or all
81 values
82
83 o negcolor colors.Color for drawing low values (some styles)
84
85 o linewidth Int, thickness to draw the line in 'line' styles
86 """
def __init__(self, id=None, data=None, name=None, style='bar',
             color=colors.lightgreen, altcolor=colors.darkseagreen,
             center=None, colour=None, altcolour=None, centre=None):
    """ __init__(self, id=None, data=None, name=None, style='bar',
                 color=colors.lightgreen, altcolor=colors.darkseagreen,
                 center=None, colour=None, altcolour=None, centre=None)

        o id        Unique ID for the graph

        o data      List of (position, value) tuples; loaded through
                    set_data() when it is not None

        o name      String describing the graph

        o style     String describing the presentation style ('bar',
                    'line', 'heat')

        o color     colors.Color used to draw all values, or the 'high'
                    values in some styles (stored as self.poscolor)

        o altcolor  colors.Color used to draw the 'low' values in some
                    styles (stored as self.negcolor)

        o center    Value at which x-axis crosses y-axis

        o colour, altcolour, centre
                    Backwards compatible UK spellings; each one that is
                    not None overrides color, altcolor or center
                    respectively

        The line width (self.linewidth) is always initialised to 2.
    """
    self.id = id
    # Values are held in a dictionary keyed by position
    self.data = {}
    if data is not None:
        self.set_data(data)
    self.name = name

    # Presentation attributes; a UK spelling wins whenever it was supplied
    self.style = style
    self.poscolor = color if colour is None else colour
    self.negcolor = altcolor if altcolour is None else altcolour
    self.linewidth = 2
    self.center = center if centre is None else centre
135
@property
def centre(self):
    """ centre -> value of self.center

        Deprecated UK-spelling alias for the center attribute.  Each
        access issues a Bio.BiopythonDeprecationWarning and then returns
        the current value of self.center.
    """
    message = "The .centre attribute is deprecated, use .center instead"
    warnings.warn(message, Bio.BiopythonDeprecationWarning)
    return self.center
142
def set_data(self, data):
    """ set_data(self, data)

        o data      Iterable of (position, value) pairs

        Store every value in self.data under its position.  Positions
        already present are overwritten; other stored points are kept.
    """
    self.data.update((position, value) for position, value in data)
152
def get_data(self):
    """ get_data(self) -> [(int, float), (int, float), ...]

        Return the stored points as a new list of (position, value)
        tuples in ascending order.
    """
    return sorted(self.data.items())
164
def add_point(self, point):
    """ add_point(self, point)

        o point     (position, value) tuple

        Record one point in self.data, replacing any value already
        stored at the same position.
    """
    position, value = point
    self.data[position] = value
174
def quartiles(self):
    """ quartiles(self) -> (float, float, float, float, float)

        Returns the (minimum, lowerQ, medianQ, upperQ, maximum) values as
        a tuple.  The three inner entries are the sorted values found at
        indices len//4, len//2 and 3*len//4.

        Raises IndexError if the graph holds no data.
    """
    # sorted() accepts both a list and a dictionary view, whereas calling
    # .sort() on the result of dict.values() only works when it is a list
    values = sorted(self.data.values())
    count = len(values)
    return (values[0], values[count // 4], values[count // 2],
            values[3 * count // 4], values[-1])
186
def range(self):
    """ range(self) -> (int, int)

        Returns the range of the data, i.e. its start and end points on
        the genome as a (start, end) tuple: the lowest and the highest
        position stored in self.data.

        Raises IndexError if the graph holds no data.
    """
    # sorted() works whether the dictionary keys come back as a list or
    # as a view; .sort() on dict.keys() only works for a list
    positions = sorted(self.data)
    return (positions[0], positions[-1])
198
def mean(self):
    """ mean(self) -> Float

        Returns the mean value for the data points, converting each
        stored value to float before it is added up.

        Raises ZeroDivisionError if the graph holds no data.
    """
    values = self.data.values()
    # Builtin sum() with a float start value replaces the hand-written
    # accumulator that used to shadow the builtin name
    return sum((float(value) for value in values), 0.0) / len(values)
209
def stdev(self):
    """ stdev(self) -> Float

        Returns the sample standard deviation for the data: the square
        root of the summed squared deviations from self.mean(), divided
        by (number of points - 1).

        Raises ZeroDivisionError when fewer than two points are stored.
    """
    values = self.data.values()
    average = self.mean()
    squared_deviations = sum(
        (float((value - average) ** 2) for value in values), 0.0)
    # Sample (n - 1) denominator, not the population one
    return sqrt(squared_deviations / (len(values) - 1))
223
def __len__(self):
    """ __len__(self) -> Int

        Returns how many (position, value) points are currently stored
        in self.data.
    """
    return len(self.data)
230
def __getitem__(self, index):
    """ __getitem__(self, index) -> Float or list of tuples

        o index     Integer position, or a slice of positions

        Given an integer representing position on the sequence,
        returns the data value stored at that position (KeyError if
        there is none).

        Given a slice, returns the graph data from that region as a
        list of (position, value) tuples in ascending position order.
        Both ends of the slice are inclusive, and a missing start or
        stop leaves that end of the region open.

        Raises ValueError for a slice with a step other than 1, and
        TypeError for any other kind of index.
    """
    if isinstance(index, int):
        return self.data[index]
    if isinstance(index, slice):
        if index.step is not None and index.step != 1:
            raise ValueError("Slices with a step are not supported")
        low, high = index.start, index.stop
        # None marks an open end; comparing a position against None is
        # an error on Python 3 and silently wrong on Python 2.
        # sorted() also copes with dictionary key views.
        return [(pos, self.data[pos]) for pos in sorted(self.data)
                if (low is None or pos >= low)
                and (high is None or pos <= high)]
    raise TypeError("Need an integer or a slice")
260
def __str__(self):
    """ __str__(self) -> ""

        Returns a multi-line summary of the graph data: name and ID,
        number of points, mean, sample standard deviation, quartiles
        and sequence range, one item per line after a leading newline.
    """
    report = [
        "\nGraphData: %s, ID: %s" % (self.name, self.id),
        "Number of points: %d" % len(self.data),
        "Mean data value: %s" % self.mean(),
        "Sample SD: %.3f" % self.stdev(),
        "Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles(),
        "Sequence Range: %s..%s" % self.range(),
    ]
    return "\n".join(report)
273