Package Bio :: Package Graphics :: Package GenomeDiagram :: Module _Graph
[hide private]
[frames] | [no frames]

Source Code for Module Bio.Graphics.GenomeDiagram._Graph

  1  # Copyright 2003-2008 by Leighton Pritchard.  All rights reserved. 
  2  # Revisions copyright 2008-2009 by Peter Cock. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  # 
  7  # Contact:       Leighton Pritchard, Scottish Crop Research Institute, 
  8  #                Invergowrie, Dundee, Scotland, DD2 5DA, UK 
  9  #                L.Pritchard@scri.ac.uk 
 10  ################################################################################ 
 11   
 12  """ Graph module 
 13   
 14      Provides: 
 15   
 16      o GraphData - Contains data from which a graph will be drawn, and 
 17                      information about its presentation 
 18   
 19      For drawing capabilities, this module uses reportlab to draw and write 
 20      the diagram: 
 21   
 22      http://www.reportlab.com 
 23   
 24      For dealing with biological information, the package expects Biopython 
 25      objects: 
 26   
 27      http://www.biopython.org 
 28  """ 
 29   
 30  # ReportLab imports 
 31  from __future__ import print_function 
 32   
 33  from reportlab.lib import colors 
 34   
 35  from math import sqrt 
 36   
 37   
class GraphData(object):
    """Data for a single graph, plus information about its presentation.

    Data points are held in a dictionary keyed by sequence position, so
    storing a value at a position that already has one replaces it.

    Methods:

    o __init__(self, id=None, data=None, name=None, style='bar',
               color=colors.lightgreen, altcolor=colors.darkseagreen,
               center=None, colour=None, altcolour=None)
                                Called on instantiation

    o set_data(self, data)      Load the object with data to be plotted

    o get_data(self)            Returns the data to be plotted as a sorted
                                list of (position, value) tuples

    o add_point(self, point)    Add a single point to the data set

    o quartiles(self)           Returns a tuple of the data quartiles

    o range(self)               Returns a tuple of the base range covered
                                by the graph data

    o mean(self)                Returns a float of the mean data point value

    o stdev(self)               Returns the sample standard deviation of
                                the data values

    o __len__(self)             Returns the number of points in the data set

    o __getitem__(self, index)  Returns the value at the base specified,
                                or graph data in the base range

    o __str__(self)             Returns a formatted string describing the
                                graph data

    Attributes:

    o id            Unique identifier for the data

    o data          Dictionary of data values, keyed by position

    o name          String describing the data

    o style         String ('bar', 'heat', 'line') describing how to draw
                    the data

    o poscolor      colors.Color for drawing high (some styles) or all
                    values

    o negcolor      colors.Color for drawing low values (some styles)

    o linewidth     Int, thickness to draw the line in 'line' styles

    o center        Value at which the x-axis crosses the y-axis
    """

    def __init__(self, id=None, data=None, name=None, style='bar',
                 color=colors.lightgreen, altcolor=colors.darkseagreen,
                 center=None, colour=None, altcolour=None):
        """__init__(self, id=None, data=None, name=None, style='bar',
                    color=colors.lightgreen, altcolor=colors.darkseagreen,
                    center=None, colour=None, altcolour=None)

        o id        Unique ID for the graph

        o data      List of (position, value) tuples

        o name      String describing the graph

        o style     String describing the presentation style ('bar', 'line',
                    'heat')

        o color     colors.Color describing the color to draw all or the
                    'high' (some styles) values (overridden by backwards
                    compatible argument with UK spelling, colour).

        o altcolor  colors.Color describing the color to draw the 'low'
                    values (some styles only) (overridden by backwards
                    compatible argument with UK spelling, altcolour).

        o center    Value at which x-axis crosses y-axis.

        o colour    Backwards compatible UK spelling of color; takes
                    precedence over color when it is not None.

        o altcolour Backwards compatible UK spelling of altcolor; takes
                    precedence over altcolor when it is not None.
        """
        # Let the UK spelling (colour) override the USA spelling (color)
        if colour is not None:
            color = colour
        if altcolour is not None:
            altcolor = altcolour

        self.id = id            # Unique identifier for the graph
        self.data = {}          # holds values, keyed by sequence position
        if data is not None:
            self.set_data(data)
        self.name = name        # Descriptive string

        # Attributes describing how the graph will be drawn
        self.style = style          # One of 'bar', 'heat' or 'line'
        self.poscolor = color       # Color to draw all, or 'high' values
        self.negcolor = altcolor    # Color to draw 'low' values
        self.linewidth = 2          # linewidth to use in line graphs
        self.center = center        # value at which x-axis crosses y-axis

    def set_data(self, data):
        """set_data(self, data)

        o data      List of (position, value) tuples

        Add data with a list of (position, value) tuples.  Existing points
        are kept; a point at an already-populated position replaces the
        value stored there.
        """
        for (pos, val) in data:     # Fill data dictionary
            self.data[pos] = val

    def get_data(self):
        """get_data(self) -> [(int, float), (int, float), ...]

        Return data as a list of sorted (position, value) tuples
        """
        # Positions are dictionary keys and therefore unique, so the sort
        # is decided by position alone and values are never compared.
        return sorted(self.data.items())

    def add_point(self, point):
        """add_point(self, point)

        o point     (position, value) tuple

        Add a single point to the set of data, replacing any value already
        stored at that position.
        """
        pos, val = point
        self.data[pos] = val

    def quartiles(self):
        """quartiles(self) -> (float, float, float, float, float)

        Returns the (minimum, lowerQ, medianQ, upperQ, maximum) values as
        a tuple.  The quartiles are picked by integer index into the
        sorted values; no interpolation is performed.

        Raises IndexError if the graph holds no data.
        """
        data = sorted(self.data.values())
        datalen = len(data)
        return (data[0], data[datalen // 4], data[datalen // 2],
                data[3 * datalen // 4], data[-1])

    def range(self):
        """range(self) -> (int, int)

        Returns the range of the data, i.e. its start and end points on
        the genome as a (start, end) tuple.

        Raises IndexError if the graph holds no data.
        """
        positions = sorted(self.data)   # i.e. dict keys
        # Return first and last positions in graph
        return (positions[0], positions[-1])

    def mean(self):
        """mean(self) -> Float

        Returns the mean value for the data points.

        Raises ZeroDivisionError if the graph holds no data.
        """
        values = list(self.data.values())
        # Accumulate explicitly as floats; 'total' avoids shadowing the
        # builtin sum().
        total = 0.
        for item in values:
            total += float(item)
        return total / len(values)

    def stdev(self):
        """stdev(self) -> Float

        Returns the sample standard deviation for the data.

        Raises ZeroDivisionError if the graph holds fewer than two points,
        since the sample standard deviation divides by (n - 1).
        """
        values = list(self.data.values())
        m = self.mean()
        runtotal = 0.
        for entry in values:
            runtotal += float((entry - m) ** 2)
        # This is sample standard deviation; population stdev would involve
        # division by len(data), rather than len(data)-1
        return sqrt(runtotal / (len(values) - 1))

    def __len__(self):
        """__len__(self) -> Int

        Returns the number of points in the data set
        """
        return len(self.data)

    def __getitem__(self, index):
        """__getitem__(self, index) -> Float or list of tuples

        Given an integer representing position on the sequence
        returns a float - the data value at the passed position
        (KeyError if there is no point at that position).

        If a slice, returns graph data from the region as a list of
        (position, value) tuples sorted by position.  Both end points of
        the slice are treated as inclusive, and a missing start or stop
        (e.g. graph[:100] or graph[50:]) leaves that end unbounded.
        Slices with a step other than 1 are not supported and raise
        ValueError.

        Any other index type raises TypeError.
        """
        if isinstance(index, int):
            return self.data[index]
        if isinstance(index, slice):
            if index.step is not None and index.step != 1:
                raise ValueError("Slices with a step other than 1 "
                                 "are not supported")
            low = index.start
            high = index.stop
            # NOTE: both end points are inclusive, unlike normal Python
            # slicing; this is kept for backwards compatibility.  A bound
            # of None means "unbounded" - comparing a position against
            # None would raise TypeError on Python 3.
            return [(pos, self.data[pos]) for pos in sorted(self.data)
                    if (low is None or pos >= low)
                    and (high is None or pos <= high)]
        raise TypeError("Need an integer or a slice")

    def __str__(self):
        """__str__(self) -> ""

        Returns a string describing the graph data.  Statistics that cannot
        be computed (any of them for an empty graph, the sample standard
        deviation for a single point) are omitted or marked as undefined
        instead of raising.
        """
        npoints = len(self.data)
        outstr = ["\nGraphData: %s, ID: %s" % (self.name, self.id)]
        outstr.append("Number of points: %d" % npoints)
        if npoints == 0:
            # mean(), quartiles() and range() all raise on empty data
            return "\n".join(outstr)
        outstr.append("Mean data value: %s" % self.mean())
        if npoints > 1:
            outstr.append("Sample SD: %.3f" % self.stdev())
        else:
            # stdev() divides by (n - 1), which is zero for one point
            outstr.append("Sample SD: undefined (needs at least two points)")
        outstr.append("Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s"
                      % self.quartiles())
        outstr.append("Sequence Range: %s..%s" % self.range())
        return "\n".join(outstr)
261