Package Bio :: Package KDTree :: Module KDTree'
[hide private]
[frames] | no frames]

Source Code for Module Bio.KDTree.KDTree'

  1  # Copyright 2004 by Thomas Hamelryck. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """KD tree data structure for searching N-dimensional vectors. 
  7   
  8  The KD tree data structure can be used for all kinds of searches that 
  9  involve N-dimensional vectors, e.g.  neighbor searches (find all points 
 10  within a radius of a given point) or finding all point pairs in a set 
 11  that are within a certain radius of each other. See "Computational Geometry: 
 12  Algorithms and Applications" (Mark de Berg, Marc van Kreveld, Mark Overmars, 
 13  Otfried Schwarzkopf). Author: Thomas Hamelryck. 
 14  """ 
 15   
 16  from __future__ import print_function 
 17   
 18  from numpy import sum, sqrt, array 
 19  from numpy import random 
 20   
 21  from Bio.KDTree import _CKDTree 
 22   
 23  __docformat__ = "restructuredtext en" 
 24   
 25   
26 -def _dist(p, q):
27 diff = p - q 28 return sqrt(sum(diff * diff))
29 30
31 -def _neighbor_test(nr_points, dim, bucket_size, radius):
32 """Test all fixed radius neighbor search. 33 34 Test all fixed radius neighbor search using the 35 KD tree C module. 36 37 Arguments: 38 - nr_points: number of points used in test 39 - dim: dimension of coords 40 - bucket_size: nr of points per tree node 41 - radius: radius of search (typically 0.05 or so) 42 43 Returns true if the test passes. 44 """ 45 # KD tree search 46 kdt = _CKDTree.KDTree(dim, bucket_size) 47 coords = random.random((nr_points, dim)) 48 kdt.set_data(coords) 49 neighbors = kdt.neighbor_search(radius) 50 r = [neighbor.radius for neighbor in neighbors] 51 if r is None: 52 l1 = 0 53 else: 54 l1 = len(r) 55 # now do a slow search to compare results 56 neighbors = kdt.neighbor_simple_search(radius) 57 r = [neighbor.radius for neighbor in neighbors] 58 if r is None: 59 l2 = 0 60 else: 61 l2 = len(r) 62 if l1 == l2: 63 # print("Passed.") 64 return True 65 else: 66 print("Not passed: %i != %i." % (l1, l2)) 67 return False
68 69
70 -def _test(nr_points, dim, bucket_size, radius):
71 """Test neighbor search. 72 73 Test neighbor search using the KD tree C module. 74 75 Arguments: 76 - nr_points: number of points used in test 77 - dim: dimension of coords 78 - bucket_size: nr of points per tree node 79 - radius: radius of search (typically 0.05 or so) 80 81 Returns true if the test passes. 82 """ 83 # kd tree search 84 kdt = _CKDTree.KDTree(dim, bucket_size) 85 coords = random.random((nr_points, dim)) 86 center = coords[0] 87 kdt.set_data(coords) 88 kdt.search_center_radius(center, radius) 89 r = kdt.get_indices() 90 if r is None: 91 l1 = 0 92 else: 93 l1 = len(r) 94 l2 = 0 95 # now do a manual search to compare results 96 for i in range(0, nr_points): 97 p = coords[i] 98 if _dist(p, center) <= radius: 99 l2 = l2 + 1 100 if l1 == l2: 101 # print("Passed.") 102 return True 103 else: 104 print("Not passed: %i != %i." % (l1, l2)) 105 return False
106 107
108 -class KDTree(object):
109 """KD tree implementation (C++, SWIG python wrapper) 110 111 The KD tree data structure can be used for all kinds of searches that 112 involve N-dimensional vectors, e.g. neighbor searches (find all points 113 within a radius of a given point) or finding all point pairs in a set 114 that are within a certain radius of each other. 115 116 Reference: 117 118 Computational Geometry: Algorithms and Applications 119 Second Edition 120 Mark de Berg, Marc van Kreveld, Mark Overmars, Otfried Schwarzkopf 121 published by Springer-Verlag 122 2nd rev. ed. 2000. 123 ISBN: 3-540-65620-0 124 125 The KD tree data structure is described in chapter 5, pg. 99. 126 127 The following article made clear to me that the nodes should 128 contain more than one point (this leads to dramatic speed 129 improvements for the "all fixed radius neighbor search", see 130 below): 131 132 JL Bentley, "Kd trees for semidynamic point sets," in Sixth Annual ACM 133 Symposium on Computational Geometry, vol. 91. San Francisco, 1990 134 135 This KD implementation also performs a "all fixed radius neighbor search", 136 i.e. it can find all point pairs in a set that are within a certain radius 137 of each other. As far as I know the algorithm has not been published. 138 """ 139
140 - def __init__(self, dim, bucket_size=1):
141 self.dim = dim 142 self.kdt = _CKDTree.KDTree(dim, bucket_size) 143 self.built = 0
144 145 # Set data 146
147 - def set_coords(self, coords):
148 """Add the coordinates of the points. 149 150 Arguments: 151 - coords: two dimensional NumPy array. E.g. if the points 152 have dimensionality D and there are N points, the coords 153 array should be NxD dimensional. 154 """ 155 if coords.min() <= -1e6 or coords.max() >= 1e6: 156 raise Exception("Points should lie between -1e6 and 1e6") 157 if len(coords.shape) != 2 or coords.shape[1] != self.dim: 158 raise Exception("Expected a Nx%i NumPy array" % self.dim) 159 self.kdt.set_data(coords) 160 self.built = 1
161 162 # Fixed radius search for a point 163
164 - def search(self, center, radius):
165 """Search all points within radius of center. 166 167 Arguments: 168 - center: one dimensional NumPy array. E.g. if the points have 169 dimensionality D, the center array should be D dimensional. 170 - radius: float>0 171 """ 172 if not self.built: 173 raise Exception("No point set specified") 174 if center.shape != (self.dim,): 175 raise Exception("Expected a %i-dimensional NumPy array" 176 % self.dim) 177 self.kdt.search_center_radius(center, radius)
178
179 - def get_radii(self):
180 """Return radii. 181 182 Return the list of distances from center after 183 a neighbor search. 184 """ 185 a = self.kdt.get_radii() 186 if a is None: 187 return [] 188 return a
189
190 - def get_indices(self):
191 """Return the list of indices. 192 193 Return the list of indices after a neighbor search. 194 The indices refer to the original coords NumPy array. The 195 coordinates with these indices were within radius of center. 196 197 For an index pair, the first index<second index. 198 """ 199 a = self.kdt.get_indices() 200 if a is None: 201 return [] 202 return a
203 204 # Fixed radius search for all points 205
206 - def all_search(self, radius):
207 """All fixed neighbor search. 208 209 Search all point pairs that are within radius. 210 211 Arguments: 212 - radius: float (>0) 213 """ 214 if not self.built: 215 raise Exception("No point set specified") 216 self.neighbors = self.kdt.neighbor_search(radius)
217
218 - def all_get_indices(self):
219 """Return All Fixed Neighbor Search results. 220 221 Return a Nx2 dim NumPy array containing 222 the indices of the point pairs, where N 223 is the number of neighbor pairs. 224 """ 225 a = array([[neighbor.index1, neighbor.index2] for neighbor in self.neighbors]) 226 return a
227
228 - def all_get_radii(self):
229 """Return All Fixed Neighbor Search results. 230 231 Return an N-dim array containing the distances 232 of all the point pairs, where N is the number 233 of neighbor pairs.. 234 """ 235 return [neighbor.radius for neighbor in self.neighbors]
236 237 if __name__ == "__main__": 238 239 nr_points = 100000 240 dim = 3 241 bucket_size = 10 242 query_radius = 10 243 244 coords = 200 * random.random((nr_points, dim)) 245 246 kdtree = KDTree(dim, bucket_size) 247 248 # enter coords 249 kdtree.set_coords(coords) 250 251 # Find all point pairs within radius 252 253 kdtree.all_search(query_radius) 254 255 # get indices & radii of points 256 257 # indices is a list of tuples. Each tuple contains the 258 # two indices of a point pair within query_radius of 259 # each other. 260 indices = kdtree.all_get_indices() 261 radii = kdtree.all_get_radii() 262 263 print("Found %i point pairs within radius %f." % (len(indices), query_radius)) 264 265 # Do 10 individual queries 266 267 for i in range(0, 10): 268 # pick a random center 269 center = random.random(dim) 270 271 # search neighbors 272 kdtree.search(center, query_radius) 273 274 # get indices & radii of points 275 indices = kdtree.get_indices() 276 radii = kdtree.get_radii() 277 278 x, y, z = center 279 print("Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z)) 280