GeographicLib  1.50
NearestNeighbor.hpp
Go to the documentation of this file.
1 /**
2  * \file NearestNeighbor.hpp
3  * \brief Header for GeographicLib::NearestNeighbor class
4  *
5  * Copyright (c) Charles Karney (2016-2019) <charles@karney.com> and licensed
6  * under the MIT/X11 License. For more information, see
7  * https://geographiclib.sourceforge.io/
8  **********************************************************************/
9 
10 #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11 #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12 
13 #include <algorithm> // for nth_element, max_element, etc.
14 #include <vector>
15 #include <queue> // for priority_queue
16 #include <utility> // for swap + pair
17 #include <cstring>
18 #include <limits>
19 #include <cmath>
20 #include <iostream>
21 #include <sstream>
22 // Only for GEOGRAPHICLIB_STATIC_ASSERT and GeographicLib::GeographicErr
24 
25 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
26  GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
27 #include <boost/serialization/nvp.hpp>
28 #include <boost/serialization/split_member.hpp>
29 #include <boost/serialization/array.hpp>
30 #include <boost/serialization/vector.hpp>
31 #endif
32 
33 #if defined(_MSC_VER)
34 // Squelch warnings about constant conditional expressions
35 # pragma warning (push)
36 # pragma warning (disable: 4127)
37 #endif
38 
39 namespace GeographicLib {
40 
41  /**
42  * \brief Nearest-neighbor calculations
43  *
44  * This class solves the nearest-neighbor problm using a vantage-point tree
45  * as described in \ref nearest.
46  *
47  * This class is templated so that it can handle arbitrary metric spaces as
48  * follows:
49  *
50  * @tparam dist_t the type used for measuring distances; it can be a real or
51  * signed integer type; in typical geodetic applications, \e dist_t might
52  * be <code>double</code>.
53  * @tparam pos_t the type for specifying the positions of points; geodetic
54  * application might bundled the latitude and longitude into a
55  * <code>std::pair<dist_t, dist_t></code>.
56  * @tparam distfun_t the type of a function object which takes takes two
57  * positions (of type \e pos_t) and returns the distance (of type \e
58  * dist_t); in geodetic applications, this might be a class which is
59  * constructed with a Geodesic object and which implements a member
60  * function with a signature <code>dist_t operator() (const pos_t&, const
61  * pos_t&) const</code>, which returns the geodesic distance between two
62  * points.
63  *
64  * \note The distance measure must satisfy the triangle inequality, \f$
65  * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The
66  * geodesic distance (given by Geodesic::Inverse) does, while the great
67  * ellipse distance and the rhumb line distance <i>do not</i>. If you use
68  * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
69  * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
70  * square root in the interests of "efficiency"; the squared distance does
71  * not satisfy the triangle inequality!
72  *
73  * \note This is a "header-only" implementation and, as such, depends in a
74  * minimal way on the rest of GeographicLib (the only dependency is through
75  * the use of GEOGRAPHICLIB_STATIC_ASSERT and GeographicLib::GeographicErr
76  * for handling compile-time and run-time exceptions). Therefore, it is easy
77  * to extract this class from the rest of GeographicLib and use it as a
78  * stand-alone facility.
79  *
80  * The \e dist_t type must support numeric_limits queries (specifically:
81  * is_signed, is_integer, max(), digits).
82  *
83  * The NearestNeighbor object is constructed with a vector of points (type \e
84  * pos_t) and a distance function (type \e distfun_t). However the object
85  * does \e not store the points. When querying the object with Search(),
86  * it's necessary to supply the same vector of points and the same distance
87  * function.
88  *
89  * There's no capability in this implementation to add or remove points from
90  * the set. Instead Initialize() should be called to re-initialize the
91  * object with the modified vector of points.
92  *
93  * Because of the overhead in constructing a NearestNeighbor object for a
94  * large set of points, functions Save() and Load() are provided to save the
95  * object to an external file. operator<<(), operator>>() and <a
96  * href="https://www.boost.org/libs/serialization/doc"> Boost
97  * serialization</a> can also be used to save and restore a NearestNeighbor
98  * object. This is illustrated in the example.
99  *
100  * Example of use:
101  * \include example-NearestNeighbor.cpp
102  **********************************************************************/
103  template <typename dist_t, typename pos_t, class distfun_t>
105  // For tracking changes to the I/O format
106  static const int version = 1;
107  // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow
108  // 4 slots (and this accommodates the default value bucket = 4).
109  static const int maxbucket =
110  (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
111  (4 * sizeof(dist_t)) / sizeof(int) : 2));
112  public:
113 
114  /**
115  * Default constructor for NearestNeighbor.
116  *
117  * This is equivalent to specifying an empty set of points.
118  **********************************************************************/
119  NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
120 
121  /**
122  * Constructor for NearestNeighbor.
123  *
124  * @param[in] pts a vector of points to include in the set.
125  * @param[in] dist the distance function object.
126  * @param[in] bucket the size of the buckets at the leaf nodes; this must
127  * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
128  * @exception GeographicErr if the value of \e bucket is out of bounds or
129  * the size of \e pts is too big for an int.
130  * @exception std::bad_alloc if memory for the tree can't be allocated.
131  *
132  * \e pts may contain coincident points (i.e., the distance between them
133  * vanishes); these are treated as distinct.
134  *
135  * The choice of \e bucket is a tradeoff between space and efficiency. A
136  * larger \e bucket decreases the size of the NearestNeighbor object which
137  * scales as pts.size() / max(1, bucket) and reduces the number of distance
138  * calculations to construct the object by log2(bucket) * pts.size().
139  * However each search then requires about bucket additional distance
140  * calculations.
141  *
142  * \warning The distances computed by \e dist must satisfy the standard
143  * metric conditions. If not, the results are undefined. Neither the data
144  * in \e pts nor the query points should contain NaNs or infinities because
145  * such data violates the metric conditions.
146  *
147  * \warning The same arguments \e pts and \e dist must be provided
148  * to the Search() function.
149  **********************************************************************/
150  NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
151  int bucket = 4) {
152  Initialize(pts, dist, bucket);
153  }
154 
155  /**
156  * Initialize or re-initialize NearestNeighbor.
157  *
158  * @param[in] pts a vector of points to include in the tree.
159  * @param[in] dist the distance function object.
160  * @param[in] bucket the size of the buckets at the leaf nodes; this must
161  * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
162  * @exception GeographicErr if the value of \e bucket is out of bounds or
163  * the size of \e pts is too big for an int.
164  * @exception std::bad_alloc if memory for the tree can't be allocated.
165  *
166  * See also the documentation on the constructor.
167  *
168  * If an exception is thrown, the state of the NearestNeighbor is
169  * unchanged.
170  **********************************************************************/
171  void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
172  int bucket = 4) {
173  GEOGRAPHICLIB_STATIC_ASSERT(std::numeric_limits<dist_t>::is_signed,
174  "dist_t must be a signed type");
175  if (!( 0 <= bucket && bucket <= maxbucket ))
177  ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
178  if (pts.size() > size_t(std::numeric_limits<int>::max()))
179  throw GeographicLib::GeographicErr("pts array too big");
180  // the pair contains distance+id
181  std::vector<item> ids(pts.size());
182  for (int k = int(ids.size()); k--;)
183  ids[k] = std::make_pair(dist_t(0), k);
184  int cost = 0;
185  std::vector<Node> tree;
186  init(pts, dist, bucket, tree, ids, cost,
187  0, int(ids.size()), int(ids.size()/2));
188  _tree.swap(tree);
189  _numpoints = int(pts.size());
190  _bucket = bucket;
191  _mc = _sc = 0;
192  _cost = cost; _c1 = _k = _cmax = 0;
193  _cmin = std::numeric_limits<int>::max();
194  }
195 
196  /**
197  * Search the NearestNeighbor.
198  *
199  * @param[in] pts the vector of points used for initialization.
200  * @param[in] dist the distance function object used for initialization.
201  * @param[in] query the query point.
202  * @param[out] ind a vector of indices to the closest points found.
203  * @param[in] k the number of points to search for (default = 1).
204  * @param[in] maxdist only return points with distances of \e maxdist or
205  * less from \e query (default is the maximum \e dist_t).
206  * @param[in] mindist only return points with distances of more than
207  * \e mindist from \e query (default = &minus;1).
208  * @param[in] exhaustive whether to do an exhaustive search (default true).
209  * @param[in] tol the tolerance on the results (default 0).
210  * @return the distance to the closest point found (&minus;1 if no points
211  * are found).
212  * @exception GeographicErr if \e pts has a different size from that used
213  * to construct the object.
214  *
215  * The indices returned in \e ind are sorted by distance from \e query
216  * (closest first).
217  *
218  * The simplest invocation is with just the 4 non-optional arguments. This
219  * returns the closest distance and the index to the closest point in
220  * <i>ind</i><sub>0</sub>. If there are several points equally close, then
221  * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If
222  * there's no closest point (because the set of points is empty), then \e
223  * ind is empty and &minus;1 is returned.
224  *
225  * With \e exhaustive = true and \e tol = 0 (their default values), this
226  * finds the indices of \e k closest neighbors to \e query whose distances
227  * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
228  * maxdist have their default values, then these bounds have no effect. If
229  * \e query is one of the points in the tree, then set \e mindist = 0 to
230  * prevent this point (and other coincident points) from being returned.
231  *
232  * If \e exhaustive = false, exit as soon as \e k results satisfying the
233  * distance criteria are found. If less than \e k results are returned
234  * then the search was exhaustive even if \e exhaustive = false.
235  *
236  * If \e tol is positive, do an approximate search; in this case the
237  * results are to be interpreted as follows: if the <i>k</i>'th distance is
238  * \e dk, then all results with distances less than or equal \e dk &minus;
239  * \e tol are correct; all others are suspect &mdash; there may be other
240  * closer results with distances greater or equal to \e dk &minus; \e tol.
241  * If less than \e k results are found, then the search is exact.
242  *
243  * \e mindist should be used to exclude a "small" neighborhood of the query
244  * point (relative to the average spacing of the data). If \e mindist is
245  * large, the efficiency of the search deteriorates.
246  *
247  * \note Only the shortest distance is returned (as as the function value).
248  * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
249  * for \e j > 0) can be found by invoking \e dist again.
250  *
251  * \warning The arguments \e pts and \e dist must be identical to those
252  * used to initialize the NearestNeighbor; if not, this function will
253  * return some meaningless result (however, if the size of \e pts is wrong,
254  * this function throw an exception).
255  *
256  * \warning The query point cannot be a NaN or infinite because then the
257  * metric conditions are violated.
258  **********************************************************************/
259  dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
260  const pos_t& query,
261  std::vector<int>& ind,
262  int k = 1,
263  dist_t maxdist = std::numeric_limits<dist_t>::max(),
264  dist_t mindist = -1,
265  bool exhaustive = true,
266  dist_t tol = 0) const {
267  if (_numpoints != int(pts.size()))
268  throw GeographicLib::GeographicErr("pts array has wrong size");
269  std::priority_queue<item> results;
270  if (_numpoints > 0 && k > 0 && maxdist > mindist) {
271  // distance to the kth closest point so far
272  dist_t tau = maxdist;
273  // first is negative of how far query is outside boundary of node
274  // +1 if on boundary or inside
275  // second is node index
276  std::priority_queue<item> todo;
277  todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
278  int c = 0;
279  while (!todo.empty()) {
280  int n = todo.top().second;
281  dist_t d = -todo.top().first;
282  todo.pop();
283  dist_t tau1 = tau - tol;
284  // compare tau and d again since tau may have become smaller.
285  if (!( n >= 0 && tau1 >= d )) continue;
286  const Node& current = _tree[n];
287  dist_t dst = 0; // to suppress warning about uninitialized variable
288  bool exitflag = false, leaf = current.index < 0;
289  for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
290  int index = leaf ? current.leaves[i] : current.index;
291  if (index < 0) break;
292  dst = dist(pts[index], query);
293  ++c;
294 
295  if (dst > mindist && dst <= tau) {
296  if (int(results.size()) == k) results.pop();
297  results.push(std::make_pair(dst, index));
298  if (int(results.size()) == k) {
299  if (exhaustive)
300  tau = results.top().first;
301  else {
302  exitflag = true;
303  break;
304  }
305  if (tau <= tol) {
306  exitflag = true;
307  break;
308  }
309  }
310  }
311  }
312  if (exitflag) break;
313 
314  if (current.index < 0) continue;
315  tau1 = tau - tol;
316  for (int l = 0; l < 2; ++l) {
317  if (current.data.child[l] >= 0 &&
318  dst + current.data.upper[l] >= mindist) {
319  if (dst < current.data.lower[l]) {
320  d = current.data.lower[l] - dst;
321  if (tau1 >= d)
322  todo.push(std::make_pair(-d, current.data.child[l]));
323  } else if (dst > current.data.upper[l]) {
324  d = dst - current.data.upper[l];
325  if (tau1 >= d)
326  todo.push(std::make_pair(-d, current.data.child[l]));
327  } else
328  todo.push(std::make_pair(dist_t(1), current.data.child[l]));
329  }
330  }
331  }
332  ++_k;
333  _c1 += c;
334  double omc = _mc;
335  _mc += (c - omc) / _k;
336  _sc += (c - omc) * (c - _mc);
337  if (c > _cmax) _cmax = c;
338  if (c < _cmin) _cmin = c;
339  }
340 
341  dist_t d = -1;
342  ind.resize(results.size());
343 
344  for (int i = int(ind.size()); i--;) {
345  ind[i] = int(results.top().second);
346  if (i == 0) d = results.top().first;
347  results.pop();
348  }
349  return d;
350 
351  }
352 
353  /**
354  * @return the total number of points in the set.
355  **********************************************************************/
356  int NumPoints() const { return _numpoints; }
357 
358  /**
359  * Write the object to an I/O stream.
360  *
361  * @param[in,out] os the stream to write to.
362  * @param[in] bin if true (the default) save in binary mode.
363  * @exception std::bad_alloc if memory for the string representation of the
364  * object can't be allocated.
365  *
366  * The counters tracking the statistics of searches are not saved; however
367  * the initializtion cost is saved. The format of the binary saves is \e
368  * not portable.
369  *
370  * \note <a href="https://www.boost.org/libs/serialization/doc">
371  * Boost serialization</a> can also be used to save and restore a
372  * NearestNeighbor object. This requires that the
373  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
374  **********************************************************************/
375  void Save(std::ostream& os, bool bin = true) const {
376  int realspec = std::numeric_limits<dist_t>::digits *
377  (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
378  if (bin) {
379  char id[] = "NearestNeighbor_";
380  os.write(id, 16);
381  int buf[6];
382  buf[0] = version;
383  buf[1] = realspec;
384  buf[2] = _bucket;
385  buf[3] = _numpoints;
386  buf[4] = int(_tree.size());
387  buf[5] = _cost;
388  os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
389  for (int i = 0; i < int(_tree.size()); ++i) {
390  const Node& node = _tree[i];
391  os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
392  if (node.index >= 0) {
393  os.write(reinterpret_cast<const char *>(node.data.lower),
394  2 * sizeof(dist_t));
395  os.write(reinterpret_cast<const char *>(node.data.upper),
396  2 * sizeof(dist_t));
397  os.write(reinterpret_cast<const char *>(node.data.child),
398  2 * sizeof(int));
399  } else {
400  os.write(reinterpret_cast<const char *>(node.leaves),
401  _bucket * sizeof(int));
402  }
403  }
404  } else {
405  std::stringstream ostring;
406  // Ensure enough precision for type dist_t. With C++11, max_digits10
407  // can be used instead.
408  if (!std::numeric_limits<dist_t>::is_integer) {
409  static const int prec
410  = int(std::ceil(std::numeric_limits<dist_t>::digits *
411  std::log10(2.0) + 1));
412  ostring.precision(prec);
413  }
414  ostring << version << " " << realspec << " " << _bucket << " "
415  << _numpoints << " " << _tree.size() << " " << _cost;
416  for (int i = 0; i < int(_tree.size()); ++i) {
417  const Node& node = _tree[i];
418  ostring << "\n" << node.index;
419  if (node.index >= 0) {
420  for (int l = 0; l < 2; ++l)
421  ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
422  << " " << node.data.child[l];
423  } else {
424  for (int l = 0; l < _bucket; ++l)
425  ostring << " " << node.leaves[l];
426  }
427  }
428  os << ostring.str();
429  }
430  }
431 
432  /**
433  * Read the object from an I/O stream.
434  *
435  * @param[in,out] is the stream to read from
436  * @param[in] bin if true (the default) load in binary mode.
437  * @exception GeographicErr if the state read from \e is is illegal.
438  * @exception std::bad_alloc if memory for the tree can't be allocated.
439  *
440  * The counters tracking the statistics of searches are reset by this
441  * operation. Binary data must have been saved on a machine with the same
442  * architecture. If an exception is thrown, the state of the
443  * NearestNeighbor is unchanged.
444  *
445  * \note <a href="https://www.boost.org/libs/serialization/doc">
446  * Boost serialization</a> can also be used to save and restore a
447  * NearestNeighbor object. This requires that the
448  * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
449  *
450  * \warning The same arguments \e pts and \e dist used for
451  * initialization must be provided to the Search() function.
452  **********************************************************************/
453  void Load(std::istream& is, bool bin = true) {
454  int version1, realspec, bucket, numpoints, treesize, cost;
455  if (bin) {
456  char id[17];
457  is.read(id, 16);
458  id[16] = '\0';
459  if (!(std::strcmp(id, "NearestNeighbor_") == 0))
460  throw GeographicLib::GeographicErr("Bad ID");
461  is.read(reinterpret_cast<char *>(&version1), sizeof(int));
462  is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
463  is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
464  is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
465  is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
466  is.read(reinterpret_cast<char *>(&cost), sizeof(int));
467  } else {
468  if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
469  >> cost ))
470  throw GeographicLib::GeographicErr("Bad header");
471  }
472  if (!( version1 == version ))
473  throw GeographicLib::GeographicErr("Incompatible version");
474  if (!( realspec == std::numeric_limits<dist_t>::digits *
475  (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
476  throw GeographicLib::GeographicErr("Different dist_t types");
477  if (!( 0 <= bucket && bucket <= maxbucket ))
478  throw GeographicLib::GeographicErr("Bad bucket size");
479  if (!( 0 <= treesize && treesize <= numpoints ))
480  throw
481  GeographicLib::GeographicErr("Bad number of points or tree size");
482  if (!( 0 <= cost ))
483  throw GeographicLib::GeographicErr("Bad value for cost");
484  std::vector<Node> tree;
485  tree.reserve(treesize);
486  for (int i = 0; i < treesize; ++i) {
487  Node node;
488  if (bin) {
489  is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
490  if (node.index >= 0) {
491  is.read(reinterpret_cast<char *>(node.data.lower),
492  2 * sizeof(dist_t));
493  is.read(reinterpret_cast<char *>(node.data.upper),
494  2 * sizeof(dist_t));
495  is.read(reinterpret_cast<char *>(node.data.child),
496  2 * sizeof(int));
497  } else {
498  is.read(reinterpret_cast<char *>(node.leaves),
499  bucket * sizeof(int));
500  for (int l = bucket; l < maxbucket; ++l)
501  node.leaves[l] = 0;
502  }
503  } else {
504  if (!( is >> node.index ))
505  throw GeographicLib::GeographicErr("Bad index");
506  if (node.index >= 0) {
507  for (int l = 0; l < 2; ++l) {
508  if (!( is >> node.data.lower[l] >> node.data.upper[l]
509  >> node.data.child[l] ))
510  throw GeographicLib::GeographicErr("Bad node data");
511  }
512  } else {
513  // Must be at least one valid leaf followed by a sequence end
514  // markers (-1).
515  for (int l = 0; l < bucket; ++l) {
516  if (!( is >> node.leaves[l] ))
517  throw GeographicLib::GeographicErr("Bad leaf data");
518  }
519  for (int l = bucket; l < maxbucket; ++l)
520  node.leaves[l] = 0;
521  }
522  }
523  node.Check(numpoints, treesize, bucket);
524  tree.push_back(node);
525  }
526  _tree.swap(tree);
527  _numpoints = numpoints;
528  _bucket = bucket;
529  _mc = _sc = 0;
530  _cost = cost; _c1 = _k = _cmax = 0;
531  _cmin = std::numeric_limits<int>::max();
532  }
533 
534  /**
535  * Write the object to stream \e os as text.
536  *
537  * @param[in,out] os the output stream.
538  * @param[in] t the NearestNeighbor object to be saved.
539  * @exception std::bad_alloc if memory for the string representation of the
540  * object can't be allocated.
541  **********************************************************************/
542  friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
543  { t.Save(os, false); return os; }
544 
545  /**
546  * Read the object from stream \e is as text.
547  *
548  * @param[in,out] is the input stream.
549  * @param[out] t the NearestNeighbor object to be loaded.
550  * @exception GeographicErr if the state read from \e is is illegal.
551  * @exception std::bad_alloc if memory for the tree can't be allocated.
552  **********************************************************************/
553  friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
554  { t.Load(is, false); return is; }
555 
556  /**
557  * Swap with another NearestNeighbor object.
558  *
559  * @param[in,out] t the NearestNeighbor object to swap with.
560  **********************************************************************/
562  std::swap(_numpoints, t._numpoints);
563  std::swap(_bucket, t._bucket);
564  std::swap(_cost, t._cost);
565  _tree.swap(t._tree);
566  std::swap(_mc, t._mc);
567  std::swap(_sc, t._sc);
568  std::swap(_c1, t._c1);
569  std::swap(_k, t._k);
570  std::swap(_cmin, t._cmin);
571  std::swap(_cmax, t._cmax);
572  }
573 
574  /**
575  * The accumulated statistics on the searches so far.
576  *
577  * @param[out] setupcost the cost of initializing the NearestNeighbor.
578  * @param[out] numsearches the number of calls to Search().
579  * @param[out] searchcost the total cost of the calls to Search().
580  * @param[out] mincost the minimum cost of a Search().
581  * @param[out] maxcost the maximum cost of a Search().
582  * @param[out] mean the mean cost of a Search().
583  * @param[out] sd the standard deviation in the cost of a Search().
584  *
585  * Here "cost" measures the number of distance calculations needed. Note
586  * that the accumulation of statistics is \e not thread safe.
587  **********************************************************************/
588  void Statistics(int& setupcost, int& numsearches, int& searchcost,
589  int& mincost, int& maxcost,
590  double& mean, double& sd) const {
591  setupcost = _cost; numsearches = _k; searchcost = _c1;
592  mincost = _cmin; maxcost = _cmax;
593  mean = _mc; sd = std::sqrt(_sc / (_k - 1));
594  }
595 
596  /**
597  * Reset the counters for the accumulated statistics on the searches so
598  * far.
599  **********************************************************************/
600  void ResetStatistics() const {
601  _mc = _sc = 0;
602  _c1 = _k = _cmax = 0;
603  _cmin = std::numeric_limits<int>::max();
604  }
605 
606  private:
607  // Package up a dist_t and an int. We will want to sort on the dist_t so
608  // put it first.
609  typedef std::pair<dist_t, int> item;
610  // \cond SKIP
611  class Node {
612  public:
613  struct bounds {
614  dist_t lower[2], upper[2]; // bounds on inner/outer distances
615  int child[2];
616  };
617  union {
618  bounds data;
619  int leaves[maxbucket];
620  };
621  int index;
622 
623  Node()
624  : index(-1)
625  {
626  for (int i = 0; i < 2; ++i) {
627  data.lower[i] = data.upper[i] = 0;
628  data.child[i] = -1;
629  }
630  }
631 
632  // Sanity check on a Node
633  void Check(int numpoints, int treesize, int bucket) const {
634  if (!( -1 <= index && index < numpoints ))
635  throw GeographicLib::GeographicErr("Bad index");
636  if (index >= 0) {
637  if (!( -1 <= data.child[0] && data.child[0] < treesize &&
638  -1 <= data.child[1] && data.child[1] < treesize ))
639  throw GeographicLib::GeographicErr("Bad child pointers");
640  if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
641  data.upper[0] <= data.lower[1] &&
642  data.lower[1] <= data.upper[1] ))
643  throw GeographicLib::GeographicErr("Bad bounds");
644  } else {
645  // Must be at least one valid leaf followed by a sequence end markers
646  // (-1).
647  bool start = true;
648  for (int l = 0; l < bucket; ++l) {
649  if (!( (start ?
650  ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
651  leaves[l] == -1) ))
652  throw GeographicLib::GeographicErr("Bad leaf data");
653  start = leaves[l] >= 0;
654  }
655  for (int l = bucket; l < maxbucket; ++l) {
656  if (leaves[l] != 0)
657  throw GeographicLib::GeographicErr("Bad leaf data");
658  }
659  }
660  }
661 
662 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
663  GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
664  friend class boost::serialization::access;
665  template<class Archive>
666  void save(Archive& ar, const unsigned int) const {
667  ar & boost::serialization::make_nvp("index", index);
668  if (index < 0)
669  ar & boost::serialization::make_nvp("leaves", leaves);
670  else
671  ar & boost::serialization::make_nvp("lower", data.lower)
672  & boost::serialization::make_nvp("upper", data.upper)
673  & boost::serialization::make_nvp("child", data.child);
674  }
675  template<class Archive>
676  void load(Archive& ar, const unsigned int) {
677  ar & boost::serialization::make_nvp("index", index);
678  if (index < 0)
679  ar & boost::serialization::make_nvp("leaves", leaves);
680  else
681  ar & boost::serialization::make_nvp("lower", data.lower)
682  & boost::serialization::make_nvp("upper", data.upper)
683  & boost::serialization::make_nvp("child", data.child);
684  }
685  template<class Archive>
686  void serialize(Archive& ar, const unsigned int file_version)
687  { boost::serialization::split_member(ar, *this, file_version); }
688 #endif
689  };
690  // \endcond
691 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
692  GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
693  friend class boost::serialization::access;
694  template<class Archive> void save(Archive& ar, const unsigned) const {
695  int realspec = std::numeric_limits<dist_t>::digits *
696  (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
697  // Need to use version1, otherwise load error in debug mode on Linux:
698  // undefined reference to GeographicLib::NearestNeighbor<...>::version.
699  int version1 = version;
700  ar & boost::serialization::make_nvp("version", version1)
701  & boost::serialization::make_nvp("realspec", realspec)
702  & boost::serialization::make_nvp("bucket", _bucket)
703  & boost::serialization::make_nvp("numpoints", _numpoints)
704  & boost::serialization::make_nvp("cost", _cost)
705  & boost::serialization::make_nvp("tree", _tree);
706  }
707  template<class Archive> void load(Archive& ar, const unsigned) {
708  int version1, realspec, bucket, numpoints, cost;
709  ar & boost::serialization::make_nvp("version", version1);
710  if (version1 != version)
711  throw GeographicLib::GeographicErr("Incompatible version");
712  std::vector<Node> tree;
713  ar & boost::serialization::make_nvp("realspec", realspec);
714  if (!( realspec == std::numeric_limits<dist_t>::digits *
715  (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
716  throw GeographicLib::GeographicErr("Different dist_t types");
717  ar & boost::serialization::make_nvp("bucket", bucket);
718  if (!( 0 <= bucket && bucket <= maxbucket ))
719  throw GeographicLib::GeographicErr("Bad bucket size");
720  ar & boost::serialization::make_nvp("numpoints", numpoints)
721  & boost::serialization::make_nvp("cost", cost)
722  & boost::serialization::make_nvp("tree", tree);
723  if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
724  throw
725  GeographicLib::GeographicErr("Bad number of points or tree size");
726  for (int i = 0; i < int(tree.size()); ++i)
727  tree[i].Check(numpoints, int(tree.size()), bucket);
728  _tree.swap(tree);
729  _numpoints = numpoints;
730  _bucket = bucket;
731  _mc = _sc = 0;
732  _cost = cost; _c1 = _k = _cmax = 0;
733  _cmin = std::numeric_limits<int>::max();
734  }
735  template<class Archive>
736  void serialize(Archive& ar, const unsigned int file_version)
737  { boost::serialization::split_member(ar, *this, file_version); }
738 #endif
739 
740  int _numpoints, _bucket, _cost;
741  std::vector<Node> _tree;
742  // Counters to track stastistics on the cost of searches
743  mutable double _mc, _sc;
744  mutable int _c1, _k, _cmin, _cmax;
745 
746  int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
747  std::vector<Node>& tree, std::vector<item>& ids, int& cost,
748  int l, int u, int vp) {
749 
750  if (u == l)
751  return -1;
752  Node node;
753 
754  if (u - l > (bucket == 0 ? 1 : bucket)) {
755 
756  // choose a vantage point and move it to the start
757  int i = vp;
758  std::swap(ids[l], ids[i]);
759 
760  int m = (u + l + 1) / 2;
761 
762  for (int k = l + 1; k < u; ++k) {
763  ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
764  ++cost;
765  }
766  // partition around the median distance
767  std::nth_element(ids.begin() + l + 1,
768  ids.begin() + m,
769  ids.begin() + u);
770  node.index = ids[l].second;
771  if (m > l + 1) { // node.child[0] is possibly empty
772  typename std::vector<item>::iterator
773  t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
774  node.data.lower[0] = t->first;
775  t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
776  node.data.upper[0] = t->first;
777  // Use point with max distance as vantage point; this point act as a
778  // "corner" point and leads to a good partition.
779  node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
780  l + 1, m, int(t - ids.begin()));
781  }
782  typename std::vector<item>::iterator
783  t = std::max_element(ids.begin() + m, ids.begin() + u);
784  node.data.lower[1] = ids[m].first;
785  node.data.upper[1] = t->first;
786  // Use point with max distance as vantage point here too
787  node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
788  m, u, int(t - ids.begin()));
789  } else {
790  if (bucket == 0)
791  node.index = ids[l].second;
792  else {
793  node.index = -1;
794  // Sort the bucket entries so that the tree is independent of the
795  // implementation of nth_element.
796  std::sort(ids.begin() + l, ids.begin() + u);
797  for (int i = l; i < u; ++i)
798  node.leaves[i-l] = ids[i].second;
799  for (int i = u - l; i < bucket; ++i)
800  node.leaves[i] = -1;
801  for (int i = bucket; i < maxbucket; ++i)
802  node.leaves[i] = 0;
803  }
804  }
805 
806  tree.push_back(node);
807  return int(tree.size()) - 1;
808  }
809 
810  };
811 
812 } // namespace GeographicLib
813 
814 namespace std {
815 
816  /**
817  * Swap two GeographicLib::NearestNeighbor objects.
818  *
819  * @tparam dist_t the type used for measuring distances.
820  * @tparam pos_t the type for specifying the positions of points.
821  * @tparam distfun_t the type for a function object which calculates
822  * distances between points.
823  * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
824  * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
825  **********************************************************************/
826  template <typename dist_t, typename pos_t, class distfun_t>
829  a.swap(b);
830  }
831 
832 } // namespace std
833 
834 #if defined(_MSC_VER)
835 # pragma warning (pop)
836 #endif
837 
838 #endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
void Statistics(int &setupcost, int &numsearches, int &searchcost, int &mincost, int &maxcost, double &mean, double &sd) const
NearestNeighbor(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void Load(std::istream &is, bool bin=true)
dist_t Search(const std::vector< pos_t > &pts, const distfun_t &dist, const pos_t &query, std::vector< int > &ind, int k=1, dist_t maxdist=std::numeric_limits< dist_t >::max(), dist_t mindist=-1, bool exhaustive=true, dist_t tol=0) const
Namespace for GeographicLib.
Definition: Accumulator.cpp:12
friend std::ostream & operator<<(std::ostream &os, const NearestNeighbor &t)
void swap(GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &a, GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &b)
friend std::istream & operator>>(std::istream &is, NearestNeighbor &t)
Exception handling for GeographicLib.
Definition: Constants.hpp:390
Header for GeographicLib::Constants class.
void Initialize(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void swap(NearestNeighbor &t)
Nearest-neighbor calculations.
void Save(std::ostream &os, bool bin=true) const