libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNkNN.h
Go to the documentation of this file.
1 /* Copyright 2014 INSA-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNkNN.h
19  * \author Yann LEYDIER
20  */
21 
22 #ifndef CRNkNN_HEADER
23 #define CRNkNN_HEADER
24 
25 #include <CRNException.h>
26 #include <CRNMath/CRNMath.h>
27 #include <vector>
28 #include <map>
29 #include <set>
30 #include <limits>
31 #include <algorithm>
32 
33 namespace crn
34 {
44  template<typename DataType, typename DistFunc> class IterativekNN
45  {
46  public:
55  IterativekNN(size_t neighborhood, const DistFunc &df, size_t fast_min = 50, size_t fast_factor = 10, size_t fast_max = 100): k(neighborhood), dist(df), min_fast(fast_min), sampling(fast_factor), max_fast(fast_max)
56  { if (k <= 1) throw ExceptionDomain("IterativekNN::IterativekNN(size_t neighborhood, const DistFunc &df): The neighborhood must be > 1."); }
65  IterativekNN(size_t neighborhood, DistFunc &&df, size_t fast_min = 50, size_t fast_factor = 10, size_t fast_max = 100): k(neighborhood), dist(std::move(df)), min_fast(fast_min), sampling(fast_factor), max_fast(fast_max)
66  { if (k <= 1) throw ExceptionDomain("IterativekNN::IterativekNN(size_t neighborhood, DistFunc &&df): The neighborhood must be > 1."); }
67 
71  void Add(const DataType &obj)
72  {
73  sample.emplace_back(obj);
74  add();
75  }
76 
80  void Add(DataType &&obj)
81  {
82  sample.emplace_back(std::move(obj));
83  add();
84  }
85 
89  void FastAdd(const DataType &obj)
90  {
91  sample.emplace_back(obj);
92  if (sample.size() < min_fast)
93  add();
94  else
95  fastAdd();
96  }
97 
101  void FastAdd(DataType &&obj)
102  {
103  sample.emplace_back(std::move(obj));
104  if (sample.size() < min_fast)
105  add();
106  else
107  fastAdd();
108  }
109 
113  std::vector<double> GetLOF() const
114  {
115  const auto ndata = sample.size();
116  if (ndata < k)
117  throw ExceptionDimension("IterativekNN::GetLOF(): #samples < k");
118 
119  std::vector<double> lrd(ndata, 0.0);
120  for (size_t el = 0; el < ndata; ++el)
121  {
122  auto itn = sample[el].nn.begin();
123  for (size_t kn = 0; kn < crn::Min(k, sample[el].nn.size()); ++kn, ++itn)
124  lrd[el] += crn::Max(itn->first, sample[itn->second].nn.rbegin()->first);
125  lrd[el] = double(k) / lrd[el];
126  }
127  std::vector<double> lof(ndata, 0.0);
128  for (size_t el = 0; el < ndata; ++el)
129  {
130  for (const auto &nn : sample[el].nn)
131  {
132  lof[el] += lrd[nn.second];
133  }
134  lof[el] /= double(k) * lrd[el];
135  }
136  return lof;
137  }
138 
144  std::vector<double> GetLoOP(double lambda) const
145  {
146  const auto ndata = sample.size();
147  if (ndata < k)
148  throw ExceptionDimension("IterativekNN::GetLoOP(): #samples < k");
149 
150  // pdist
151  std::vector<double> pdist(ndata, 0.0);
152  for (size_t tmp = 0; tmp < ndata; ++tmp)
153  {
154  for (const auto &neigh : sample[tmp].nn)
155  pdist[tmp] += Sqr(neigh.first);
156  pdist[tmp] = lambda * sqrt(pdist[tmp] / double(k));
157  }
158 
159  // PLOF
160  std::vector<double> plof(ndata, 0.0);
161  for (size_t tmp = 0; tmp < ndata; ++tmp)
162  {
163  for (const auto &neigh : sample[tmp].nn)
164  plof[tmp] += pdist[neigh.second];
165  plof[tmp] = (double(k) * pdist[tmp] / plof[tmp]) + 1;
166  }
167  double nplof = sqrt(2.0) * lambda * double(ndata) / std::accumulate(plof.begin(), plof.end(), 0.0);
168 
169  // LoOP
170  std::vector<double> loop(ndata, 0.0);
171  for (size_t tmp = 0; tmp < ndata; ++tmp)
172  loop[tmp] = Max(0.0, erf(plof[tmp] / nplof));
173 
174  return loop;
175  }
176 
177  const DataType& GetElement(size_t el) const { return sample[el].data; }
178  size_t GetNElements() const noexcept { return sample.size(); }
179 
180  private:
182  struct Element
183  {
184  Element(const DataType &d):data(d) {}
185  Element(DataType &&d):data(std::move(d)) {}
186  DataType data;
187  std::multimap<double, size_t> nn;
188  };
189 
190  inline void add()
191  {
192  sample.back().nn.insert(std::make_pair(0.0, sample.size() - 1));
193  if (sample.size() == 1)
194  return;
195  std::set<size_t> need_update;
196  // look for neighbours
197  for (size_t el = 0; el < sample.size() - 1; ++el)
198  {
199  const double d = dist(sample.back().data, sample[el].data);
200  // add to new point's kNN?
201  double maxval = (sample.back().nn.size() >= k) ? sample.back().nn.rbegin()->first : std::numeric_limits<double>::max();
202  if (d < maxval)
203  {
204  if (sample.back().nn.size() >= k)
205  sample.back().nn.erase(maxval);
206  sample.back().nn.insert(std::make_pair(d, el));
207  }
208  // add to old point's kNN?
209  if (el == (sample.size() - 1))
210  continue; // don't add twice
211  maxval = (sample[el].nn.size() >= k) ? sample[el].nn.rbegin()->first : std::numeric_limits<double>::max();
212  if (d < maxval)
213  {
214  if (sample[el].nn.size() >= k)
215  sample[el].nn.erase(maxval);
216  for (const auto &n : sample[el].nn)
217  {
218  need_update.insert(n.second);
219  }
220  sample[el].nn.insert(std::make_pair(d, sample.size() - 1));
221  }
222 
223  }
224  for (const auto &n : sample.back().nn)
225  need_update.insert(n.second);
226  }
227 
228  inline void fastAdd()
229  {
230  sample.back().nn.insert(std::make_pair(0.0, sample.size() - 1));
231  if (sample.size() == 1)
232  return;
233  std::set<size_t> tovisit, visited;
234  std::set<size_t> need_update;
235 
236  const size_t ninit = crn::Min((sample.size() - 2) / sampling + 1, max_fast);
237  for (size_t tmp = 0; tmp < ninit; ++tmp)
238  tovisit.insert((tmp * (sample.size() - 2)) / ninit);
239  // look for neighbours
240  while (!tovisit.empty())
241  {
242  const size_t el = *tovisit.begin();
243  visited.insert(el);
244  tovisit.erase(tovisit.begin());
245 
246  const double d = dist(sample.back().data, sample[el].data);
247  // add to new point's kNN?
248  double maxval = (sample.back().nn.size() >= k) ? sample.back().nn.rbegin()->first : std::numeric_limits<double>::max();
249  if (d < maxval)
250  {
251  if (sample.back().nn.size() >= k)
252  sample.back().nn.erase(maxval);
253  sample.back().nn.insert(std::make_pair(d, el));
254  for (const auto &n : sample[el].nn)
255  if (visited.find(n.second) == visited.end())
256  tovisit.insert(n.second);
257  }
258  // add to old point's kNN?
259  if (el == (sample.size() - 1))
260  continue; // don't add twice
261  maxval = (sample[el].nn.size() >= k) ? sample[el].nn.rbegin()->first : std::numeric_limits<double>::max();
262  if (d < maxval)
263  {
264  if (sample[el].nn.size() >= k)
265  sample[el].nn.erase(maxval);
266  for (const auto &n : sample[el].nn)
267  {
268  need_update.insert(n.second);
269  if (visited.find(n.second) == visited.end())
270  tovisit.insert(n.second);
271  }
272  sample[el].nn.insert(std::make_pair(d, sample.size() - 1));
273  }
274  }
275  for (const auto &n : sample.back().nn)
276  need_update.insert(n.second);
277  }
278 
279  std::vector<Element> sample;
280  DistFunc dist;
281  const size_t k;
282  const size_t sampling;
283  const size_t max_fast;
284  const size_t min_fast;
285  };
286 }
287 
288 #endif
void Add(const DataType &obj)
Adds a element with full computation of nearest neighbors.
Definition: CRNkNN.h:71
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
std::vector< double > GetLOF() const
Definition: CRNkNN.h:113
IterativekNN(size_t neighborhood, const DistFunc &df, size_t fast_min=50, size_t fast_factor=10, size_t fast_max=100)
Definition: CRNkNN.h:55
void FastAdd(const DataType &obj)
Adds a element with partial computation of nearest neighbors.
Definition: CRNkNN.h:89
A generic domain error.
Definition: CRNException.h:83
void FastAdd(DataType &&obj)
Adds a element with partial computation of nearest neighbors.
Definition: CRNkNN.h:101
Iterative kNN.
Definition: CRNkNN.h:44
void Add(DataType &&obj)
Adds a element with full computation of nearest neighbors.
Definition: CRNkNN.h:80
IterativekNN(size_t neighborhood, DistFunc &&df, size_t fast_min=50, size_t fast_factor=10, size_t fast_max=100)
Definition: CRNkNN.h:65
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
Definition: CRNMath.h:61
A dimension error.
Definition: CRNException.h:119
size_t GetNElements() const noexcept
Definition: CRNkNN.h:178
std::vector< double > GetLoOP(double lambda) const
Definition: CRNkNN.h:144
const T & Min(const T &a, const T &b)
Returns the min of two values.
Definition: CRNMath.h:49
const DataType & GetElement(size_t el) const
Definition: CRNkNN.h:177