55 IterativekNN(
size_t neighborhood,
const DistFunc &df,
size_t fast_min = 50,
size_t fast_factor = 10,
size_t fast_max = 100): k(neighborhood), dist(df), min_fast(fast_min), sampling(fast_factor), max_fast(fast_max)
56 {
if (k <= 1)
throw ExceptionDomain(
"IterativekNN::IterativekNN(size_t neighborhood, const DistFunc &df): The neighborhood must be > 1."); }
65 IterativekNN(
size_t neighborhood, DistFunc &&df,
size_t fast_min = 50,
size_t fast_factor = 10,
size_t fast_max = 100): k(neighborhood), dist(std::move(df)), min_fast(fast_min), sampling(fast_factor), max_fast(fast_max)
66 {
if (k <= 1)
throw ExceptionDomain(
"IterativekNN::IterativekNN(size_t neighborhood, DistFunc &&df): The neighborhood must be > 1."); }
71 void Add(
const DataType &obj)
73 sample.emplace_back(obj);
80 void Add(DataType &&obj)
82 sample.emplace_back(std::move(obj));
91 sample.emplace_back(obj);
92 if (sample.size() < min_fast)
103 sample.emplace_back(std::move(obj));
104 if (sample.size() < min_fast)
115 const auto ndata = sample.size();
119 std::vector<double> lrd(ndata, 0.0);
120 for (
size_t el = 0; el < ndata; ++el)
122 auto itn = sample[el].nn.begin();
123 for (
size_t kn = 0; kn <
crn::Min(k, sample[el].nn.size()); ++kn, ++itn)
124 lrd[el] +=
crn::Max(itn->first, sample[itn->second].nn.rbegin()->first);
125 lrd[el] = double(k) / lrd[el];
127 std::vector<double> lof(ndata, 0.0);
128 for (
size_t el = 0; el < ndata; ++el)
130 for (
const auto &nn : sample[el].nn)
132 lof[el] += lrd[nn.second];
134 lof[el] /= double(k) * lrd[el];
144 std::vector<double>
GetLoOP(
double lambda)
const
146 const auto ndata = sample.size();
151 std::vector<double> pdist(ndata, 0.0);
152 for (
size_t tmp = 0; tmp < ndata; ++tmp)
154 for (
const auto &neigh : sample[tmp].nn)
155 pdist[tmp] +=
Sqr(neigh.first);
156 pdist[tmp] = lambda * sqrt(pdist[tmp] /
double(k));
160 std::vector<double> plof(ndata, 0.0);
161 for (
size_t tmp = 0; tmp < ndata; ++tmp)
163 for (
const auto &neigh : sample[tmp].nn)
164 plof[tmp] += pdist[neigh.second];
165 plof[tmp] = (double(k) * pdist[tmp] / plof[tmp]) + 1;
167 double nplof = sqrt(2.0) * lambda * double(ndata) / std::accumulate(plof.begin(), plof.end(), 0.0);
170 std::vector<double> loop(ndata, 0.0);
171 for (
size_t tmp = 0; tmp < ndata; ++tmp)
172 loop[tmp] =
Max(0.0, erf(plof[tmp] / nplof));
177 const DataType&
GetElement(
size_t el)
const {
return sample[el].data; }
184 Element(
const DataType &d):data(d) {}
185 Element(DataType &&d):data(std::move(d)) {}
187 std::multimap<double, size_t> nn;
192 sample.back().nn.insert(std::make_pair(0.0, sample.size() - 1));
193 if (sample.size() == 1)
195 std::set<size_t> need_update;
197 for (
size_t el = 0; el < sample.size() - 1; ++el)
199 const double d = dist(sample.back().data, sample[el].data);
201 double maxval = (sample.back().nn.size() >= k) ? sample.back().nn.rbegin()->first : std::numeric_limits<double>::max();
204 if (sample.back().nn.size() >= k)
205 sample.back().nn.erase(maxval);
206 sample.back().nn.insert(std::make_pair(d, el));
209 if (el == (sample.size() - 1))
211 maxval = (sample[el].nn.size() >= k) ? sample[el].nn.rbegin()->first : std::numeric_limits<double>::max();
214 if (sample[el].nn.size() >= k)
215 sample[el].nn.erase(maxval);
216 for (
const auto &n : sample[el].nn)
218 need_update.insert(n.second);
220 sample[el].nn.insert(std::make_pair(d, sample.size() - 1));
224 for (
const auto &n : sample.back().nn)
225 need_update.insert(n.second);
228 inline void fastAdd()
230 sample.back().nn.insert(std::make_pair(0.0, sample.size() - 1));
231 if (sample.size() == 1)
233 std::set<size_t> tovisit, visited;
234 std::set<size_t> need_update;
236 const size_t ninit =
crn::Min((sample.size() - 2) / sampling + 1, max_fast);
237 for (
size_t tmp = 0; tmp < ninit; ++tmp)
238 tovisit.insert((tmp * (sample.size() - 2)) / ninit);
240 while (!tovisit.empty())
242 const size_t el = *tovisit.begin();
244 tovisit.erase(tovisit.begin());
246 const double d = dist(sample.back().data, sample[el].data);
248 double maxval = (sample.back().nn.size() >= k) ? sample.back().nn.rbegin()->first : std::numeric_limits<double>::max();
251 if (sample.back().nn.size() >= k)
252 sample.back().nn.erase(maxval);
253 sample.back().nn.insert(std::make_pair(d, el));
254 for (
const auto &n : sample[el].nn)
255 if (visited.find(n.second) == visited.end())
256 tovisit.insert(n.second);
259 if (el == (sample.size() - 1))
261 maxval = (sample[el].nn.size() >= k) ? sample[el].nn.rbegin()->first : std::numeric_limits<double>::max();
264 if (sample[el].nn.size() >= k)
265 sample[el].nn.erase(maxval);
266 for (
const auto &n : sample[el].nn)
268 need_update.insert(n.second);
269 if (visited.find(n.second) == visited.end())
270 tovisit.insert(n.second);
272 sample[el].nn.insert(std::make_pair(d, sample.size() - 1));
275 for (
const auto &n : sample.back().nn)
276 need_update.insert(n.second);
279 std::vector<Element> sample;
282 const size_t sampling;
283 const size_t max_fast;
284 const size_t min_fast;
void Add(const DataType &obj)
Adds a element with full computation of nearest neighbors.
const T & Max(const T &a, const T &b)
Returns the max of two values.
std::vector< double > GetLOF() const
IterativekNN(size_t neighborhood, const DistFunc &df, size_t fast_min=50, size_t fast_factor=10, size_t fast_max=100)
void FastAdd(const DataType &obj)
Adds a element with partial computation of nearest neighbors.
void FastAdd(DataType &&obj)
Adds a element with partial computation of nearest neighbors.
void Add(DataType &&obj)
Adds a element with full computation of nearest neighbors.
IterativekNN(size_t neighborhood, DistFunc &&df, size_t fast_min=50, size_t fast_factor=10, size_t fast_max=100)
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
size_t GetNElements() const noexcept
std::vector< double > GetLoOP(double lambda) const
const T & Min(const T &a, const T &b)
Returns the min of two values.
const DataType & GetElement(size_t el) const