libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNOutliers.cpp
Go to the documentation of this file.
1 /* Copyright 2014-2015 INSA-Lyon, Université Paris Descartes
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNOutliers.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include <CRNAI/CRNOutliers.h>
24 #include <CRNStringUTF8.h>
25 #include <CRNi18n.h>
26 #include <numeric> // for accumulate
27 
28 using namespace crn;
29 
31 template<typename DISTMAT> std::vector<double> computeLOF(const DISTMAT &distmat, size_t ndata, size_t k)
32 {
33  if (k <= 1)
34  throw ExceptionDomain(crn::StringUTF8("std::vector<double> ComputeLOF(): ") + _("The neighborhood must be > 1."));
35  if (ndata <= k)
36  throw ExceptionLogic(crn::StringUTF8("std::vector<double> ComputeLOF(): ") + _("The neighborhood is greater than the number of elements."));
37 
38  // kNN and k-distance ( = kNN.rbegin()->first)
39  std::vector<std::multimap<double, size_t>> knn(ndata);
40  for (size_t tmp1 = 0; tmp1 < ndata; ++tmp1)
41  {
42  for (size_t tmp2 = 0; tmp2 < ndata; ++tmp2)
43  {
44  double maxval = (knn[tmp1].size() >= k) ? knn[tmp1].rbegin()->first : std::numeric_limits<double>::max();
45  if (distmat[tmp1][tmp2] < maxval)
46  {
47  if (knn[tmp1].size() >= k)
48  knn[tmp1].erase(maxval);
49  knn[tmp1].insert(std::make_pair(distmat[tmp1][tmp2], tmp2));
50  }
51  }
52  }
53 
54  // local reachability density
55  std::vector<double> lrd(ndata, 0.0);
56  for (size_t tmp1 = 0; tmp1 < ndata; ++tmp1)
57  {
58  for (const auto &nn : knn[tmp1])
59  lrd[tmp1] += crn::Max(distmat[tmp1][nn.second], knn[nn.second].rbegin()->first); // reachdist(q,p) = max(d(q,p), kdist(p))
60  lrd[tmp1] = double(k) / lrd[tmp1];
61  }
62 
63  // local outlier factor
64  std::vector<double> lof(ndata, 0.0);
65  for (size_t tmp1 = 0; tmp1 < ndata; ++tmp1)
66  {
67  for (const auto &nn : knn[tmp1])
68  {
69  lof[tmp1] += lrd[nn.second];
70  }
71  lof[tmp1] /= double(k) * lrd[tmp1];
72  }
73 
74  return lof;
75 }
76 
89 std::vector<double> crn::ComputeLOF(const crn::SquareMatrixDouble &distmat, size_t k)
90 {
91  return computeLOF(distmat, distmat.GetRows(), k);
92 }
93 
106 std::vector<double> crn::ComputeLOF(const std::vector<std::vector<double>> &distmat, size_t k)
107 {
108  const auto ndata = distmat.size();
109  for (const auto &row : distmat)
110  if (row.size() != ndata)
111  throw ExceptionDimension(crn::StringUTF8("std::vector<double> ComputeLOF(): ") + _("The distance matrix is not square."));
112 
113  return computeLOF(distmat, ndata, k);
114 }
115 
117 template<typename DISTMAT> std::vector<double> computeLoOP(const DISTMAT &distmat, size_t ndata, size_t k, double lambda)
118 {
119  if (k <= 1)
120  throw ExceptionDomain(crn::StringUTF8("std::vector<double> ComputeLoOP(): ") + _("The neighborhood must be > 1."));
121  if (lambda <= 0)
122  throw ExceptionDomain(crn::StringUTF8("std::vector<double> ComputeLoOP(): ") + _("lambda must be > 0."));
123  if (ndata <= k)
124  throw ExceptionLogic(crn::StringUTF8("std::vector<double> ComputeLoOP(): ") + _("The neighborhood is greater than the number of elements."));
125 
126  // kNN
127  std::vector<std::multimap<double, size_t>> knn(ndata);
128  for (size_t tmp1 = 0; tmp1 < ndata; ++tmp1)
129  {
130  for (size_t tmp2 = 0; tmp2 < ndata; ++tmp2)
131  {
132  double maxval = (knn[tmp1].size() >= k) ? knn[tmp1].rbegin()->first : std::numeric_limits<double>::max();
133  if (distmat[tmp1][tmp2] < maxval)
134  {
135  if (knn[tmp1].size() >= k)
136  knn[tmp1].erase(maxval);
137  knn[tmp1].insert(std::make_pair(distmat[tmp1][tmp2], tmp2));
138  }
139  }
140  }
141 
142  // pdist
143  std::vector<double> pdist(ndata, 0.0);
144  for (size_t tmp = 0; tmp < ndata; ++tmp)
145  {
146  for (const auto &neigh : knn[tmp])
147  pdist[tmp] += Sqr(neigh.first);
148  pdist[tmp] = lambda * sqrt(pdist[tmp] / double(k));
149  }
150 
151  // PLOF
152  std::vector<double> plof(ndata, 0.0);
153  for (size_t tmp = 0; tmp < ndata; ++tmp)
154  {
155  for (const auto &neigh : knn[tmp])
156  plof[tmp] += pdist[neigh.second];
157  plof[tmp] = (double(k) * pdist[tmp] / plof[tmp]) + 1;
158  }
159  double nplof = sqrt(2.0) * lambda * double(ndata) / std::accumulate(plof.begin(), plof.end(), 0.0); // XXX Vérifier cette équation!!!
160 
161  // LoOP
162  std::vector<double> loop(ndata, 0.0);
163  for (size_t tmp = 0; tmp < ndata; ++tmp)
164  loop[tmp] = Max(0.0, erf(plof[tmp] / nplof));
165 
166  return loop;
167 }
168 
182 std::vector<double> crn::ComputeLoOP(const crn::SquareMatrixDouble &distmat, size_t k, double lambda)
183 {
184  return computeLoOP(distmat, distmat.GetRows(), k, lambda);
185 }
186 
200 std::vector<double> crn::ComputeLoOP(const std::vector<std::vector<double>> &distmat, size_t k, double lambda)
201 {
202  const size_t ndata = distmat.size();
203  for (const auto &row : distmat)
204  if (row.size() != ndata)
205  throw ExceptionDimension(crn::StringUTF8("std::vector<double> ComputeLoOP(const std::vector<std::vector<double>> &distmat, size_t k, double lambda): ") + _("The distance matrix is not square."));
206 
207  return computeLoOP(distmat, ndata, k, lambda);
208 }
209 
size_t GetRows() const noexcept
Returns the number of rows.
Definition: CRNMatrix.h:157
std::vector< double > ComputeLOF(const SquareMatrixDouble &distmat, size_t k)
Compute the Local Outlier Factor for each element from the distance matrix.
Definition: CRNOutliers.cpp:89
#define _(String)
Definition: CRNi18n.h:51
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
A generic logic error.
Definition: CRNException.h:71
std::vector< double > computeLoOP(const DISTMAT &distmat, size_t ndata, size_t k, double lambda)
std::vector< double > ComputeLoOP(const SquareMatrixDouble &distmat, size_t k, double lambda)
compute the local outlier probability for each element from the distance matrix
A generic domain error.
Definition: CRNException.h:83
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
Definition: CRNMath.h:61
A dimension error.
Definition: CRNException.h:119
std::vector< double > computeLOF(const DISTMAT &distmat, size_t ndata, size_t k)
Definition: CRNOutliers.cpp:31
Square double matrix class.
A character string class.
Definition: CRNStringUTF8.h:49