libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNSpectralClustering.cpp
Go to the documentation of this file.
1 /* Copyright 2012-2016 INSA-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNSpectralClustering.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include <iostream>
23 #include <fstream>
24 #include <set>
25 
27 #include <CRNAI/CRNClassifResult.h>
29 #include <CRNException.h>
30 #include <CRNi18n.h>
31 
32 using namespace crn;
33 
42 SpectralClustering SpectralClustering::CreateLocalScaleFromNN(const SquareMatrixDouble &distance_matrix, size_t sigma_neighborhood, double epsilon)
43 {
44  if (sigma_neighborhood < 1)
45  throw ExceptionInvalidArgument(_("Neighborhood to compute sigma must be >=1."));
46  size_t nelem = distance_matrix.GetRows();
47  if (!nelem)
48  throw ExceptionDimension(_("Empty distance matrix."));
49  std::vector<double> sigmas(nelem);
50  for (size_t r = 0; r < nelem; ++r)
51  {
52  std::set<double> dist;
53  for (size_t c = 0; c < nelem; ++c)
54  if (c != r)
55  dist.insert(distance_matrix[r][c]);
56  auto it = dist.begin();
57  std::advance(it, sigma_neighborhood - 1);
58  if (it != dist.end())
59  sigmas[r] = *it;
60  else
61  sigmas[r] = *dist.rbegin();
62  }
63 
64  SquareMatrixDouble w(nelem, 0);
65  for (size_t r = 0; r < nelem; ++r)
66  for (size_t c = 0; c < nelem; ++c)
67  {
68  if ((r == c) || (distance_matrix[r][c] > epsilon))
69  w[r][c] = 0;
70  else
71  w[r][c] = exp(-Sqr(distance_matrix[r][c]) / (2 * sigmas[r] * sigmas[c]));
72  }
73  return SpectralClustering(w);
74 }
75 
84 SpectralClustering SpectralClustering::CreateGlobalScaleFromNN(const SquareMatrixDouble &distance_matrix, size_t sigma_neighborhood, double epsilon)
85 {
86  if (sigma_neighborhood < 1)
87  throw ExceptionInvalidArgument(_("Neighborhood to compute sigma must be >=1."));
88  size_t nelem = distance_matrix.GetRows();
89  if (!nelem)
90  throw ExceptionDimension(_("Empty distance matrix."));
91  double sigma = 0;
92  for (size_t r = 0; r < nelem; ++r)
93  {
94  std::set<double> dist;
95  for (size_t c = 0; c < nelem; ++c)
96  if (c != r)
97  dist.insert(distance_matrix[r][c]);
98  auto it = dist.begin();
99  std::advance(it, sigma_neighborhood - 1);
100  if (it != dist.end())
101  sigma += *it;
102  else
103  sigma += *dist.rbegin();
104  }
105  sigma /= double(nelem);
106  //std::cout << sigma << std::endl;
107 
108  return CreateFixedScale(distance_matrix, sigma, epsilon);
109 }
110 
119 SpectralClustering SpectralClustering::CreateGlobalScaleFromDimension(const SquareMatrixDouble &distance_matrix, size_t dimension, double epsilon)
120 {
121  if (dimension < 1)
122  throw ExceptionInvalidArgument(_("Dimension must be >=1."));
123  size_t nelem = distance_matrix.GetRows();
124  if (!nelem)
125  throw ExceptionDimension(_("Empty distance matrix."));
126  double sigma = 0;
127  for (size_t r = 0; r < nelem; ++r)
128  {
129  double m = *std::max_element(distance_matrix[r] + r, distance_matrix[r] + nelem);
130  if (m > sigma) sigma = m;
131  }
132  sigma /= 2 * pow(double(nelem), 1 / double(dimension));
133  //std::cout << sigma << std::endl;
134 
135  return CreateFixedScale(distance_matrix, sigma, epsilon);
136 }
137 
146 SpectralClustering SpectralClustering::CreateFixedScale(const SquareMatrixDouble &distance_matrix, double sigma, double epsilon)
147 {
148  if (sigma < 0.0)
149  throw ExceptionInvalidArgument(_("Sigma must be positive."));
150  size_t nelem = distance_matrix.GetRows();
151  if (!nelem)
152  throw ExceptionDimension(_("Empty distance matrix."));
153 
154  sigma *= sigma * 2;
155  SquareMatrixDouble w(nelem, 0);
156  for (size_t r = 0; r < nelem; ++r)
157  for (size_t c = 0; c < nelem; ++c)
158  {
159  if ((r == c) || (distance_matrix[r][c] > epsilon))
160  w[r][c] = 0;
161  else
162  w[r][c] = exp(-Sqr(distance_matrix[r][c]) / sigma);
163  }
164  return SpectralClustering(w);
165 }
166 
172 {
174  for (size_t r = 0; r < w.GetRows(); ++r)
175  {
176  double s = 0;
177  for (size_t c = 0; c < w.GetCols(); ++c)
178  s += w[r][c];
179  if (s != 0) s = 1 / sqrt(s);
180  d[r][r] = s;
181  }
182  auto l = d;
183  l *= w;
184  l *= d;
185 
186  eigenpairs = l.MakeTQLIEigensystem(1000);
187 }
188 
192 std::vector<double> SpectralClustering::GetEigenvalues() const
193 {
194  std::vector<double> vals(eigenpairs.size());
195  auto vit = vals.begin();
196  for (auto eit = eigenpairs.rbegin(); eit != eigenpairs.rend(); ++eit)
197  *vit++ = eit->first;
198  return vals;
199 }
200 
207 {
208  if ((limit < 0.0) || (limit > 1.0))
209  throw ExceptionDomain(_("Eigenvalues should be in [0, 1]."));
210  size_t n = 1;
211  for (auto eit = eigenpairs.rbegin(); eit != eigenpairs.rend(); ++eit)
212  {
213  if (eit->first < limit)
214  break;
215  n += 1;
216  }
217  return n;
218 }
219 
226 std::vector<std::vector<double>> SpectralClustering::ProjectData(size_t ncoordinates, bool normalize) const
227 {
228  if (ncoordinates < 1)
229  throw ExceptionDimension(_("Cannot project on less than one coordinate."));
230  size_t nelem = eigenpairs.size();
231  std::vector<std::vector<double> > data(nelem, std::vector<double>(ncoordinates));
232  std::multimap<double, MatrixDouble>::const_reverse_iterator stopit;
233  if (ncoordinates >= nelem)
234  stopit = eigenpairs.rend();
235  else
236  {
237  stopit = eigenpairs.rbegin();
238  std::advance(stopit, ncoordinates);
239  }
240  size_t coord = 0;
241  for (auto eit = eigenpairs.rbegin(); eit != stopit; ++eit)
242  {
243  for (size_t tmp = 0; tmp < nelem; ++tmp)
244  data[tmp][coord] = eit->second[tmp][0];
245  coord += 1;
246  }
247  if (normalize)
248  {
249  for (auto & elem : data)
250  {
251  double s = 0;
252  for (size_t i = 0; i < ncoordinates; ++i)
253  s += Sqr(elem[i]);
254  s = sqrt(s);
255  for (size_t i = 0; i < ncoordinates; ++i)
256  elem[i] /= s;
257  }
258  }
259  return data;
260 }
261 
size_t GetRows() const noexcept
Returns the number of rows.
Definition: CRNMatrix.h:157
size_t GetCols() const noexcept
Returns the number of columns.
Definition: CRNMatrix.h:163
#define _(String)
Definition: CRNi18n.h:51
std::vector< std::vector< double > > ProjectData(size_t ncoordinates, bool normalize) const
Projects the data on the first coordinates.
static SpectralClustering CreateGlobalScaleFromDimension(const SquareMatrixDouble &distance_matrix, size_t dimension, double epsilon=std::numeric_limits< double >::max())
Clustering with global automatic scale.
static SpectralClustering CreateLocalScaleFromNN(const SquareMatrixDouble &distance_matrix, size_t sigma_neighborhood=7, double epsilon=std::numeric_limits< double >::max())
Clustering with local automatic scale.
std::vector< double > GetEigenvalues() const
Returns the eigenvalues (sorted from highest to lowest)
size_t EstimateClusterNumber(double limit=1.0) const
Estimates the number of clusters.
A generic domain error.
Definition: CRNException.h:83
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
Definition: CRNMath.h:61
static SpectralClustering CreateGlobalScaleFromNN(const SquareMatrixDouble &distance_matrix, size_t sigma_neighborhood=1, double epsilon=std::numeric_limits< double >::max())
Clustering with global automatic scale.
A dimension error.
Definition: CRNException.h:119
Spectral clustering.
static SpectralClustering CreateFixedScale(const SquareMatrixDouble &distance_matrix, double sigma, double epsilon=std::numeric_limits< double >::max())
Clustering with global fixed scale.
Square double matrix class.
Invalid argument error (e.g.: nullptr pointer)
Definition: CRNException.h:107
SpectralClustering(const SpectralClustering &)=delete