libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNkMeans.h
Go to the documentation of this file.
1 /* Copyright 2007-2016 Yann LEYDIER, CoReNum, INSA-Lyon, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNkMeans.h
19  * \author Yann LEYDIER
20  */
21 
22 #ifndef CRNKMEANS_HEADER
23 #define CRNKMEANS_HEADER
24 
25 #include <CRNAI/CRNBasicClassify.h>
26 #include <vector>
27 #include <random>
28 
32 namespace crn
33 {
34  /****************************************************************************/
45  template<
46  typename T,
47  typename std::enable_if<IsMetric<typename std::result_of<decltype(Dereference<T>)&(const T&)>::type>::value, int>::type = 0,
48  typename std::enable_if<IsVectorOverR<typename std::result_of<decltype(Dereference<T>)&(const T&)>::type>::value, int>::type = 0
49  > class kMeans
50  {
51  public:
52  using value_type = typename std::result_of<decltype(Dereference<T>)&(const T&)>::type;
53  kMeans(const kMeans&) = delete;
54  kMeans(kMeans&&) = default;
55  kMeans& operator=(const kMeans&) = delete;
56  kMeans& operator=(kMeans&&) = default;
57 
59  void AddPrototype(const T &sam)
60  {
61  proto.push_back(Dereference(sam));
62  }
67  {
68  const auto nb = GetNbSamples();
69  if (!nb)
70  throw ExceptionNotFound("No sample available.");
71  auto generator = std::default_random_engine{};
72  auto distribution = std::uniform_int_distribution<size_t>{0, nb - 1};
73  AddPrototype(data[distribution(generator)]);
74  }
76  size_t GetNbClasses() const noexcept { return proto.size(); }
78  const std::vector<value_type>& GetPrototypes() const noexcept { return proto; }
80  void ClearPrototypes() noexcept { proto.clear(); }
81 
85  void AddSample(const T &sam)
86  {
87  data.push_back(sam);
88  }
92  void AddSample(T &&sam)
93  {
94  data.push_back(std::move(sam));
95  }
97  size_t GetNbSamples() const noexcept { return data.size(); }
99  const std::vector<const T>& GetSamples() const noexcept { return data; }
101  void ClearSamples() noexcept { data.clear(); }
102 
107  size_t Run(size_t maxcnt = 100)
108  {
109  auto cnt = size_t(0);
110  const auto k = proto.size();
111  classes.clear();
112  classes.resize(k);
113  auto popclasses = std::vector<size_t>(k, 0);
114  auto finished = false;
115  while (!finished)
116  {
117  for (auto &c : classes)
118  c.clear();
119  // for each sample
120  for (auto elnum = size_t(0); elnum < data.size(); ++elnum)
121  {
122  // classify
123  classes[Classify(*data[elnum])].push_back(elnum);
124  }
125  finished = true;
126  // for each class
127  for (auto p = size_t(0); p < k; p++)
128  {
129  // compute mean
130  auto sum = SumType<value_type>(Zero(*data.front()));
131  for (const auto &num : classes[p])
132  sum += *data[num];
133  proto[p] = value_type(sum * (1.0 / double(classes[p].size())));
134  // check if finished
135  if (popclasses[p] != classes[p].size())
136  {
137  popclasses[p] = classes[p].size();
138  finished = false;
139  }
140  }
141  cnt += 1;
142  if (cnt > maxcnt)
143  break;
144  }
145  return cnt;
146  }
147 
153  size_t Classify(const value_type &obj, double *distance = nullptr)
154  {
155  if (proto.empty())
156  throw ExceptionDimension();
157  auto res = BasicClassify::NearestNeighbor(obj, proto.begin(), proto.end());
158  if (distance)
159  *distance = res.distance;
160  return res.class_id;
161  }
162 
168  const std::vector<size_t>& GetClass(size_t k) const
169  {
170  if (k >= classes.size())
171  throw ExceptionDimension("kMeans::GetClass(): Wrong class number.");
172  return classes[k];
173  }
174 
175  private:
176  std::vector<const T> data;
177  std::vector<value_type> proto;
178  std::vector<std::vector<size_t>> classes;
179  };
180 }
181 #endif
typename TypeInfo< T >::SumType SumType
Definition: CRNType.h:185
kMeans & operator=(const kMeans &)=delete
k-means clustering algorithm
Definition: CRNkMeans.h:49
size_t Run(size_t maxcnt=100)
Runs the k-means.
Definition: CRNkMeans.h:107
size_t GetNbClasses() const noexcept
Returns the number of classes.
Definition: CRNkMeans.h:76
void AddSample(T &&sam)
Adds one sample.
Definition: CRNkMeans.h:92
void AddSample(const T &sam)
Adds one sample.
Definition: CRNkMeans.h:85
void AddRandomPrototype()
Adds one random prototype out of the samples pool.
Definition: CRNkMeans.h:66
const std::vector< value_type > & GetPrototypes() const noexcept
Returns the vector of prototypes.
Definition: CRNkMeans.h:78
T Zero(const T &)
Returns an object of the same type that represents 0.
Definition: CRNType.h:212
void ClearPrototypes() noexcept
Clears the prototypes.
Definition: CRNkMeans.h:80
A dimension error.
Definition: CRNException.h:119
size_t Classify(const value_type &obj, double *distance=nullptr)
Finds the closest prototype.
Definition: CRNkMeans.h:153
const std::vector< size_t > & GetClass(size_t k) const
Returns the content of one class.
Definition: CRNkMeans.h:168
size_t GetNbSamples() const noexcept
Returns the number of samples.
Definition: CRNkMeans.h:97
kMeans(const kMeans &)=delete
static ClassifResult NearestNeighbor(const typename std::iterator_traits< ConstIterator >::value_type &obj, ConstIterator begin, ConstIterator end)
Finds the nearest neighbor in a set of objects.
void ClearSamples() noexcept
Clears the samples.
Definition: CRNkMeans.h:101
std::pointer_traits< P >::element_type & Dereference(const P &p)
Definition: CRNType.h:193
typename std::result_of< decltype(Dereference< T >)&(const T &)>::type value_type
Definition: CRNkMeans.h:52
void AddPrototype(const T &sam)
Adds one prototype.
Definition: CRNkMeans.h:59
const std::vector< const T > & GetSamples() const noexcept
Returns the vector of samples.
Definition: CRNkMeans.h:99
An item was not found in a container.
Definition: CRNException.h:95