libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNkMedoids.cpp
Go to the documentation of this file.
1 /* Copyright 2014 CoReNum
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNkMedoids.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include <CRNAI/CRNkMedoids.h>
23 #include <CRNMath/CRNMath.h>
24 #include <algorithm>
25 #include <numeric> // for accumulate
26 
27 using namespace crn;
28 using namespace kmedoids;
29 
30 std::vector<size_t> init::Central::operator()(const std::vector<std::vector<double>> &distmat) const
31 {
32  const auto nelem = distmat.size();
33 
34  // sum on each line
35  auto lsum = std::vector<double>(nelem);
36  std::transform(distmat.begin(), distmat.end(), lsum.begin(),
37  [](const std::vector<double> &l){ return std::accumulate(l.begin(), l.end(), 0.0); });
38 
39  // v
40  auto v = std::vector<double>(nelem, 0.0);
41  for (size_t j = 0; j < nelem; ++j)
42  for (size_t i = 0; i < nelem; ++i)
43  v[j] += distmat[i][j] / lsum[i];
44 
45  // initial medoids
46  auto medoids = std::vector<size_t>(k);
47  for (size_t tmp = 0; tmp < k; ++tmp)
48  {
49  auto minit = std::min_element(v.begin(), v.end());
50  medoids[tmp] = minit - v.begin();
51  *minit = std::numeric_limits<double>::max();
52  }
53 
54  return medoids;
55 }
56 
57 std::vector<size_t> init::PAM::operator()(const std::vector<std::vector<double>> &distmat) const
58 {
59  const auto nelem = distmat.size();
60 
61  // sum on each line
62  auto lsum = std::vector<double>(nelem);
63  std::transform(distmat.begin(), distmat.end(), lsum.begin(),
64  [](const std::vector<double> &l){ return std::accumulate(l.begin(), l.end(), 0.0); });
65 
66  auto medoids = std::vector<size_t>(k);
67  // choose the element with the lowest distance to others
68  medoids[0] = std::min_element(lsum.begin(), lsum.end()) - lsum.begin();
69 
70  for (size_t m = 1; m < k; ++m)
71  {
72  // gain to add each element
73  auto g = std::vector<double>(nelem, 0.0);
74  for (size_t i = 0; i < nelem; ++i)
75  {
76  // check if already a medoid
77  auto found = false;
78  for (size_t tmp = 0; tmp < m; ++tmp)
79  if (medoids[tmp] == i) { found = true; break; }
80  if (found) continue;
81  for (size_t j = 0; j < nelem; ++j)
82  {
83  // look for closest medoid
84  auto ndist = std::numeric_limits<double>::max();
85  for (size_t tmp = 0; tmp < m; ++tmp)
86  if (distmat[j][medoids[tmp]] < ndist)
87  {
88  ndist = distmat[j][medoids[tmp]];
89  }
90  g[i] += crn::Max(ndist - distmat[j][i], 0.0);
91  }
92  }
93  medoids[m] = std::max_element(g.begin(), g.end()) - g.begin();
94  }
95  return medoids;
96 }
97 
98 void update::Local::operator()(std::vector<size_t> &medoids, const std::vector<std::multimap<double, size_t>> &clusters, const std::vector<std::vector<double>> &distmat) const
99 {
100  for (size_t m = 0; m < medoids.size(); ++m)
101  {
102  auto ndist = std::numeric_limits<double>::max();
103  auto nobj = size_t{0};
104  for (const auto &o : clusters[m])
105  {
106  auto sdist = 0.0;
107  for (const auto &n : clusters[m])
108  {
109  sdist += distmat[n.second][o.second];
110  }
111  if (sdist < ndist)
112  {
113  ndist = sdist;
114  nobj = o.second;
115  }
116  }
117  medoids[m] = nobj;
118  }
119 }
120 
121 void update::PAM::operator()(std::vector<size_t> &medoids, const std::vector<std::multimap<double, size_t>> &clusters, const std::vector<std::vector<double>> &distmat) const
122 {
123  size_t mini = 0, minh = 0;
124  auto mintih = std::numeric_limits<double>::max();
125  for (size_t i = 0; i < medoids.size(); ++i)
126  {
127  for (size_t c = 0; c < clusters.size(); ++c)
128  for (const auto &h : clusters[c])
129  {
130  // check if this is the medoid of the cluster
131  if (h.second == medoids[c]) continue;
132  auto tih = 0.0;
133  for (size_t tc = 0; tc < clusters.size(); ++tc)
134  for (const auto &j : clusters[tc])
135  {
136  if (distmat[medoids[i]][j.second] > j.first) // j is not in class i
137  tih += crn::Min(distmat[j.second][h.second] - j.first, 0.0);
138  else
139  { // j is in class i
140  auto ndist = std::numeric_limits<double>::max();
141  for (size_t tk = 0; tk < clusters.size(); ++tk)
142  {
143  if (tk == tc) continue;
144  if (distmat[j.second][medoids[tk]] < ndist)
145  {
146  ndist = distmat[j.second][medoids[tk]];
147  }
148  }
149  tih += crn::Min(distmat[j.second][h.second], ndist) - j.first;
150  } // j is in class i
151  if (tih > mintih)
152  break; // slight optimization
153  } // for each element
154  if (tih < mintih)
155  {
156  mintih = tih;
157  mini = i;
158  minh = h.second;
159  }
160  } // for each non-medoid element (h)
161  } // for each medoid (i)
162  //std::cout << "tih = " << mintih << " for " << medoids[mini] << " -> " << minh << std::endl;
163  if (mintih < 0)
164  medoids[mini] = minh;
165 }
166 
std::vector< size_t > operator()(const std::vector< std::vector< double >> &distmat) const
Definition: CRNkMedoids.cpp:57
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
void operator()(std::vector< size_t > &medoids, const std::vector< std::multimap< double, size_t >> &clusters, const std::vector< std::vector< double >> &distmat) const
Definition: CRNkMedoids.cpp:98
std::vector< size_t > operator()(const std::vector< std::vector< double >> &distmat) const
Definition: CRNkMedoids.cpp:30
void operator()(std::vector< size_t > &medoids, const std::vector< std::multimap< double, size_t >> &clusters, const std::vector< std::vector< double >> &distmat) const
const T & Min(const T &a, const T &b)
Returns the min of two values.
Definition: CRNMath.h:49