libcrn  3.9.5
A document image processing library
•All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNMultivariateGaussianMixture.h
Go to the documentation of this file.
1 /* Copyright 2008-2016 INSA Lyon, CoReNum, Université Paris Descartes, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNMultivariateGaussianMixture.h
19  * \author Jean DUONG, Yann LEYDIER
20  */
21 
22 
23 #ifndef CRNMULTIVARIATEGAUSSIANMIXTURE_HEADER
24 #define CRNMULTIVARIATEGAUSSIANMIXTURE_HEADER
25 
26 #include <CRNString.h>
29 #include <limits>
30 
31 namespace crn
32 {
33  /****************************************************************************/
44  {
45  public:
47  MultivariateGaussianMixture():dimension(1){}
49  MultivariateGaussianMixture(size_t d):dimension(d){}
52 
62  template<typename ITER> MultivariateGaussianMixture(ITER it_begin, ITER it_end, size_t nb_seeds = 2)
63  {
64  if (nb_seeds == 1)
65  AddMember(MultivariateGaussianPDF(MatrixDouble(MeanPattern(it_begin, it_end)), SquareMatrixDouble(MakeCovariance(it_begin, it_end))), 1.0);
66  else
67  EM(it_begin, it_end, (unsigned int)(nb_seeds));
68  }
69 
72 
74  virtual ~MultivariateGaussianMixture() override;
75 
77  void SetTo(const MultivariateGaussianMixture& m);
79  size_t GetDimension() const noexcept { return dimension; }
81  void SetDimension(size_t k) noexcept;
83  size_t GetNbMembers() const noexcept { return members.size(); }
84 
86  double GetWeight(size_t k) const;
88  MultivariateGaussianPDF GetMember(size_t k) const;
90  MatrixDouble GetMean(size_t k) const;
92  SquareMatrixDouble GetVariance(size_t k) const;
93 
95  void AddMember(const MultivariateGaussianPDF &pdf, double Weight);
97  void SetMember(const MultivariateGaussianPDF &pdf, double w, size_t k);
98 
100  double ValueAt(const MatrixDouble& X) const;
102  double ValueAt(const MatrixDouble& X, size_t k, bool w = false) const;
104  double ValueAt(const std::vector<double> &x, size_t k, bool w = false) const;
105 
107  unsigned int EM(const MatrixDouble& patterns, size_t nbSeeds = 2, double epsilon = std::numeric_limits<double>::epsilon(), size_t MaximalIterations = 100);
108 
109  unsigned int EM(const std::vector<std::vector<double> > &patterns, size_t nbSeeds = 2, double epsilon = std::numeric_limits<double>::epsilon(), size_t MaximalIterations = 100);
110 
122  template<typename ITER> unsigned int EM(ITER it_begin, ITER it_end, size_t nbSeeds = 2, double epsilon = std::numeric_limits<double>::epsilon(), size_t MaximalIterations = 100)
123  {
124  unsigned int nbIterations = 0;
125  size_t nbMembers = nbSeeds;
126  size_t nbKeys = std::distance(it_begin, it_end);
127  size_t nbPatterns = 0;
128 
129  for (auto it = it_begin; it != it_end; ++it)
130  nbPatterns += it->second;
131 
132  dimension = it_begin->first.size();
133  MatrixDouble proba(nbPatterns, nbMembers, 0.0);
134 
136  // Setup //
138 
139  members.clear();
140 
141  MatrixDouble mu(MeanPattern(it_begin, it_end), Orientation::VERTICAL);
142  SquareMatrixDouble sigma(MakeCovariance(it_begin, it_end));
143 
144  std::vector<MatrixDouble> seeds;
145  //std::vector<double> deltas;
146 
147  if (nbMembers > 1)
148  {
149  for (size_t d = 0; d < dimension; ++d)
150  {
151  auto min = std::numeric_limits<double>::infinity();
152  auto max = -std::numeric_limits<double>::infinity();
153  auto it = it_begin;
154 
155  for (size_t k = 0; k < nbKeys; ++k)
156  {
157  double v(it->first[d]);
158 
159  min = Min(min, v);
160  max = Max(max, v);
161  ++it;
162  }
163 
164  double delta = (max - min) / (double(nbMembers) - 1.0);
165 
166  MatrixDouble sample = MatrixDouble(nbMembers, 1, 0.0);
167 
168  for (size_t k = 0; k < nbMembers; ++k)
169  sample[k][0] = min + double(k) * delta;
170 
171  seeds.push_back(sample);
172  }
173  }
174  else
175  {
176  for (size_t d = 0; d < dimension; ++d)
177  {
178  MatrixDouble sample = MatrixDouble(1, 1, mu[d][0]);
179 
180  seeds.push_back(sample);
181  }
182  }
183 
184  for (size_t k = 0; k < nbMembers; ++k)
185  {
186  // Set up mean
187 
188  MatrixDouble m(dimension, 1, 0.0);
189 
190  for (size_t d = 0; d < dimension; ++d)
191  m[d][0] = seeds[d][k][0];
192 
193  // Create member
194 
195  AddMember(MultivariateGaussianPDF(m, sigma), 1.0);
196  }
197 
199  // Iterations //
201 
202  // Some computational variables
203 
204  bool reloop = true;
205  double likelihood = - std::numeric_limits<double>::infinity();
206 
207  //std::vector<double> xi;
208 
209  SquareMatrixDouble m(dimension, 0.0);
211 
212  while (reloop && (nbIterations < MaximalIterations))
213  {
214  backup = *this;
215 
216  // Step E : Expectation
217 
218  auto it = it_begin;
219 
220  for (size_t i = 0; i < nbKeys; ++i)
221  {
222  const auto& xi = it->first;
223 
224  for (auto count = 0; count < it->second; ++count)
225  {
226  double pi = 0.0;
227 
228  for (size_t k = 0; k < nbMembers; ++k)
229  {
230  double pik = ValueAt(xi, k);
231 
232  proba[i][k] = pik;
233  pi += pik;
234  }
235 
236  proba.MultRow(i, 1.0 / pi);
237  }
238 
239  ++it;
240  }
241 
242  //Step M : Maximisation
243 
244  for (size_t k = 0; k < nbMembers; ++k)
245  {
246  double cumulPk = 0.0;
247  mu.SetAll(0.0);
248  sigma.SetAll(0.0);
249 
250  auto mean_k = GetMean(k);
251  auto it = it_begin;
252 
253  std::vector<double> tmp_pattern(dimension);
254 
255  for (size_t i = 0; i < nbKeys; ++i)
256  {
257  double pik = proba[i][k];
258 
259  for (auto count = 0; count < it->second; ++count)
260  {
261  cumulPk += pik;
262 
263  // Retrieve i-th pattern
264  for (size_t j = 0; j < dimension; ++j)
265  {
266  double val = it->first[j];
267 
268  mu[j][0] += val * pik;
269  tmp_pattern[j] = val - mean_k[j][0];
270  }
271 
272  for (size_t r = 0; r < dimension; ++r)
273  for (size_t c = 0; c < dimension; ++c)
274  m[r][c] = tmp_pattern[r] * tmp_pattern[c];
275 
276  m *= pik;
277  sigma += m;
278  }
279 
280  ++it;
281  }
282 
283  double invCumulPk = 1.0 / cumulPk;
284 
285  mu *= invCumulPk;
286  sigma *= invCumulPk;
287 
288  members[k].second = double(cumulPk) / double(nbPatterns);
289  members[k].first = MultivariateGaussianPDF(mu, sigma);
290  }
291 
292  double newLikelihood = MLLE(it_begin, it_end);
293  double likelihoodDiff = newLikelihood - likelihood;
294 
295  likelihood = newLikelihood;
296  nbIterations++;
297 
298  // Global output for each loop
299 
300  if (likelihoodDiff < 0.0)
301  SetTo(backup);
302  else
303  reloop = IsValid() && (likelihoodDiff > epsilon);
304  }
305 
306  return nbIterations;
307  }
308 
310  String ToString() const;
312  String ToString(size_t k) const;
313 
315  double MLLE(const MatrixDouble& data) const;
317  double MLLE(const std::vector< std::vector<double> > &data) const;
319  template<typename ITER> double MLLE(ITER it_begin, ITER it_end) const
320  {
321  double e = 0.0;
322 
323  for (auto it = it_begin; it != it_end; ++it)
324  e += double(it->second) * log(ValueAt(it->first));
325 
326  return e;
327  }
328 
330  bool IsValid() const;
331 
332  private:
334  bool isValidMemberIndex(size_t k) const { return k < members.size(); }
335 
336  std::vector<std::pair<MultivariateGaussianPDF, double>> members;
337  size_t dimension;
340  };
341 
342  template<> struct IsClonable<MultivariateGaussianMixture> : public std::true_type {};
343 
344 }
345 
346 #endif
std::vector< double > MeanPattern(const std::vector< std::vector< double >> &m)
Return mean pattern of sample.
void SetTo(const MultivariateGaussianMixture &m)
Set mixture from another one.
unsigned int EM(const MatrixDouble &patterns, size_t nbSeeds=2, double epsilon=std::numeric_limits< double >::epsilon(), size_t MaximalIterations=100)
Expectation Maximization.
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
double MLLE(const MatrixDouble &data) const
Maximum log-likelihood estimation.
size_t GetNbMembers() const noexcept
Returns the number of density functions.
void SetMember(const MultivariateGaussianPDF &pdf, double w, size_t k)
Replaces a given density function and its weight.
void AddMember(const MultivariateGaussianPDF &pdf, double Weight)
Adds a density function.
A UTF32 character string class.
Definition: CRNString.h:61
double GetWeight(size_t k) const
Returns the weight of a given density function.
virtual ~MultivariateGaussianMixture() override
Destructor.
std::vector< std::vector< double > > MakeCovariance(const std::vector< std::vector< double >> &m)
Return covariance for sample.
String ToString() const
Dumps a summary of the mixture to a string.
MultivariateGaussianMixture(size_t d)
Default constructor.
MultivariateGaussianMixture & operator=(const MultivariateGaussianMixture &)=default
void MultRow(size_t r, double v)
Scale one row from matrix.
Definition: CRNMatrix.h:320
double ValueAt(const MatrixDouble &X) const
Evaluates a pattern.
void SetAll(const T &v)
Set all elements.
Definition: CRNMatrix.h:173
size_t GetDimension() const noexcept
Returns the number of features.
double matrix class
MultivariateGaussianPDF GetMember(size_t k) const
Returns a given density function.
const T & Min(const T &a, const T &b)
Returns the min of two values.
Definition: CRNMath.h:49
SquareMatrixDouble GetVariance(size_t k) const
Returns the variance of a given density function.
bool IsValid() const
Test if mixture is valid i.e. if all paratemers have finite values.
void SetDimension(size_t k) noexcept
Sets the number of features.
#define CRN_DECLARE_CLASS_CONSTRUCTOR(classname)
Declares a class constructor.
Definition: CRNObject.h:173
Square double matrix class.
Multivariate Gaussian distribution.
unsigned int EM(ITER it_begin, ITER it_end, size_t nbSeeds=2, double epsilon=std::numeric_limits< double >::epsilon(), size_t MaximalIterations=100)
double MLLE(ITER it_begin, ITER it_end) const
Maximum log-likelihood estimation.
MatrixDouble GetMean(size_t k) const
Returns the mean of a given density function.
MultivariateGaussianMixture(ITER it_begin, ITER it_end, size_t nb_seeds=2)