libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNMultivariateGaussianMixture.cpp
Go to the documentation of this file.
1 /* Copyright 2008-2016 INSA Lyon, CoReNum, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNMultivariateGaussianMixture.cpp
19  * \author Jean DUONG
20  */
21 
22 #include <iostream>
23 
24 #include <CRNException.h>
31 #include <CRNStringUTF8.h>
32 #include <CRNProtocols.h>
33 #include <CRNi18n.h>
34 
35 using namespace crn;
36 
37 
42 {
43  members.clear();
44 }
45 
52 {
54 
55  members.clear();
56 
57  for (size_t tmp = 0; tmp < m.GetNbMembers(); tmp++)
58  {
59  const MultivariateGaussianPDF pdf = m.members[tmp].first;
60  double w = m.GetWeight(tmp);
61 
62  members.push_back(std::make_pair(pdf, w));
63  }
64 }
65 
66 
73 {
74  dimension = k;
75 }
76 
86 {
87  if (!isValidMemberIndex(k))
88  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::GetMember(int k): ") +
89  _("index out of bounds."));
90 
91  return members[k].first;
92 }
93 
104 {
105  if (!isValidMemberIndex(k))
106  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::GetMember(size_t k): ") +
107  _("index out of bounds."));
108 
109  return members[k].second;
110 }
111 
122 {
123  if (!isValidMemberIndex(k))
124  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::GetMember(size_t k): ") +
125  _("index out of bounds."));
126 
127  return members[k].first.GetMean();
128 }
129 
140 {
141  if (!isValidMemberIndex(k))
142  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::GetMember(size_t k): ") +
143  _("index out of bounds."));
144 
145  return members[k].first.GetVariance();
146 }
147 
156 {
157  size_t dim = pdf.GetDimension();
158 
159  if (members.empty())
160  dimension = dim;
161 
162  if (dimension == dim)
163  members.push_back(std::make_pair(pdf, w));
164  else
165  throw ExceptionDimension(_("Incompatible dimensions"));
166 }
167 
178 {
179  if (isValidMemberIndex(k))
180  {
181  size_t dim = pdf.GetDimension();
182 
183  if (dimension == dim)
184  members[k] = std::make_pair(pdf, w);
185  else
186  throw ExceptionDimension(_("Incompatible dimensions"));
187  }
188  else
189  throw ExceptionDomain(_("Invalid index"));
190 }
191 
192 
201 {
202  double d = 0.0;
203 
204  for (auto & elem : members)
205  d += elem.second * elem.first.ValueAt(X);
206 
207  return d;
208 }
209 
221 double MultivariateGaussianMixture::ValueAt(const MatrixDouble& X, size_t k, bool w) const
222 {
223  if (!isValidMemberIndex(k))
224  {
225  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::ValueAt(const MatrixDouble& X, size_t k): ") +
226  _("index out of bounds."));
227  }
228  else if ((X.GetRows() != dimension) || (X.GetCols() != 1))
229  {
230  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::ValueAt(const MatrixDouble* X, size_t k): ") +
231  _("incompatible dimensions."));
232  }
233 
234  if (w)
235  return members[k].second * members[k].first.ValueAt(X);
236  else
237  return members[k].first.ValueAt(X);
238 }
239 
251 double MultivariateGaussianMixture::ValueAt(const std::vector<double> &x, size_t k, bool w) const
252 {
253  if (!isValidMemberIndex(k))
254  {
255  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::ValueAt(const std::vector<double> &x, size_t k): ") +
256  _("index out of bounds."));
257  }
258  else if (x.size() != dimension)
259  {
260  throw ExceptionDomain(StringUTF8("MultivariateGaussianMixture::ValueAt(const std::vector<double> &x, size_t k): ") +
261  _("incompatible dimensions."));
262  }
263 
264  double val = members[k].first.ValueAt(x);
265 
266  if (w)
267  val *= members[k].second;
268 
269  return val;
270 }
271 
280 {
281  size_t nbPatterns = Data.GetRows();
282  double E = 0.0;
283 
284  MatrixDouble x(dimension, 1, 0.0);
285 
286  for (size_t k = 0; k < nbPatterns; ++k)
287  {
288  for (size_t c = 0; c < dimension; ++c)
289  x.At(c, 0) = Data[k][c];
290 
291  E += log(ValueAt(x));
292  }
293 
294  return E;
295 }
296 
304 double MultivariateGaussianMixture::MLLE(const std::vector< std::vector<double> > &data) const
305 {
306  double E = 0.0;
307 
308  for (auto & elem : data)
309  E += log(ValueAt(elem));
310 
311  return E;
312 }
313 
324 unsigned int MultivariateGaussianMixture::EM(const MatrixDouble& patterns, size_t nbSeeds, double epsilon, size_t maximalIterations)
325 {
326  unsigned int nbIterations = 0;
327  size_t nbMembers = nbSeeds;
328  size_t nbPatterns = patterns.GetRows();
329 
330  dimension = patterns.GetCols();
331  MatrixDouble proba(nbPatterns, nbMembers, 0.0);
332 
334  // Setup //
336 
337  members.clear();
338 
339  MatrixDouble mu = patterns.MakeColumnMeans().Transpose();
340  SquareMatrixDouble sigma = patterns.MakeCovariance();
341 
342  std::vector<MatrixDouble> g_samples;
343  //std::vector<double> deltas;
344 
345  if (nbMembers > 1)
346  {
347  for (size_t d = 0; d < dimension; ++d)
348  {
349  MatrixDouble ck = patterns.MakeColumn(d);
350 
351  double min = ck.GetMin();
352  double max = ck.GetMax();
353  double delta = (max - min) / (double(nbMembers) - 1.0);
354 
355  MatrixDouble sample(nbMembers, 1, 0.0);
356 
357  for (size_t k = 0; k < nbMembers; k++)
358  sample[k][0] = min + double(k) * delta;
359 
360  g_samples.push_back(sample);
361  }
362  }
363  else
364  {
365  for (size_t d = 0; d < dimension; ++d)
366  {
367  MatrixDouble sample(1, 1, mu[d][0]);
368 
369  g_samples.push_back(sample);
370  }
371  }
372 
373  for (size_t k = 0; k < nbMembers; ++k)
374  {
375  // Set up mean
376 
377  MatrixDouble m(dimension, 1, 0.0);
378 
379  for (size_t d = 0; d < dimension; ++d)
380  m[d][0] = g_samples[d][k][0];
381 
382  // Create member.
383  AddMember(MultivariateGaussianPDF(m, sigma), 1.0);
384  }
385 
387  // Iterations //
389 
390  // Some computational variables
391 
392  bool reloop = true;
393  double likelihood = - std::numeric_limits<double>::infinity();
394 
395  std::vector<double> xi(dimension);
396 
397  SquareMatrixDouble m(dimension, 0.0);
399 
400  while (reloop && (nbIterations < maximalIterations))
401  {
402  backup = *this;
403 
404  // Step E : Expectation
405 
406  for (size_t i = 0; i < nbPatterns; ++i)
407  {
408  // Retrieve i-th pattern
409  for (size_t j = 0; j < dimension; ++j)
410  xi[j] = patterns[i][j];
411 
412  double pi = 0.0;
413 
414  for (size_t k = 0; k < nbMembers; ++k)
415  {
416  double pik = ValueAt(xi, k);
417 
418  proba[i][k] = pik;
419  pi += pik;
420  }
421 
422  proba.MultRow(i, 1.0 / pi);
423  }
424 
425  //Step M : Maximisation
426 
427  for (size_t k = 0; k < nbMembers; ++k)
428  {
429  double cumulPk = 0.0;
430  mu.SetAll(0.0);
431  sigma.SetAll(0.0);
432 
433  auto mean_k = GetMean(k);
434 
435  for (size_t i = 0; i < nbPatterns; ++i)
436  {
437  double pik = proba[i][k];
438  cumulPk += pik;
439 
440  // Retrieve i-th pattern
441  for (size_t j = 0; j < dimension; ++j)
442  {
443  double val = patterns[i][j];
444 
445  mu[j][0] += val * pik;
446  xi[j] = val - mean_k[j][0];
447  }
448 
449  for (size_t r = 0; r < dimension; ++r)
450  for (size_t c = 0; c < dimension; ++c)
451  m[r][c] = xi[r] * xi[c];
452 
453  m *= pik;
454  sigma += m;
455  }
456 
457  double invCumulPk = 1.0 / cumulPk;
458 
459  mu *= invCumulPk;
460  sigma *= invCumulPk;
461 
462  members[k].second = double(cumulPk) / double(nbPatterns);
463  members[k].first = MultivariateGaussianPDF(mu, sigma);
464  }
465 
466  double newLikelihood = MLLE(patterns);
467  double likelihoodDiff = newLikelihood - likelihood;
468 
469  likelihood = newLikelihood;
470  nbIterations++;
471 
472  // Global output for each loop
473 
474  //std::cout << "lle = " << newLikelihood << std::endl;
475 
476  if (likelihoodDiff < 0.0)
477  SetTo(backup);
478  else
479  reloop = IsValid() && (likelihoodDiff > epsilon);
480  }
481 
482  return nbIterations;
483 }
484 
495 unsigned int MultivariateGaussianMixture::EM(const std::vector<std::vector<double> > &patterns, size_t nbSeeds, double epsilon, size_t maximalIterations)
496 {
497  unsigned int nbIterations = 0;
498  size_t nbMembers = nbSeeds;
499  size_t nbPatterns = patterns.size();
500 
501  dimension = patterns.front().size();
502  MatrixDouble proba(nbPatterns, nbMembers, 0.0);
503 
505  // Setup //
507 
508  members.clear();
509 
511  SquareMatrixDouble sigma(MakeCovariance(patterns));
512 
513  std::vector<MatrixDouble> g_samples;
514  //std::vector<double> deltas;
515 
516  if (nbMembers > 1)
517  {
518  for (size_t d = 0; d < dimension; ++d)
519  {
520  double min = std::numeric_limits<double>::infinity();
521  double max = -std::numeric_limits<double>::infinity();
522 
523  for (size_t k = 0; k < nbPatterns; ++k)
524  {
525  double v = patterns[k][d];
526 
527  min = Min(min, v);
528  max = Max(max, v);
529  }
530 
531  double delta = (max - min) / (double(nbMembers) - 1.0);
532 
533  MatrixDouble sample = MatrixDouble(nbMembers, 1, 0.0);
534 
535  for (size_t k = 0; k < nbMembers; ++k)
536  sample[k][0] = min + double(k) * delta;
537 
538  g_samples.push_back(sample);
539  }
540  }
541  else
542  {
543  for (size_t d = 0; d < dimension; ++d)
544  {
545  MatrixDouble sample = MatrixDouble(1, 1, mu[d][0]);
546 
547  g_samples.push_back(sample);
548  }
549  }
550 
551  for (size_t k = 0; k < nbMembers; ++k)
552  {
553  // Set up mean
554 
555  MatrixDouble m(dimension, 1, 0.0);
556 
557  for (size_t d = 0; d < dimension; ++d)
558  m[d][0] = g_samples[d][k][0];
559 
560  // Create member
561 
562  AddMember(MultivariateGaussianPDF(m, sigma), 1.0);
563  }
564 
566  // Iterations //
568 
569  // Some computational variables
570 
571  bool reloop = true;
572  double likelihood = - std::numeric_limits<double>::infinity();
573 
574  std::vector<double> xi;
575 
576  SquareMatrixDouble m(dimension, 0.0);
578 
579  while (reloop && (nbIterations < maximalIterations))
580  {
581  backup = *this;
582 
583  // Step E : Expectation
584 
585  for (size_t i = 0; i < nbPatterns; ++i)
586  {
587  xi = patterns[i];
588 
589  double pi = 0.0;
590 
591  for (size_t k = 0; k < nbMembers; ++k)
592  {
593  double pik = ValueAt(xi, k);
594 
595  proba[i][k] = pik;
596  pi += pik;
597  }
598 
599  proba.MultRow(i, 1.0 / pi);
600  }
601 
602  //Step M : Maximisation
603 
604  for (size_t k = 0; k < nbMembers; ++k)
605  {
606  double cumulPk = 0.0;
607  mu.SetAll(0.0);
608  sigma.SetAll(0.0);
609 
610  auto mean_k = GetMean(k);
611 
612  for (size_t i = 0; i < nbPatterns; ++i)
613  {
614  double pik = proba[i][k];
615  cumulPk += pik;
616 
617  // Retrieve i-th pattern
618  for (size_t j = 0; j < dimension; ++j)
619  {
620  double val = patterns[i][j];
621 
622  mu[j][0] += val * pik;
623  xi[j] = val - mean_k[j][0];
624  }
625 
626  for (size_t r = 0; r < dimension; ++r)
627  for (size_t c = 0; c < dimension; ++c)
628  m[r][c] = xi[r] * xi[c];
629 
630  m *= pik;
631  sigma += m;
632  }
633 
634  double invCumulPk = 1.0 / cumulPk;
635 
636  mu *= invCumulPk;
637  sigma *= invCumulPk;
638 
639  members[k].second = double(cumulPk) / double(nbPatterns);
640  members[k].first = MultivariateGaussianPDF(mu, sigma);
641  }
642 
643  double newLikelihood = MLLE(patterns);
644  double likelihoodDiff = newLikelihood - likelihood;
645 
646  likelihood = newLikelihood;
647  nbIterations++;
648 
649  // Global output for each loop
650 
651  if (likelihoodDiff < 0.0)
652  SetTo(backup);
653  else
654  reloop = IsValid() && (likelihoodDiff > epsilon);
655  }
656 
657  return nbIterations;
658 }
659 
666 {
667  String s;
668 
669  for (size_t k = 0; k < members.size(); k++)
670  s += ToString(k) + U"\n";
671 
672  return s;
673 }
674 
683 {
684  String s;
685 
686  if (isValidMemberIndex(k))
687  {
688  s += U"Member : ";
689  s += k;
690  s += U" [0..";
691  s += members.size() - 1;
692  s += U"]\nWeight = ";
693  s += members[k].second;
694  s += U"\n\nMean = \n";
695  s += ((members[k].first).GetMean()).ToString();
696  s += U"\nVariance = \n";
697  s += ((members[k].first).GetVariance()).ToString();
698  }
699  return s;
700 }
701 
709 {
710  bool status = true;
711 
712  size_t k = 0;
713 
714  while ((k < GetNbMembers()) && status)
715  {
716  double w = GetWeight(k);
717 
718  status = GetMember(k).IsValid() && (w == w);
719  k++;
720  }
721 
722  return status;
723 }
724 
726  Cloner::Register<MultivariateGaussianMixture>();
727 CRN_END_CLASS_CONSTRUCTOR(MultivariateGaussianMixture)
size_t GetRows() const noexcept
Returns the number of rows.
Definition: CRNMatrix.h:157
size_t GetCols() const noexcept
Returns the number of columns.
Definition: CRNMatrix.h:163
Matrix MakeCovariance() const
Definition: CRNMatrix.h:624
std::vector< double > MeanPattern(const std::vector< std::vector< double >> &m)
Return mean pattern of sample.
void SetTo(const MultivariateGaussianMixture &m)
Set mixture from another one.
virtual Matrix & Transpose()
Inplace transposition.
Definition: CRNMatrix.h:472
unsigned int EM(const MatrixDouble &patterns, size_t nbSeeds=2, double epsilon=std::numeric_limits< double >::epsilon(), size_t MaximalIterations=100)
Expectation Maximization.
#define _(String)
Definition: CRNi18n.h:51
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
double MLLE(const MatrixDouble &data) const
Maximum log-likelihood estimation.
Matrix MakeColumn(size_t k) const
Extracts one column from matrix.
Definition: CRNMatrix.h:513
size_t GetNbMembers() const noexcept
Returns the number of density functions.
void SetMember(const MultivariateGaussianPDF &pdf, double w, size_t k)
Replaces a given density function and its weight.
#define CRN_END_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
Definition: CRNObject.h:198
void AddMember(const MultivariateGaussianPDF &pdf, double Weight)
Adds a density function.
A UTF32 character string class.
Definition: CRNString.h:61
double GetWeight(size_t k) const
Returns the weight of a given density function.
virtual ~MultivariateGaussianMixture() override
Destructor.
std::vector< std::vector< double > > MakeCovariance(const std::vector< std::vector< double >> &m)
Return covariance for sample.
A generic domain error.
Definition: CRNException.h:83
T GetMin() const
Definition: CRNMatrix.h:432
String ToString() const
Dumps a summary of the mixture to a string.
void MultRow(size_t r, double v)
Scale one row from matrix.
Definition: CRNMatrix.h:320
double ValueAt(const MatrixDouble &X) const
Evaluates a pattern.
void SetAll(const T &v)
Set all elements.
Definition: CRNMatrix.h:173
A dimension error.
Definition: CRNException.h:119
size_t GetDimension() const noexcept
Returns the number of features.
double matrix class
size_t GetDimension() const noexcept
Returns the number of features.
MultivariateGaussianPDF GetMember(size_t k) const
Returns a given density function.
Matrix MakeColumnMeans() const
Definition: CRNMatrix.h:591
const T & Min(const T &a, const T &b)
Returns the min of two values.
Definition: CRNMath.h:49
SquareMatrixDouble GetVariance(size_t k) const
Returns the variance of a given density function.
const T & At(size_t pos) const noexcept
Definition: CRNMatrix.h:165
bool IsValid() const
Test if mixture is valid i.e. if all paratemers have finite values.
void SetDimension(size_t k) noexcept
Sets the number of features.
Square double matrix class.
bool IsValid() const
Check if PDF is valid (with finite values)
T GetMax() const
Definition: CRNMatrix.h:438
A character string class.
Definition: CRNStringUTF8.h:49
Multivariate Gaussian distribution.
MatrixDouble GetMean(size_t k) const
Returns the mean of a given density function.
#define CRN_BEGIN_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
Definition: CRNObject.h:185