libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNUnivariateGaussianMixture.h
Go to the documentation of this file.
1 /* Copyright 2008-2016 INSA Lyon, CoReNum, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNUnivariateGaussianMixture.h
19  * \author Jean DUONG, Yann LEYDIER
20  */
21 
22 
23 #ifndef CRNUNIVARIATEGAUSSIANMIXTURE_HEADER
24 #define CRNUNIVARIATEGAUSSIANMIXTURE_HEADER
25 
26 #include <vector>
27 #include <limits>
30 
31 #include <iostream>
32 
33 namespace crn
34 {
35  /****************************************************************************/
46  {
47  public:
61  template<typename ITER> UnivariateGaussianMixture(ITER it_begin, ITER it_end, size_t nb_seeds = 2)
62  {
63  if (nb_seeds == 1)
64  {
65  auto mvd = MeanVarDev(it_begin, it_end);
66 
67  AddMember(UnivariateGaussianPDF(std::get<0>(mvd), std::get<1>(mvd)), 1.0);
68  }
69  else
70  EM(it_begin, it_end, (unsigned int)(nb_seeds));
71  }
72 
74  virtual ~UnivariateGaussianMixture() override;
75 
78 
80  size_t GetNbMembers() const noexcept { return members.size(); }
81 
83  double GetWeight(size_t k) const;
85  UnivariateGaussianPDF GetMember(size_t k) const;
87  double GetMean(size_t k) const;
89  double GetVariance(size_t k) const;
90 
92  void AddMember(UnivariateGaussianPDF pdf, double Weight);
94  void SetMember(UnivariateGaussianPDF pdf, double w, size_t k);
96  void SortMembersByMeans(bool reverse = false);
98  void SortMembersByVariances(bool reverse = false);
100  void SortMembersByWeights(bool reverse = false);
101 
103  double ValueAt(const double x) const;
105  double ValueAt(const double x, size_t k, bool weight_flag = false) const;
106 
108  unsigned int EM(const MatrixDouble& patterns, size_t nbSeeds = 2, double epsilon = std::numeric_limits<double>::epsilon(), size_t maximalIterations = 100);
110  unsigned int EM(const std::vector<double> &patterns, size_t nbSeeds = 2, double epsilon = std::numeric_limits<double>::epsilon(), size_t maximalIterations = 100);
112  template<typename ITER> unsigned int EM(ITER it_begin, ITER it_end, size_t nbSeeds = 2, double epsilon = std::numeric_limits<double>::epsilon(), size_t maximalIterations = 100);
113 
115  String ToString() const;
117  String ToString(size_t k) const;
118 
120  double MLLE(const MatrixDouble& Data) const;
122  double MLLE(const std::vector<double> &Data) const;
124  template<typename ITER> double MLLE(ITER it_begin, ITER it_end) const;
125 
127  double BIC(double l, size_t n) const {return (-2.0 * log(l) + (double)members.size() * log((double)n)) ;}
129  double BIC(const std::vector<double> &Data) const {return (-2.0 * MLLE(Data) + (double)members.size() * log((double)(Data.size()))) ;}
130 
132  std::vector<double> MakeRandomSample(size_t n = 1, size_t m = 100, bool reseed = true) const;
133 
134  void Deserialize(xml::Element &el);
135  xml::Element Serialize(xml::Element &parent) const;
136 
137  private:
139  bool isValidMemberIndex(size_t k) const { return k < members.size(); }
140 
141  std::vector<std::pair<UnivariateGaussianPDF, double>> members;
145  };
146  template<> struct IsSerializable<UnivariateGaussianMixture> : public std::true_type {};
147  template<> struct IsClonable<UnivariateGaussianMixture> : public std::true_type {};
148 
150 
151 
159  template<typename ITER> double UnivariateGaussianMixture::MLLE(ITER it_begin, ITER it_end) const
160  {
161  double e = 0.0;
162 
163  for (auto it = it_begin; it != it_end; ++it)
164  e += double(it->second) * log(ValueAt(it->first));
165 
166  return e;
167  }
168 
180  template<typename ITER> unsigned int UnivariateGaussianMixture::EM(ITER it_begin, ITER it_end, size_t nbSeeds, double epsilon, size_t maximalIterations)
181  {
182  unsigned int nbIterations = 0;
183  size_t nbMembers = size_t(nbSeeds);
184  size_t nbPatterns = 0;
185  size_t nbKeys = std::distance(it_begin, it_end);
186 
188  // Setup //
190 
191  auto min_value = std::numeric_limits<double>::infinity();
192  auto max_value = -std::numeric_limits<double>::infinity();
193 
194  for (auto it = it_begin; it != it_end; ++it)
195  {
196  auto v = it->first;
197 
198  min_value = Min(min_value, v);
199  max_value = Max(max_value, v);
200  nbPatterns += it->second;
201  }
202 
203  members.clear();
204 
205 
206  double delta = (max_value - min_value) / double(nbSeeds);
207  double Seed = min_value + delta / 2.0;
208 
209  // Heuristic setup : gaussians are placed equidistant from Min to Max value
210  // variance set to catch at least one data value
211 
212  for (size_t k = 0; k < nbMembers; ++k)
213  {
214  // Warning : if data distribution is too sparse, variance should be adapted
215 
216  bool redo = true;
217  double var = delta;
218  double sigma = sqrt(var);
219 
220  auto it = it_begin;
221 
222  while (redo)
223  {
224  redo = (Abs(Seed - it->first) > sigma);
225  ++it;
226 
227  if ((it == it_end) && redo)
228  {
229  var += delta;
230  sigma = sqrt(var);
231  it = it_begin;
232  }
233  }
234 
235  AddMember(UnivariateGaussianPDF(Seed, var), 1.0);
236  Seed += delta;
237  }
238 
239  // Heuristic setup : mobile centroids
240 
241 
242 
243  MatrixDouble Proba(nbKeys, nbMembers, 0.0);
244 
245  bool Continue = true;
246  double likelihood = 0.0;
247 
249  // Iterations //
251 
252  while(Continue)
253  {
254  // Expectation ("E" step)
255 
256  auto it = it_begin;
257 
258  for (size_t i = 0; i < nbKeys; ++i)
259  {
260  double xi = it->first;
261  double Pi = 0.0;
262 
263  for (size_t k = 0; k < nbMembers; ++k)
264  {
265  double pik = this->ValueAt(xi, k);
266 
267  Proba[i][k] = pik;
268  Pi += pik;
269  }
270 
271  Proba.MultRow(i, 1.0 / Pi);
272  ++it;
273  }
274 
275  // Maximization ("M" step)
276 
277  for (size_t k = 0; k < nbMembers; ++k)
278  {
279  const auto& Gk = members[k].first;
280  double CumulPk = 0.0;
281  double muk = Gk.GetMean();
282 
283  double mu = 0.0;
284  double v = 0.0;
285 
286  it = it_begin;
287 
288  for (size_t i = 0; i < nbKeys; ++i)
289  {
290  double pik = Proba[i][k];
291  double xi = it->first;
292  double ni = double(it->second);
293 
294  CumulPk += ni * pik;
295  mu += ni * pik * xi;
296  v += ni * pik * Sqr(xi - muk);
297  ++it;
298  }
299 
300  if (std::isfinite(mu) && std::isfinite(v))
301  {
302  mu /= CumulPk;
303  v /= CumulPk;
304  }
305  else // Numeric limit reached. Recomputation needed
306  {
307  it = it_begin;
308  mu = 0.0;
309  v = 0.0;
310 
311  for (size_t i = 0; i < nbKeys; ++i)
312  {
313  double pik = Proba[i][k];
314  double xi = it->first;
315  double ni = double(it->second);
316 
317  mu += ni * pik * xi / CumulPk;
318  v += ni * pik * Sqr(xi - muk) / CumulPk;
319  ++it;
320  }
321  }
322 
323  members[k].second = CumulPk / double(nbPatterns);
324  members[k].first = UnivariateGaussianPDF(mu, v);
325  }
326 
327  double NewLikelihood = MLLE(it_begin, it_end);
328  double LikelihoodDiff = fabs(NewLikelihood - likelihood);
329 
330  likelihood = NewLikelihood;
331  ++nbIterations;
332 
333  Continue = ((nbIterations < maximalIterations) && (LikelihoodDiff > epsilon));
334  }
335 
336  return nbIterations;
337  }
338 
339 }
340 
341 #endif
342 
#define CRN_SERIALIZATION_CONSTRUCTOR(classname)
Defines a default constructor from xml element.
Definition: CRNObject.h:165
Univariate Gaussian distribution.
XML element.
Definition: CRNXml.h:135
double GetWeight(size_t k) const
Returns the weight of a given density function.
String ToString() const
Dumps a summary of the mixture to a string.
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
void AddMember(UnivariateGaussianPDF pdf, double Weight)
Adds a density function.
void SetMember(UnivariateGaussianPDF pdf, double w, size_t k)
Replaces a given density function and its weight.
UnivariateGaussianPDF GetMember(size_t k) const
Returns a given density function.
double ValueAt(const double x) const
Evaluates a pattern.
UnivariateGaussianMixture(ITER it_begin, ITER it_end, size_t nb_seeds=2)
A UTF32 character string class.
Definition: CRNString.h:61
virtual ~UnivariateGaussianMixture() override
Destructor.
size_t GetNbMembers() const noexcept
Returns the number of density functions.
UnivariateGaussianMixture()
Default constructor.
unsigned int EM(const MatrixDouble &patterns, size_t nbSeeds=2, double epsilon=std::numeric_limits< double >::epsilon(), size_t maximalIterations=100)
Expectation Maximization.
UnivariateGaussianMixture & operator=(const UnivariateGaussianMixture &)=delete
double BIC(double l, size_t n) const
Bayes Information Criterion estimation.
void MultRow(size_t r, double v)
Scale one row from matrix.
Definition: CRNMatrix.h:320
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
Definition: CRNMath.h:61
void SortMembersByWeights(bool reverse=false)
Sort members by weights.
double matrix class
void Abs(Image< T > &img, typename std::enable_if< std::is_arithmetic< T >::value >::type *dummy=nullptr) noexcept
Replaces each pixel by its absolute value.
Definition: CRNImageGray.h:47
double BIC(const std::vector< double > &Data) const
Bayes Information Criterion estimation.
const T & Min(const T &a, const T &b)
Returns the min of two values.
Definition: CRNMath.h:49
#define CRN_DECLARE_CLASS_CONSTRUCTOR(classname)
Declares a class constructor.
Definition: CRNObject.h:173
double GetVariance(size_t k) const
Returns the variance of a given density function.
CRN_ALIAS_SMART_PTR(ImageBW)
std::vector< double > MakeRandomSample(size_t n=1, size_t m=100, bool reseed=true) const
Creates a data sample following the mixture's probability law.
void SortMembersByMeans(bool reverse=false)
Sort members by means.
xml::Element Serialize(xml::Element &parent) const
std::tuple< double, double, double > MeanVarDev(const std::vector< double > &v)
Return mean, variance and standard deviation of sample.
void SortMembersByVariances(bool reverse=false)
Sort members by means.
double MLLE(const MatrixDouble &Data) const
Maximum log-likelihood estimation.
double GetMean(size_t k) const
Returns the mean of a given density function.