libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNStatisticSample.cpp
Go to the documentation of this file.
1 /* Copyright 2014-2015 INSA Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNStatisticSample.cpp
19  * \author Jean DUONG, Yann LEYDIER
20  */
21 
22 #include <CRNException.h>
23 #include <CRNMath/CRNMath.h>
28 #include <CRNStringUTF8.h>
29 #include <functional>
30 #include <limits>
31 #include <CRNi18n.h>
32 
33 using namespace crn;
34 using namespace std::placeholders;
35 
40 double crn::Mean(const std::vector<double> &v)
41 {
42  const auto s = double(v.size());
43  auto m = std::accumulate(v.begin(), v.end(), 0.0);
44 
45  if (!std::isinf(m))
46  return m / s;
47  else
48  {
49  m = 0.0;
50  for (auto x : v)
51  m += x / s;
52  return m;
53  }
54 }
55 
60 std::vector<double> crn::MeanPattern(const std::vector<std::vector<double>> &m)
61 {
62  auto mp = std::vector<double>(m.front().size(), 0.0);
63 
64  for (const auto &row : m)
65  for (auto k : Range(row))
66  mp[k] += row[k];
67 
68  auto valid = true;
69  for (auto k : Range(mp))
70  if (std::isinf(mp[k]))
71  {
72  valid = false;
73  break;
74  }
75 
76  if (valid)
77  std::transform(mp.begin(), mp.end(), mp.begin(), std::bind(std::divides<double>{}, _1, double(m.size())));
78  else
79  {
80  auto s = double(m.size());
81  std::vector<double>(m.front().size(), 0.0).swap(mp);
82 
83  for (const auto &row : m)
84  for (auto k : Range(row))
85  mp[k] += row[k] / s;
86  }
87 
88  return mp;
89 }
90 
95 double crn::StdDeviation(const std::vector<double> &v)
96 {
97  return sqrt(Variance(v));
98 }
99 
104 double crn::Variance(const std::vector<double> &v)
105 {
106  const auto m = Mean(v);
107  auto var = 0.0;
108  const auto s = double(v.size());
109 
110  for (double d : v)
111  var += Sqr(d - m);
112 
113  if (!std::isinf(var))
114  var /= s;
115  else
116  {
117  var = 0.0;
118  for (double d : v)
119  var += Sqr(d - m) / s;
120  }
121 
122  return var;
123 }
124 
129 std::vector<std::vector<double>> crn::MakeCovariance(const std::vector<std::vector<double>> &m)
130 {
131  const auto card = m.size();
132  const auto dim = m.front().size();
133  auto cov = std::vector<std::vector<double>>(dim, std::vector<double>(dim));
134 
135  for (size_t i = 0; i < dim; ++i)
136  for (auto j = i; j < dim; ++j)
137  {
138  for (size_t k = 0; k < card; ++k)
139  cov[i][j] += m[k][i] * m[k][j];
140  cov[i][j] /= double(card);
141 
142  if (j != i)
143  cov[j][i] = cov[i][j];
144  }
145  return cov;
146 }
147 
152 std::tuple<double, double, double> crn::MeanVarDev(const std::vector<double> &v)
153 {
154  // Compute cumul and cumul of squares in a same loop
155  auto m = 0.0;
156  auto m_2 = 0.0;
157  const auto s = double(v.size());
158 
159  for (double x : v)
160  {
161  m += x;
162  m_2 += Sqr(x);
163  }
164 
165  if (!std::isinf(m))
166  {
167  m /= s;
168  m_2 /= s;
169  }
170  else // If cumul too large, re-compute mean with values downscaled by cardinal
171  {
172  m = 0.0;
173  m_2 = 0.0;
174 
175  for (double x : v)
176  {
177  m += x / s;
178  m_2 += Sqr(x) / s;
179  }
180  }
181 
182  if (!std::isinf(m_2))
183  {
184  auto var = m_2 - Sqr(m);
185  return std::make_tuple(m, var, sqrt(var));
186  }
187  else // Konig-Huygens formula for variance cannot be used (too large values in cumul of squares)
188  {
189  auto var = 0.0;
190  for (double x : v)
191  var += Sqr(x - m);
192 
193  if (!std::isinf(var))
194  var /= s;
195  else
196  {
197  var = 0.0;
198  for (double x : v)
199  var += Sqr(x - m) / s;
200  }
201  return std::make_tuple(m, var, sqrt(var));
202  }
203 }
204 
216 std::vector<double> crn::Quantiles(const std::vector<double> &v, size_t q, bool sort_flag)
217 {
218  if ((q < 3) || (q > v.size()))
219  throw ExceptionDomain(StringUTF8("StatisticSample::Quantiles(const std::vector<double>&, size_t, bool): ") + _("Illegal range."));
220 
221  auto qt = std::vector<double>(q - 1);
222  const auto jump = v.size() / q;
223  auto index = jump;
224 
225  if (sort_flag)
226  {
227  auto w = v;
228  std::sort(w.begin(), w.end());
229  for(size_t k = 0; k < q - 1; ++k)
230  {
231  qt[k] = w[index];
232  index += jump;
233  }
234  }
235  else
236  for(size_t k = 0; k < q - 1; ++k)
237  {
238  qt[k] = v[index];
239  index += jump;
240  }
241 
242  return qt;
243 }
244 
250 Histogram crn::MakeHistogram(const std::vector<double> &v, size_t nb_bins)
251 {
252  auto mM = MinMax(v);
253  auto left_bound = std::get<0>(mM);
254  auto right_bound = std::get<1>(mM);
255  auto range = right_bound - left_bound;
256  auto delta = range / double(nb_bins);
257  auto h = Histogram(nb_bins);
258  if (nb_bins == 1) // trivial case
259  h.SetBin(0, (unsigned int)v.size());
260  else
261  for (auto d : v)
262  {
263  auto id = size_t((d - left_bound) / delta);
264  if (id >= nb_bins)
265  id = nb_bins - 1;
266  h.IncBin(id);
267  }
268  return h;
269 }
270 
275 Histogram crn::MakeHistogramSquareRoot(const std::vector<double> &v)
276 {
277  return MakeHistogram(v, size_t(sqrt(double(v.size()))));
278 }
279 
284 Histogram crn::MakeHistogramSturges(const std::vector<double> &v)
285 {
286  return MakeHistogram(v, 1 + size_t(log2(double(v.size()))));
287 }
288 
293 Histogram crn::MakeHistogramRice(const std::vector<double> &v)
294 {
295  return MakeHistogram(v, size_t(2 * pow(double(v.size()), 1.0 / 3.0)));
296 }
297 
302 Histogram crn::MakeHistogramScott(const std::vector<double> &v)
303 {
304  const auto delta = 3.5 * StdDeviation(v) / pow(double(v.size()), 1.0 / 3.0);
305  const auto& mM = MinMax(v);
306  return MakeHistogram(v, 1 + size_t((std::get<1>(mM) - std::get<0>(mM)) / delta));
307 }
308 
313 Histogram crn::MakeHistogramFreedmanDiaconis(const std::vector<double> &v, bool sort_flag)
314 {
315  const auto& qt = Quantiles(v, size_t(4), sort_flag);
316  const auto delta = 2 * (qt.back() - qt.front()) / pow(double(v.size()), 1.0 / 3.0);
317  const auto& mM = MinMax(v);
318  return MakeHistogram(v, 1 + size_t((std::get<1>(mM) - std::get<0>(mM)) / delta));
319 }
320 
326 UnivariateGaussianMixture crn::MakeGaussianMixtureModel(const std::vector<double> &v, size_t nb_seeds)
327 {
328  UnivariateGaussianMixture gaussian_mixture_model;
329 
330  if (nb_seeds == 1)
331  {
332  auto mvd = MeanVarDev(v);
333  gaussian_mixture_model.AddMember(UnivariateGaussianPDF(std::get<0>(mvd), std::get<1>(mvd)), 1.0);
334  }
335  else
336  gaussian_mixture_model.EM(v, nb_seeds);
337 
338  return gaussian_mixture_model;
339 }
340 
348 MultivariateGaussianMixture crn::MakeGaussianMixtureModel(const std::vector<std::vector<double>> &patterns, size_t nb_seeds)
349 {
350  MultivariateGaussianMixture gaussian_mixture_model;
351 
352  if (nb_seeds == 1)
353  gaussian_mixture_model.AddMember(MultivariateGaussianPDF(MatrixDouble(MeanPattern(patterns)), SquareMatrixDouble(MakeCovariance(patterns))), 1.0);
354  else
355  gaussian_mixture_model.EM(patterns, (unsigned int)(nb_seeds));
356 
357  return gaussian_mixture_model;
358 }
359 
360 
ScalarRange< T > Range(T b, T e)
Creates a range [[b, e[[.
Definition: CRNType.h:257
double Variance(const std::vector< double > &v)
Return variance of sample.
Univariate Gaussian distribution.
std::vector< double > MeanPattern(const std::vector< std::vector< double >> &m)
Return mean pattern of sample.
unsigned int EM(const MatrixDouble &patterns, size_t nbSeeds=2, double epsilon=std::numeric_limits< double >::epsilon(), size_t MaximalIterations=100)
Expectation Maximization.
#define _(String)
Definition: CRNi18n.h:51
Histogram MakeHistogramSquareRoot(const std::vector< double > &v)
Returns count histogram (#bins = sqrt(pop) )
void AddMember(UnivariateGaussianPDF pdf, double Weight)
Adds a density function.
std::vector< double > Quantiles(const std::vector< double > &v, size_t q, bool sort_flag=true)
Return quantile values of sample.
Histogram MakeHistogramSturges(const std::vector< double > &v)
Returns count histogram (#bins = 1+log_2(pop) )
void AddMember(const MultivariateGaussianPDF &pdf, double Weight)
Adds a density function.
std::vector< std::vector< double > > MakeCovariance(const std::vector< std::vector< double >> &m)
Return covariance for sample.
A generic domain error.
Definition: CRNException.h:83
unsigned int EM(const MatrixDouble &patterns, size_t nbSeeds=2, double epsilon=std::numeric_limits< double >::epsilon(), size_t maximalIterations=100)
Expectation Maximization.
double Mean(const std::vector< double > &v)
Return mean value of sample.
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
Definition: CRNMath.h:61
double StdDeviation(const std::vector< double > &v)
Return deviation of sample.
double matrix class
UnivariateGaussianMixture MakeGaussianMixtureModel(const std::vector< double > &v, size_t nb_seeds=2)
Return Gaussian mixture model modeling current (univariate) sample.
Mother class for integer histograms.
Definition: CRNHistogram.h:44
std::pair< T, T > MinMax(const Image< T > &img, CMP cmp=CMP{})
Returns min and max pixel values.
Definition: CRNImage.hpp:1447
Histogram MakeHistogramScott(const std::vector< double > &v)
Returns count histogram (bin width = 3.5 * stddev / pop^(1/3))
Square double matrix class.
A character string class.
Definition: CRNStringUTF8.h:49
Multivariate Gaussian distribution.
Histogram MakeHistogramRice(const std::vector< double > &v)
Returns count histogram (#bins = 2n^(1/3) )
Histogram MakeHistogramFreedmanDiaconis(const std::vector< double > &v, bool sort_flag=true)
Returns count histogram (bin width = 2 * IQR(v) / pop^(1/3))
std::tuple< double, double, double > MeanVarDev(const std::vector< double > &v)
Return mean, variance and standard deviation of sample.
Histogram MakeHistogram(const Image< T > &img, typename std::enable_if< std::is_arithmetic< T >::value >::type *dummy=nullptr)
Definition: CRNImageGray.h:64