libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNStatisticSample.h
Go to the documentation of this file.
1 /* Copyright 2014-2015 INSA-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNUnivariateStatisticSample.h
19  * \author Jean DUONG, Yann LEYDIER
20  */
21 
22 #ifndef CRNSTATISTICSAMPLE_HEADER
23 #define CRNSTATISTICSAMPLE_HEADER
24 
25 #include <vector>
26 #include <map>
27 #include <algorithm>
28 #include <iterator>
29 #include <numeric>
30 #include <tuple>
31 
32 #include <CRNMath/CRNMath.h>
33 
37 namespace crn
38 {
39  class Histogram;
40  class UnivariateGaussianMixture;
41  class MultivariateGaussianMixture;
42 
45 
46  /****************************************************************************/
57  template<typename T> inline T Max(const std::vector<T> &v) { return *std::max_element(v.begin(), v.end()); }
59  template<typename T> inline T Max(const std::vector<std::vector<T>> &m)
60  {
61  auto ma = m.front().front();
62  for (const auto &row : m)
63  {
64  auto rmax = *std::max_element(row.begin(), row.end());
65  if (rmax > ma) ma = rmax;
66  }
67  return ma;
68  }
70  template<typename T> inline T Min(const std::vector<T> &v) { return *std::min_element(v.begin(), v.end()); }
72  template<typename T> inline T Min(const std::vector<std::vector<T>> &m)
73  {
74  auto mi = m.front().front();
75  for (const auto &row : m)
76  {
77  auto rmin = *std::max_element(row.begin(), row.end());
78  if (rmin < mi) mi = rmin;
79  }
80  return mi;
81  }
83  template<typename T> inline std::tuple<T, T> MinMax(const std::vector<T> &v)
84  {
85  auto res = std::minmax_element(v.begin(), v.end());
86  return std::make_tuple(*res.first, *res.second);
87  }
89  template<typename T> inline std::tuple<T, T> MinMax(const std::vector<std::vector<T>> &m)
90  {
91  auto mi = m.front().front();
92  auto ma = m.front().front();
93  for (const auto &row : m)
94  {
95  auto rmM = std::minmax_element(row.begin(), row.end());
96  if (*rmM.first < mi) mi = *rmM.first;
97  if (*rmM.second > ma) ma = *rmM.second;
98  }
99  return std::make_tuple(mi, ma);
100  }
102  template<typename T> inline size_t Argmax(const std::vector<T> &v) { return std::max_element(v.begin(), v.end()) - v.begin(); }
104  template<typename T> inline size_t Argmin(const std::vector<T> &v) { return std::min_element(v.begin(), v.end()) - v.begin(); }
106  template<typename T> size_t ColumnArgmax(const std::vector<std::vector<T>> &m, size_t col)
107  {
108  auto ma = m.front().front();
109  auto index = size_t(0);
110  for (size_t tmp = 1; tmp < m.size(); ++tmp)
111  if (m[tmp][col] > ma)
112  {
113  ma = m[tmp][col];
114  index = tmp;
115  }
116  return index;
117  }
119  template<typename T> size_t ColumnArgmin(const std::vector<std::vector<T>> &m, size_t col)
120  {
121  auto mi = m.front().front();
122  auto index = size_t(0);
123  for (size_t tmp = 1; tmp < m.size(); ++tmp)
124  if (m[tmp][col] < mi)
125  {
126  mi = m[tmp][col];
127  index = tmp;
128  }
129  return index;
130  }
131 
133  double Mean(const std::vector<double> &v);
142  template<typename ITER> typename std::iterator_traits<ITER>::value_type Mean(ITER be, ITER en)
143  {
144  using value_type = typename std::iterator_traits<ITER>::value_type;
145  auto cumul = std::accumulate(be, en, SumType<value_type>(0));
146  cumul /= SumType<value_type>(std::distance(be, en));
147  return value_type(cumul);
148  }
155  template<typename ITER> double MeanAsDouble(ITER be, ITER en)
156  {
157  const auto s = double(std::distance(be, en));
158  auto m = std::accumulate(be, en, 0.0);
159 
160  if (!isinf(m))
161  return m / s;
162  else
163  {
164  m = 0.0;
165  for (auto it = be; it != en; ++it)
166  m += double(*it) / s;
167  return m;
168  }
169  }
170 
172  std::vector<double> MeanPattern(const std::vector<std::vector<double>> &m);
173 
181  template<typename ITER> std::vector<double> MeanPattern(ITER it_begin, ITER it_end)
182  {
183  auto cardinal = 0.0;
184  auto dimension = it_begin->first.size();
185  auto pattern = std::vector<double>(dimension, 0.0);
186 
187  for (auto it = it_begin; it != it_end; ++it)
188  {
189  const auto& pat = it->first;
190 
191  if (pat.size() == dimension)
192  {
193  const auto weight = double(it->second);
194 
195  for (size_t k = 0; k < dimension; ++k)
196  pattern[k] += weight * pat[k];
197 
198  cardinal += weight;
199  }
200  }
201 
202  auto valid = true;
203 
204  for (size_t k = 0; k < dimension; ++k)
205  if (std::isinf(pattern[k]))
206  {
207  valid = false;
208  break;
209  }
210 
211  if (valid)
212  for (size_t k = 0; k < dimension; ++k)
213  pattern[k] /= cardinal;
214  else
215  {
216  for (size_t k = 0; k < dimension; ++k)
217  pattern[k] = 0.0;
218 
219  for (auto it = it_begin; it != it_end; ++it)
220  if (it->first.size() == dimension)
221  {
222  const auto scale = double(it->second) / double(cardinal);
223  const auto& pt = it->first;
224 
225  for (size_t k = 0; k < dimension; ++k)
226  pattern[k] += pt[k] * scale;
227  }
228  }
229 
230  return pattern;
231  }
232 
234  double StdDeviation(const std::vector<double> &v);
236  double Variance(const std::vector<double> &v);
238  std::vector<std::vector<double>> MakeCovariance(const std::vector<std::vector<double>> &m);
239 
240  template<typename ITER> std::vector<std::vector<double>> MakeCovariance(ITER it_begin, ITER it_end)
241  {
242  auto card = 0.0;
243  const auto dim = it_begin->first.size();
244  auto cov = std::vector<std::vector<double>>(dim, std::vector<double>(dim));
245 
246  for (auto it = it_begin; it != it_end; ++it)
247  card += double(it->second);
248 
249  for (size_t i = 0; i < dim; ++i)
250  for (auto j = i; j < dim; ++j)
251  {
252  for (auto it = it_begin; it != it_end; ++it)
253  cov[i][j] += it->first[i] * it->first[j] * double(it->second);
254 
255  cov[i][j] /= double(card);
256 
257  if (j != i)
258  cov[j][i] = cov[i][j];
259  }
260 
261  return cov;
262  }
263 
265  std::tuple<double, double, double> MeanVarDev(const std::vector<double> &v);
273  template<typename ITER> std::tuple<double, double, double> MeanVarDev(ITER it_begin, ITER it_end)
274  {
275  // Compute cumul and cumul of squares in a same loop
276  auto m = 0.0;
277  auto m_2 = 0.0;
278  auto s = 0.0;
279 
280  for (auto it = it_begin; it != it_end; ++it)
281  {
282  auto val = it->first;
283  auto cnt = it->second;
284 
285  m += val * cnt;
286  m_2 += Sqr(val) * cnt;
287  s += double(cnt);
288  }
289 
290  if (!std::isinf(m))
291  {
292  m /= s;
293  m_2 /= s;
294  }
295  else // If cumul too large, re-compute mean with values downscaled by cardinal
296  {
297  m = 0.0;
298  m_2 = 0.0;
299 
300  for (auto it = it_begin; it != it_end; ++it)
301  {
302  auto val = it->first;
303  auto cnt = it->second;
304 
305  m += val * cnt / s;
306  m_2 += Sqr(val) * cnt / s;
307  }
308  }
309 
310  if (!std::isinf(m_2))
311  {
312  auto var = m_2 - Sqr(m);
313  return std::make_tuple(m, var, sqrt(var));
314  }
315  else // Konig-Huygens formula for variance cannot be used (too large values in cumul of squares)
316  {
317  auto var = 0.0;
318 
319  for (auto it = it_begin; it != it_end; ++it)
320  var += Sqr(it->first - m) * it->second;
321 
322  if (!std::isinf(var))
323  var /= s;
324  else
325  {
326  var = 0.0;
327 
328  for (auto it = it_begin; it != it_end; ++it)
329  var += Sqr(it->first - m) * it->second / s;
330  }
331 
332  return std::make_tuple(m, var, sqrt(var));
333  }
334  }
335 
337  std::vector<double> Quantiles(const std::vector<double> &v, size_t q, bool sort_flag = true);
338 
340  template<typename T> T MedianValue(const std::vector<T> &v)
341  {
342  auto sv = v;
343  std::sort(sv.begin(), sv.end());
344  return sv[sv.size() / 2];
345  }
346 
348  template<typename T> inline bool AllEqual(const std::vector<T> &v) { return std::all_of(v.begin(), v.end(), [&v](const T &x){ return x == v.front(); }); }
350  template<typename T> inline bool AllEqual(const std::vector<std::vector<T>> &m)
351  {
352  auto refval = m.front().front();
353  for (const auto &row : m)
354  if (std::any_of(row.begin(), row.end(), [refval](const T &x){ return x != refval; }))
355  return false;
356  return true;
357  }
358 
360  Histogram MakeHistogram(const std::vector<double> &v, size_t nb_bins);
362  Histogram MakeHistogramSquareRoot(const std::vector<double> &v);
364  Histogram MakeHistogramSturges(const std::vector<double> &v);
366  Histogram MakeHistogramRice(const std::vector<double> &v);
368  Histogram MakeHistogramScott(const std::vector<double> &v);
370  Histogram MakeHistogramFreedmanDiaconis(const std::vector<double> &v, bool sort_flag = true);
371 
373  UnivariateGaussianMixture MakeGaussianMixtureModel(const std::vector<double> &v, size_t nb_seeds = 2);
375  MultivariateGaussianMixture MakeGaussianMixtureModel(const std::vector<std::vector<double>> &patterns, size_t nb_seeds = 2);
376 
379 }
380 
381 #endif
382 
T MedianValue(const std::vector< T > &v)
Median value.
typename TypeInfo< T >::SumType SumType
Definition: CRNType.h:185
size_t Argmin(const std::vector< T > &v)
Return index of a minimal.
size_t ColumnArgmax(const std::vector< std::vector< T >> &m, size_t col)
Return index of a maximal on a column.
double Variance(const std::vector< double > &v)
Return variance of sample.
std::vector< double > MeanPattern(const std::vector< std::vector< double >> &m)
Return mean pattern of sample.
size_t ColumnArgmin(const std::vector< std::vector< T >> &m, size_t col)
Return index of a minimal on a column.
const T & Max(const T &a, const T &b)
Returns the max of two values.
Definition: CRNMath.h:47
Histogram MakeHistogramSquareRoot(const std::vector< double > &v)
Returns count histogram (#bins = sqrt(pop) )
std::vector< double > Quantiles(const std::vector< double > &v, size_t q, bool sort_flag=true)
Return quantile values of sample.
bool AllEqual(const std::vector< T > &v)
Test if all data values are equal.
Histogram MakeHistogramSturges(const std::vector< double > &v)
Returns count histogram (#bins = 1+log_2(pop) )
std::vector< std::vector< double > > MakeCovariance(const std::vector< std::vector< double >> &m)
Return covariance for sample.
size_t Argmax(const std::vector< T > &v)
Return index of a maximal.
double MeanAsDouble(ITER be, ITER en)
Return mean value of sample as a double value.
double Mean(const std::vector< double > &v)
Return mean value of sample.
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
Definition: CRNMath.h:61
double StdDeviation(const std::vector< double > &v)
Return deviation of sample.
UnivariateGaussianMixture MakeGaussianMixtureModel(const std::vector< double > &v, size_t nb_seeds=2)
Return Gaussian mixture model modeling current (univariate) sample.
const T & Min(const T &a, const T &b)
Returns the min of two values.
Definition: CRNMath.h:49
std::pair< T, T > MinMax(const Image< T > &img, CMP cmp=CMP{})
Returns min and max pixel values.
Definition: CRNImage.hpp:1447
Histogram MakeHistogramScott(const std::vector< double > &v)
Returns count histogram (bin width = 3.5 * stddev / pop^(1/3))
Histogram MakeHistogramRice(const std::vector< double > &v)
Returns count histogram (#bins = 2n^(1/3) )
Histogram MakeHistogramFreedmanDiaconis(const std::vector< double > &v, bool sort_flag=true)
Returns count histogram (bin width = 2 * IQR(v) / pop^(1/3))
std::tuple< double, double, double > MeanVarDev(const std::vector< double > &v)
Return mean, variance and standard deviation of sample.
Histogram MakeHistogram(const Image< T > &img, typename std::enable_if< std::is_arithmetic< T >::value >::type *dummy=nullptr)
Definition: CRNImageGray.h:64