libcrn  3.9.5
A document image processing library
•All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNPCA.h
Go to the documentation of this file.
1 /* Copyright 2009-2016 Jean DUONG, CoReNum, INSA-Lyon, Université Paris Descartes, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNPCA.h
19  * \author Jean DUONG, Yann LEYDIER
20  */
21 
22 #ifndef CRNPCA_HEADER
23 #define CRNPCA_HEADER
24 
26 #include <map>
27 
28 namespace crn
29 {
39  class PCA: public Object
40  {
41  public:
43  PCA(const MatrixDouble &data, bool data_reduction_flag = true);
45  PCA(const std::vector< std::vector<double> > &data, bool data_reduction_flag = true);
47  PCA(const std::vector< std::vector<double> > &data, const std::vector<size_t> &cards, bool data_reduction_flag = true);
49  PCA(const std::map< std::vector<double>, size_t > &data, bool data_reduction_flag = true);
51  template<typename ITER> PCA(ITER begin, ITER end, bool data_reduction_flag = true);
53  PCA(const PCA&) = default;
54  PCA(PCA&&) = default;
56  virtual ~PCA() override = default;
57 
58  PCA& operator=(const PCA&) = default;
59  PCA& operator=(PCA&&) = default;
60 
62  size_t GetDimension() const noexcept { return dimension; }
64  const std::vector<double>& GetMeans() const noexcept { return means; }
66  double GetMean(size_t d) const;
67 
69  const std::vector<double>& GetDeviations() const noexcept { return deviations; }
71  double GetDeviation(size_t d) const;
72 
74  const std::multimap<double, MatrixDouble>& GetEigensystem() const noexcept { return eigensystem; }
75 
77  MatrixDouble Transform(const MatrixDouble &patterns, size_t nb_features = 1u) const;
79  std::vector<std::vector<double>> Transform(const std::vector< std::vector<double> > &data, const size_t nb_features = 0u) const;
80 
82  std::vector<std::vector<double>> ReverseTransform(const std::vector< std::vector<double> > &data) const;
83 
84  void Deserialize(xml::Element &el);
85  xml::Element Serialize(xml::Element &parent) const;
86  private:
87 
89  std::multimap<double, MatrixDouble> makeCorrelationSpectralEigensystem(double g) const;
90 
91  size_t dimension;
92  std::vector<double> means;
93  std::vector<double> deviations;
94  std::multimap<double, MatrixDouble> eigensystem;
95 
97  public : PCA(xml::Element& el):means(1,1),dimension(1),deviations(1,1){Deserialize(el);}
98  };
99  template<> struct IsSerializable<PCA> : public std::true_type {};
100  template<> struct IsClonable<PCA> : public std::true_type {};
101 
103 
104 
114  template<typename ITER> PCA::PCA(ITER begin, ITER end, bool data_reduction_flag)
115  {
116  std::vector< std::vector<double> > altered_data;
117  std::vector<double> scales;
118  dimension = (begin->first).size();
119  size_t nb_patterns = 0;
120  size_t nb_prototypes = 0;
121 
122  // Compute means
123 
124  for (ITER it = begin; it != end; ++it)
125  {
126  nb_patterns += it->second;
127  ++nb_prototypes;
128  }
129 
130  means = std::vector<double>(dimension);
131 
132  for (ITER it = begin; it != end; ++it)
133  {
134  std::vector<double> pat = it->first;
135 
136  double scale = (double) it->second / (double) nb_patterns;
137 
138  for (size_t ft = 0; ft < dimension; ++ft)
139  means[ft] += pat[ft] * scale;
140  }
141 
142  // Data centering
143 
144  for (ITER it = begin; it != end; ++it)
145  {
146  std::vector<double> pat = it->first;
147  size_t pop = it->second;
148 
149  std::vector<double> new_pat;
150 
151  for (size_t ft = 0; ft < dimension; ++ft)
152  new_pat.push_back(pat[ft] - means[ft]);
153 
154  altered_data.push_back(new_pat);
155  scales.push_back((double) pop / (double) (nb_patterns - 1));
156  }
157 
158  // Compute standard deviation on centered data
159 
160  deviations = std::vector<double>(dimension);
161 
162  for (size_t k = 0; k < nb_prototypes; ++k)
163  {
164  std::vector<double> pat = altered_data[k];
165  double scale = scales[k];
166 
167  for (size_t ft = 0; ft < dimension; ++ft)
168  {
169  double val = pat[ft];
170  deviations[ft] += scale * val * val;
171  }
172  }
173 
174  for (size_t ft = 0; ft < dimension; ++ft)
175  deviations[ft] = sqrt(deviations[ft]);
176 
177  // Data reduction
178 
179  if (data_reduction_flag)
180  {
181  for (size_t ft = 0; ft < dimension; ++ft)
182  {
183  double ratio = 1.0 / deviations[ft];
184 
185  for (size_t k = 0; k < nb_prototypes; ++k)
186  (altered_data[k])[ft] *= ratio;
187  }
188  }
189 
190  // Covariance matrix
191 
192  SquareMatrixDouble cmat(dimension, 0.0);
193  double scale_correction = (double)(nb_patterns - 1) / (double) nb_patterns;
194 
195  for (size_t k = 0; k < nb_prototypes; ++k)
196  {
197  std::vector<double> pat = altered_data[k];
198  double scale = (double) scales[k] * scale_correction;
199 
200  for (size_t i = 0; i < dimension; ++i)
201  for (size_t j = i; j < dimension; ++j)
202  cmat.IncreaseElement(i, j, scale * pat[i] * pat[j]);
203  }
204 
205  for (size_t i = 0; i < dimension; ++i)
206  for (size_t j = i + 1; j < dimension; ++j)
207  cmat.At(j, i) = cmat.At(i, j);
208 
209  if (dimension == 2)
210  {
211  if (data_reduction_flag)
212  eigensystem = makeCorrelationSpectralEigensystem(cmat.At(0, 1));
213  else
214  eigensystem = cmat.MakeSpectralEigensystem();
215  }
216  else
217  eigensystem = cmat.MakeJacobiEigensystem();
218  }
219 }
220 
221 #endif
222 
const std::vector< double > & GetDeviations() const noexcept
Returns the deviation computed for sample data.
Definition: CRNPCA.h:69
XML element.
Definition: CRNXml.h:135
virtual ~PCA() override=default
Destructor.
size_t GetDimension() const noexcept
Returns the dimension of feature space.
Definition: CRNPCA.h:62
PCA(const MatrixDouble &data, bool data_reduction_flag=true)
Constructor.
Definition: CRNPCA.cpp:41
std::multimap< double, MatrixDouble > MakeJacobiEigensystem(size_t MaxIteration=100) const
Perform diagonalization for symmetric matrix.
MatrixDouble Transform(const MatrixDouble &patterns, size_t nb_features=1u) const
Apply transform to given patterns.
Definition: CRNPCA.cpp:560
void Deserialize(xml::Element &el)
Definition: CRNPCA.cpp:713
const std::vector< double > & GetMeans() const noexcept
Returns the mean values computed for sample data.
Definition: CRNPCA.h:64
double GetDeviation(size_t d) const
Returns the deviation of d-th feature computed for sample data.
Definition: CRNPCA.cpp:539
double matrix class
Class to perform Principal Componant Analysis.
Definition: CRNPCA.h:39
PCA & operator=(const PCA &)=default
std::vector< std::vector< double > > ReverseTransform(const std::vector< std::vector< double > > &data) const
Apply reverse transform to get given patterns' pre-images.
Definition: CRNPCA.cpp:663
const T & At(size_t pos) const noexcept
Definition: CRNMatrix.h:165
#define CRN_DECLARE_CLASS_CONSTRUCTOR(classname)
Declares a class constructor.
Definition: CRNObject.h:173
Square double matrix class.
std::multimap< double, MatrixDouble > MakeSpectralEigensystem() const
Perform diagonalization for 2x2 symmetric matrix.
xml::Element Serialize(xml::Element &parent) const
Definition: CRNPCA.cpp:773
CRN_ALIAS_SMART_PTR(ImageBW)
const std::multimap< double, MatrixDouble > & GetEigensystem() const noexcept
Returns the eigensystem computed on covariance matrix of last sample data.
Definition: CRNPCA.h:74
double GetMean(size_t d) const
Returns the mean value of d-th feature computed for sample data.
Definition: CRNPCA.cpp:524
void IncreaseElement(size_t r, size_t c, const T &delta)
Increases the value of an element.
Definition: CRNMatrix.h:230