45 size_t nb_patterns = data.
GetRows();
46 bool status = ((nb_patterns > 1) && (dimension > 1));
50 means = std::vector<double>(dimension, 0.0);
51 deviations = std::vector<double>(dimension, 0.0);
55 std::vector<double> means_of_squares(dimension, 0.0);
57 for (
size_t k = 0; k < nb_patterns; ++k)
58 for (
size_t d = 0; d < dimension; ++d)
60 double val = data[k][d];
62 means[d] += val / double(nb_patterns);
63 means_of_squares[d] +=
Sqr(val) / double(nb_patterns);
67 status = (means_of_squares[0] == means_of_squares[0]);
69 for (
size_t d = 1; d < dimension; ++d)
70 status &= (means_of_squares[d] == means_of_squares[d]);
72 std::vector< std::vector<double> > transformed_data(nb_patterns, std::vector<double>(dimension));
77 for (
size_t d = 0; d < dimension; ++d)
78 deviations[d] = sqrt(means_of_squares[d] -
Sqr(means[d]));
81 for (
size_t d = 0; d < dimension; ++d)
84 double sigma = deviations[d];
87 for (
size_t k = 0; k < nb_patterns; ++k)
88 transformed_data[k][d] = (data[k][d] - mu) / sigma;
90 for (
size_t k = 0; k < nb_patterns; ++k)
91 transformed_data[k][d] = data[k][d] - mu;
98 for (
size_t d = 0; d < dimension; ++d)
100 double mu = means[d];
103 for (
size_t k = 0; k < nb_patterns; ++k)
105 double val = data[k][d];
107 transformed_data[k][d] = val - mu;
108 sigma +=
Sqr(val - mu) / double(nb_patterns);
112 deviations[d] = sigma;
115 for (
size_t k = 0; k < nb_patterns; ++k)
116 transformed_data[k][d] /= sigma;
124 if (data_reduction_flag)
125 eigensystem = makeCorrelationSpectralEigensystem(cmat[0][1]);
140 PCA::PCA(
const std::vector< std::vector<double> > &data,
bool data_reduction_flag)
142 dimension = data.front().size();
144 size_t nb_patterns = data.size();
145 bool status = ((nb_patterns > 1) && (dimension > 1));
147 for (
size_t k = 1; k < nb_patterns; ++k)
148 if (data[k].size() != dimension)
156 means = std::vector<double>(dimension, 0.0);
157 deviations = std::vector<double>(dimension, 0.0);
161 std::vector<double> means_of_squares(dimension, 0.0);
163 for (
size_t k = 0; k < nb_patterns; ++k)
164 for (
size_t d = 0; d < dimension; ++d)
166 double val = data[k][d];
168 means[d] += val / double(nb_patterns);
169 means_of_squares[d] +=
Sqr(val) / double(nb_patterns);
173 status = (means_of_squares[0] == means_of_squares[0]);
175 for (
size_t d = 1; d < dimension; ++d)
176 status &= (means_of_squares[d] == means_of_squares[d]);
178 std::vector< std::vector<double> > transformed_data(nb_patterns, std::vector<double>(dimension));
183 for (
size_t d = 0; d < dimension; ++d)
184 deviations[d] = sqrt(means_of_squares[d] -
Sqr(means[d]));
187 for (
size_t d = 0; d < dimension; ++d)
189 double mu = means[d];
190 double sigma = deviations[d];
192 if ((sigma != 1.0) && (sigma != 0.0) && data_reduction_flag)
193 for (
size_t k = 0; k < nb_patterns; ++k)
194 transformed_data[k][d] = (data[k][d] - mu) / sigma;
196 for (
size_t k = 0; k < nb_patterns; ++k)
197 transformed_data[k][d] = data[k][d] - mu;
204 for (
size_t d = 0; d < dimension; ++d)
206 double mu = means[d];
209 for (
size_t k = 0; k < nb_patterns; ++k)
211 double val = data[k][d];
213 transformed_data[k][d] = val - mu;
214 sigma +=
Sqr(val - mu) / double(nb_patterns);
218 deviations[d] = sigma;
220 if ((sigma != 1.0) && (sigma != 0.0) && data_reduction_flag)
221 for (
size_t k = 0; k < nb_patterns; ++k)
222 transformed_data[k][d] /= sigma;
230 if (data_reduction_flag)
231 eigensystem = makeCorrelationSpectralEigensystem(cmat[0][1]);
247 PCA::PCA(
const std::vector< std::vector<double> > &data,
const std::vector<size_t> &cards,
bool data_reduction_flag)
249 dimension = data.front().size();
251 size_t nb_patterns = data.size();
252 double sample_cardinal = 0.0;
253 bool status = ((nb_patterns > 1) && (dimension > 1));
255 for (
size_t k = 1; k < nb_patterns; ++k)
257 if (data[k].size() != dimension)
263 sample_cardinal += double(cards[k]);
268 means = std::vector<double>(dimension, 0.0);
269 deviations = std::vector<double>(dimension, 0.0);
273 std::vector<double> means_of_squares(dimension, 0.0);
275 for (
size_t k = 0; k < nb_patterns; ++k)
276 for (
size_t d = 0; d < dimension; ++d)
278 double val = data[k][d];
279 double wgt = double(cards[k]);
281 means[d] += val * wgt / sample_cardinal;
282 means_of_squares[d] +=
Sqr(val) * wgt / sample_cardinal;
286 status = (means_of_squares[0] == means_of_squares[0]);
288 for (
size_t d = 1; d < dimension; ++d)
289 status &= (means_of_squares[d] == means_of_squares[d]);
291 std::vector< std::vector<double> > transformed_data(nb_patterns, std::vector<double>(dimension));
296 for (
size_t d = 0; d < dimension; ++d)
297 deviations[d] = sqrt(means_of_squares[d] -
Sqr(means[d]));
300 for (
size_t d = 0; d < dimension; ++d)
302 double mu = means[d];
303 double sigma = deviations[d];
306 for (
size_t k = 0; k < nb_patterns; ++k)
307 transformed_data[k][d] = (data[k][d] - mu) / sigma;
309 for (
size_t k = 0; k < nb_patterns; ++k)
310 transformed_data[k][d] = data[k][d] - mu;
317 for (
size_t d = 0; d < dimension; ++d)
319 double mu = means[d];
322 for (
size_t k = 0; k < nb_patterns; ++k)
324 double val = data[k][d];
326 transformed_data[k][d] = val - mu;
327 sigma +=
Sqr(val - mu) * double(cards[k]) / sample_cardinal;
331 deviations[d] = sigma;
334 for (
size_t k = 0; k < nb_patterns; ++k)
335 transformed_data[k][d] /= sigma;
343 if (data_reduction_flag)
344 eigensystem = makeCorrelationSpectralEigensystem(cmat[0][1]);
359 PCA::PCA(
const std::map< std::vector<double>,
size_t > &data,
bool data_reduction_flag)
361 dimension = data.begin()->first.size();
363 size_t nb_patterns = data.size();
364 double sample_cardinal = 0.0;
365 bool status = ((nb_patterns > 1) && (dimension > 1));
367 for (
const auto& weighted_pattern:data)
369 if (weighted_pattern.first.size() != dimension)
375 sample_cardinal += double(weighted_pattern.second);
380 means = std::vector<double>(dimension, 0.0);
381 deviations = std::vector<double>(dimension, 0.0);
385 std::vector<double> means_of_squares(dimension, 0.0);
387 for (
const auto& weighted_pattern:data)
388 for (
size_t d = 0; d < dimension; ++d)
390 double val = weighted_pattern.first[d];
391 double wgt = double(weighted_pattern.second);
393 means[d] += val * wgt / sample_cardinal;
394 means_of_squares[d] +=
Sqr(val) * wgt / sample_cardinal;
398 status = (means_of_squares[0] == means_of_squares[0]);
400 for (
size_t d = 1; d < dimension; ++d)
401 status &= (means_of_squares[d] == means_of_squares[d]);
403 std::vector< std::vector<double> > transformed_data(nb_patterns, std::vector<double>(dimension));
408 for (
size_t d = 0; d < dimension; ++d)
409 deviations[d] = sqrt(means_of_squares[d] -
Sqr(means[d]));
412 for (
size_t d = 0; d < dimension; ++d)
414 double mu = means[d];
415 double sigma = deviations[d];
421 for (
const auto& weighted_pattern:data)
423 transformed_data[k][d] = (weighted_pattern.first[d] - mu) / sigma;
431 for (
const auto& weighted_pattern:data)
433 transformed_data[k][d] = weighted_pattern.first[d] - mu;
443 for (
size_t d = 0; d < dimension; ++d)
445 double mu = means[d];
449 for (
const auto& weighted_pattern:data)
451 double val = weighted_pattern.first[d];
453 transformed_data[k][d] = val - mu;
454 sigma +=
Sqr(val - mu) * double(weighted_pattern.second) / sample_cardinal;
459 deviations[d] = sigma;
462 for (k = 0; k < nb_patterns; ++k)
463 transformed_data[k][d] /= sigma;
471 if (data_reduction_flag)
472 eigensystem = makeCorrelationSpectralEigensystem(cmat[0][1]);
489 std::multimap<double, MatrixDouble> PCA::makeCorrelationSpectralEigensystem(
double g)
const
491 std::multimap<double, MatrixDouble> eigen_pairs;
493 double delta = 4 * g * g;
495 double m_1 = sqrt(delta) / (2 * g);
497 double fraction = 1.0 / sqrt(1.0 + m_1 * m_1);
502 eigen_vector_1.At(0, 0) = fraction;
503 eigen_vector_1.At(1, 0) = m_1 * fraction;
505 eigen_vector_2.At(0, 0) = - m_1 * fraction;
506 eigen_vector_2.At(1, 0) = fraction;
508 double eigen_value_1 = (2 + sqrt(delta)) / 2.0;
509 double eigen_value_2 = (2 - sqrt(delta)) / 2.0;
511 eigen_pairs.insert(std::make_pair(eigen_value_1, eigen_vector_1));
512 eigen_pairs.insert(std::make_pair(eigen_value_2, eigen_vector_2));
544 return deviations[d];
562 if (patterns.
GetCols() != dimension)
564 throw ExceptionDimension(
StringUTF8(
"PCA::ApplyTransform(const MatrixDouble &patterns, const size_t nb_features): ") +
_(
"Incompatible input pattern dimensions."));
566 else if (nb_features > dimension)
568 throw ExceptionDomain(
StringUTF8(
"PCA::ApplyTransform(const MatrixDouble &patterns, const size_t nb_features): ") +
_(
"Incompatible output dimensions."));
571 size_t nb_patterns = patterns.
GetRows();
574 std::multimap<double, MatrixDouble>::const_reverse_iterator rev_iter;
576 std::vector<double> translated_pattern(dimension);
578 for (
size_t p = 0; p < nb_patterns; p++)
580 for (
size_t k = 0; k < dimension; k++)
581 translated_pattern[k] = (patterns[p][k] - means[k]);
583 rev_iter = eigensystem.rbegin();
585 for (
size_t f = 0; f < nb_features; f++)
592 for (
size_t k = 0; k < dimension; ++k)
593 cumul += translated_pattern[k] * eigen_vector[k][0];
595 new_patterns.
At(p, f) = cumul;
614 std::vector<std::vector<double>>
PCA::Transform(
const std::vector< std::vector<double> > &data,
const size_t nb_features)
const
616 size_t new_dimension = nb_features;
618 if ((nb_features <= 0) || (nb_features > dimension))
619 new_dimension = dimension;
621 size_t nb_patterns = data.size();
623 std::vector< std::vector<double> > new_patterns(nb_patterns, std::vector<double>(new_dimension));
626 std::multimap<double, MatrixDouble>::const_reverse_iterator rev_iter;
628 std::vector<double> translated_pattern(dimension);
630 for (
size_t p = 0; p < nb_patterns; ++p)
632 for (
size_t k = 0; k < dimension; ++k)
633 translated_pattern[k] = (data[p][k] - means[k]);
635 rev_iter = eigensystem.rbegin();
637 for (
size_t f = 0; f < new_dimension; ++f)
642 const auto& eigen_vector = rev_iter->second;
644 for (
size_t k = 0; k < dimension; ++k)
645 cumul += translated_pattern[k] * eigen_vector[k][0];
647 new_patterns[p][f] = cumul;
665 size_t nb_patterns = data.size();
666 std::vector< std::vector<double> > new_patterns;
671 for (
auto rev_iter = eigensystem.rbegin(); rev_iter != eigensystem.rend(); ++rev_iter)
675 for (
size_t i = 0; i < dimension; ++i)
676 mat[i][j] = vect[i][0];
681 for (
size_t p = 0; p < nb_patterns; ++p)
683 std::vector<double> pattern = data[p];
684 std::vector<double> new_pattern(dimension);
686 for (
size_t i = 0; i < dimension; ++i)
690 for (
size_t k = 0; k < dimension; ++k)
691 cumul += mat[i][k] * pattern[k];
693 new_pattern[i] = cumul + means[i];
696 new_patterns.push_back(new_pattern);
718 _(
"Wrong XML element."));
721 std::multimap<double, MatrixDouble> newsystem;
726 double w = sub_el.
GetAttribute<
double>(
"eigenvalue",
false);
729 xml::Element mat_el(sub_el.GetFirstChildElement(
"MatrixDouble"));
732 newsystem.insert(std::make_pair(w, mat));
734 sub_el = sub_el.GetNextSiblingElement(
"eigenpair");
737 SMatrixDouble mmat, dmat;
743 mmat = std::make_shared<MatrixDouble>(sub_el);
744 else if (role ==
"deviations")
745 dmat = std::make_shared<MatrixDouble>(sub_el);
746 sub_el = sub_el.GetNextSiblingElement(
"MatrixDouble");
751 dimension = mmat->GetCols();
752 means = std::vector<double>(dimension);
753 deviations = std::vector<double>(dimension);
755 for (
size_t d = 0; d < dimension; ++d)
757 means[d] = mmat->At(0, d);
758 deviations[d] = dmat->At(0, d);
761 eigensystem.swap(newsystem);
781 for (
size_t d = 0; d < dimension; ++d)
783 mmat.
At(0, d) = means[d];
784 dmat.
At(0, d) = deviations[d];
792 std::multimap<double, MatrixDouble>::const_reverse_iterator rev_iter;
794 for (rev_iter = eigensystem.rbegin(); rev_iter != eigensystem.rend(); ++rev_iter)
800 rev_iter->second.Serialize(sub_el);
size_t GetRows() const noexcept
Returns the number of rows.
size_t GetCols() const noexcept
Returns the number of columns.
StringUTF8 GetName() const
Gets the label of the element.
#define CRN_END_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
PCA(const MatrixDouble &data, bool data_reduction_flag=true)
Constructor.
std::multimap< double, MatrixDouble > MakeJacobiEigensystem(size_t MaxIteration=100) const
Perform diagonalization for symmetric matrix.
std::vector< std::vector< double > > MakeCovariance(const std::vector< std::vector< double >> &m)
Return covariance for sample.
MatrixDouble Transform(const MatrixDouble &patterns, size_t nb_features=1u) const
Apply transform to given patterns.
void Deserialize(xml::Element &el)
virtual xml::Element Serialize(xml::Element &parent) const
Element GetFirstChildElement(const StringUTF8 &name="")
Gets the first child element.
#define CRN_DATA_FACTORY_REGISTER(elemname, classname)
Registers a class to the data factory.
constexpr SumType< T > Sqr(const T &v) noexcept(noexcept(v *v))
Returns the square of a value.
void SetAttribute(const StringUTF8 &name, const StringUTF8 &value)
Sets the value of an attribute.
double GetDeviation(size_t d) const
Returns the deviation of d-th feature computed for sample data.
T GetAttribute(const StringUTF8 &name, bool silent=true) const
Gets an attribute.
Class to perform Principal Componant Analysis.
std::vector< std::vector< double > > ReverseTransform(const std::vector< std::vector< double > > &data) const
Apply reverse transform to get given patterns' pre-images.
const T & At(size_t pos) const noexcept
Square double matrix class.
std::multimap< double, MatrixDouble > MakeSpectralEigensystem() const
Perform diagonalization for 2x2 symmetric matrix.
xml::Element Serialize(xml::Element &parent) const
A character string class.
double GetMean(size_t d) const
Returns the mean value of d-th feature computed for sample data.
Element PushBackElement(const StringUTF8 &name)
Adds an element at the end of the children list.
Invalid argument error (e.g.: nullptr pointer)
#define CRN_BEGIN_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
An item was not found in a container.