libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNString.cpp
Go to the documentation of this file.
1 /* Copyright 2008-2016 INSA Lyon, CoReNum, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNString.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include <CRNi18n.h>
23 #include <CRNString.h>
24 #include <CRNStringUTF8.h>
25 #include <CRNException.h>
27 #include <CRNStringUTF8.h>
28 #include <CRNMath/CRNProp3.h>
29 #include <CRNData/CRNDataFactory.h>
30 #include <CRNProtocols.h>
31 #include <algorithm> // for min & max
32 
33 using namespace crn;
34 
38 int& String::Precision() noexcept
39 {
40  static int precision = 16;
41  return precision;
42 }
43 
47 size_t String::NPos() noexcept
48 {
49  return std::u32string::npos;
50 }
51 
58 String::String(char32_t c, size_t n)
59 {
60  data = std::u32string(n, c);
61 }
62 
69 {
70  data = UtfConverter::FromUtf8(s.Std());
71 }
72 
78 {
79  if (p.IsTrue())
80  data = U"true";
81  else if (p.IsFalse())
82  data = U"false";
83  else
84  data = U"unknown";
85 }
86 
93 {
94  data = UtfConverter::FromUtf8(s);
95 }
96 
102 String::String(const char *s)
103 {
104  data = UtfConverter::FromUtf8(s);
105 }
106 
112 String::String(const std::string &s)
113 {
114  data = UtfConverter::FromUtf8(s);
115 }
116 
123 String::String(char c, size_t n)
124 {
125  data = UtfConverter::FromUtf8(std::string(n, c));
126 }
127 
137 char32_t& String::operator[](size_t index)
138 {
139  if (index >= Size())
140  throw ExceptionDomain(StringUTF8("char& String::operator[](size_t index): ") +
141  _("index out of bounds."));
142  return data[index];
143 }
144 
154 const char32_t& String::operator[](size_t index) const
155 {
156  if (index >= Size())
157  throw ExceptionDomain(StringUTF8("const char& String::operator[](size_t index) const: ") +
158  _("index out of bounds."));
159  return data[index];
160 }
161 
167 const char* String::CStr() const
168 {
169  cdata = UtfConverter::ToUtf8(data);
170  return cdata.c_str();
171 }
172 
177 {
178  if (data == U"true")
179  return Prop3::True();
180  else if (data == U"false")
181  return Prop3::False();
182  else
183  return Prop3::Unknown();
184 }
185 
195 String& String::Insert(size_t pos, const String &s)
196 {
197  if (pos == NPos())
198  {
199  *this += s;
200  return *this;
201  }
202  if (pos >= Size())
203  {
204  throw ExceptionDomain(StringUTF8("String& String::Insert(int pos, const String &s): ") +
205  _("index out of bounds."));
206  }
207  data.insert(pos, s.data);
208  return *this;
209 }
210 
220 String String::SubString(size_t pos, size_t n) const
221 {
222  if ((pos >= Size()) || (pos + n > Size()))
223  {
224  throw ExceptionDomain(StringUTF8("String String::SubString(int pos, int n): ") +
225  StringUTF8(_("index out of bounds.")) + StringUTF8(pos) +
226  StringUTF8(" + ") + StringUTF8(n) + StringUTF8(" > ") + StringUTF8(Size()));
227  }
228  String s;
229  if (n == 0)
230  {
231  s = data.substr(pos);
232  }
233  else
234  {
235  s = data.substr(pos, n);
236  }
237  return s;
238 }
239 
247 String& String::Crop(size_t pos, size_t n)
248 {
249  *this = SubString(pos, n);
250  return *this;
251 }
252 
262 String& String::Erase(size_t pos, size_t n)
263 {
264  if ((pos >= Size()) || (pos + n > Size()))
265  {
266  throw ExceptionDomain(StringUTF8("String& String::Erase(int pos, int n): ") +
267  _("index out of bounds."));
268  }
269  if (n <= 0)
270  data.erase(pos);
271  else
272  data.erase(pos, n);
273  return *this;
274 }
275 
286 String& String::Replace(const String &s, size_t pos, size_t n)
287 {
288  if ((pos >= Size()) || (pos + n > Size()))
289  {
290  throw ExceptionDomain(StringUTF8("String& String::Replace(const String &s, int pos, int n): ") +
291  _("index out of bounds."));
292  }
293  data.replace(pos, n, s.data);
294  return *this;
295 }
296 
304 String& String::ReplaceSuffix(const String &old_suffix, const String &new_suffix)
305 {
306  size_t old_suffix_length = old_suffix.Length();
307  if (!old_suffix_length)
308  throw ExceptionInvalidArgument(_("Null suffix to search."));
309  size_t word_length = Length();
310 
311  if (old_suffix_length <= word_length)
312  {
313  size_t pivot = word_length - old_suffix_length;
314 
315  if (EndsWith(old_suffix))
316  {
317  data.erase(pivot);
318  data.append(new_suffix.Std());
319  }
320  }
321 
322  return *this;
323 }
324 
332 String& String::ReplaceSuffix(const size_t n, const String &new_suffix)
333 {
334  size_t word_length = Length();
335 
336  if (n <= word_length)
337  {
338  size_t pivot = word_length - n;
339 
340  data.erase(pivot);
341  data.append(new_suffix.Std());
342  }
343 
344  return *this;
345 }
346 
355 {
356  size_t suffix_length = suffix.Length();
357  if (!suffix_length)
358  throw ExceptionInvalidArgument(_("Null suffix to delete."));
359  size_t word_length = Length();
360 
361  if (suffix_length <= word_length)
362  {
363  size_t pivot = word_length - suffix_length;
364 
365  if (EndsWith(suffix))
366  {
367  data.erase(pivot);
368  }
369  }
370 
371  return *this;
372 }
373 
380 std::vector<String> String::WhichSuffixes(const std::vector<String> &suffixes) const
381 {
382  std::vector<String> found;
383 
384  size_t n = suffixes.size();
385 
386  for (size_t k = 0; k < n; k++)
387  {
388  String sf = suffixes[k];
389 
390  if (EndsWith(sf))
391  {
392  found.push_back(sf);
393  }
394  }
395 
396  return found;
397 }
398 
405 std::vector<String> String::WhichPrefixes(const std::vector<String> &prefixes) const
406 {
407  std::vector<String> found;
408 
409  size_t n = prefixes.size();
410 
411  for (size_t k = 0; k < n; k++)
412  {
413  String pf = prefixes[k];
414 
415  if (StartsWith(pf))
416  {
417  found.push_back(pf);
418  }
419  }
420 
421  return found;
422 }
423 
430 std::vector<String> String::WhichSubstrings(const std::vector<String> &substrings) const
431 {
432  std::vector<String> found;
433 
434  for (auto str : substrings)
435  {
436 
437 
438  if (Find(str) != String::NPos())
439  {
440  found.push_back(str);
441  }
442  }
443 
444  return found;
445 }
446 
447 
448 
456 String& String::Replace(char32_t from, char32_t to)
457 {
458  for (auto & elem : data)
459  if (elem == from)
460  elem = to;
461  return *this;
462 }
463 
470 {
471  for (auto & elem : data)
472  elem = char32_t(ToWUpper(wint_t(elem)));
473  return *this;
474 }
475 
482 {
483  data[0] = char32_t(ToWUpper(wint_t(data[0])));
484 
485  return *this;
486 }
487 
494 {
495  for (auto & elem : data)
496  elem = char32_t(ToWLower(wint_t(elem)));
497  return *this;
498 }
499 
509 size_t String::Find(const String &s, size_t from_pos) const
510 {
511  if (IsEmpty())
512  return NPos();
513  if (s.Size() > Size())
514  {
515  return NPos();
516  }
517  if (from_pos >= Size())
518  {
519  throw ExceptionDomain(StringUTF8("int String::Find(const String &s, int from_pos = 0) const: ") +
520  _("index out of bounds."));
521  }
522  return data.find(s.data, from_pos);
523 }
524 
525 
533 bool String::StartsWith(const String &s) const
534 {
535  size_t idx = Find(s, 0);
536 
537  if (idx == NPos())
538  {
539  return false;
540  }
541  else
542  {
543  if (idx == 0)
544  {
545  return true;
546  }
547  else
548  {
549  return false;
550  }
551  }
552 }
553 
562 bool String::EndsWith(const String &s) const
563 {
564  size_t length_main = Length();
565  size_t length_suffix = s.Length();
566 
567  if (length_main < length_suffix)
568  {
569  return false;
570  }
571  else
572  {
573  size_t pos = length_main - length_suffix;
574  size_t idx = Find(s, pos);
575 
576  return (idx == pos);
577  }
578 }
579 
587 bool String::EndsWith(const std::vector<String> &suffixes) const
588 {
589  size_t nb_suffixes = suffixes.size();
590  size_t k = 0;
591  bool found = false;
592 
593  while (!found && (k < nb_suffixes))
594  {
595  found = EndsWith(suffixes[k]);
596  k++;
597  }
598 
599  return found;
600 }
601 
611 size_t String::FindAnyOf(const String &s, size_t from_pos) const
612 {
613  if (from_pos >= Size())
614  {
615  throw ExceptionDomain(StringUTF8("int String::FindAnyOf(const String &s, int from_pos = 0) const: ") +
616  _("index out of bounds."));
617  }
618 
619  return data.find_first_of(s.data, from_pos);
620 }
621 
631 size_t String::FindNotOf(const String &s, size_t from_pos) const
632 {
633  if (from_pos >= Size())
634  {
635  throw ExceptionDomain(StringUTF8("int String::NotOf::FindNotOf(const String &s, int from_pos = 0) const: ") +
636  _("index out of bounds."));
637  }
638 
639  return data.find_first_not_of(s.data, from_pos);
640 }
641 
651 size_t String::BackwardFind(const String &s, size_t last_pos) const
652 {
653  if (s.Size() > Size())
654  return NPos();
655  if (last_pos == NPos())
656  return data.rfind(s.data);
657  if (last_pos >= Size())
658  {
659  throw ExceptionDomain(StringUTF8("int String::BackwardFind(const String &s, int last_pos = -1) const: ") +
660  _("index out of bounds."));
661  }
662  return data.rfind(s.data, last_pos);
663 }
664 
674 size_t String::BackwardFindAnyOf(const String &s, size_t last_pos) const
675 {
676  if (last_pos == NPos())
677  return data.find_last_of(s.data);
678  if (last_pos >= Size())
679  {
680  throw ExceptionDomain(StringUTF8("int String::BackwardFindAnyOf(const String &s, int last_pos = -1) const: ") +
681  _("index out of bounds."));
682  }
683  return data.find_last_of(s.data, last_pos);
684 }
685 
695 size_t String::BackwardFindNotOf(const String &s, size_t last_pos) const
696 {
697  if (last_pos == NPos())
698  return data.find_last_not_of(s.data);
699  if (last_pos >= Size())
700  {
701  throw ExceptionDomain(StringUTF8("int String::BackwardFindNotOf(const String &s, int last_pos = -1) const: ") +
702  _("index out of bounds."));
703  }
704  return data.find_last_not_of(s.data, last_pos);
705 }
706 
714 int String::EditDistance(const String &s) const
715 {
716  unsigned int s1 = (unsigned int)Size();
717  unsigned int s2 = (unsigned int)s.Size();
718  std::vector<std::vector<unsigned int> > d(s1 + 1, std::vector<unsigned int>(s2 + 1));
719 
720  for (unsigned int i = 1; i <= s1; ++i) d[i][0] = i;
721  for (unsigned int i = 1; i <= s2; ++i) d[0][i] = i;
722 
723  for (unsigned int i = 1; i <= s1; ++i)
724  for (unsigned int j = 1; j <= s2; ++j)
725  d[i][j] = std::min(std::min(d[i - 1][j] + 1, d[i][j - 1] + 1),
726  d[i - 1][j - 1] + (data[i - 1] == s[j - 1] ? 0 : 1));
727  return d[s1][s2];
728 }
729 
739 {
740  if (el.GetName() != "String")
741  {
742  throw ExceptionInvalidArgument(StringUTF8("void String::Deserialize(xml::Element &el): ") +
743  _("Wrong XML element."));
744  }
745  xml::Node c(el.GetFirstChild());
746  if (!c)
747  return; // no content
748  xml::Text t(c.AsText()); // may throw
749  *this = t.GetValue();
750  ShrinkToFit();
751 }
752 
760 {
761  xml::Element el(parent.PushBackElement("String"));
762  el.PushBackText(CStr());
763  return el;
764 }
765 
772 std::vector<String> String::Split(const String &sep) const
773 {
774  size_t n = data.length();
775  auto start = data.find_first_not_of(sep.data);
776  std::vector<String> words;
777  while (start < n)
778  {
779  auto stop = data.find_first_of(sep.data, start);
780  if (stop > n) stop = n;
781  words.push_back(data.substr(start, stop - start));
782  start = data.find_first_not_of(sep.data, stop + 1);
783  }
784  return words;
785 }
786 
789 {
790  data.shrink_to_fit();
791 }
792 
800 {
801  return StringUTF8::CreateUniqueId(len);
802 }
803 
806  Cloner::Register<String>();
807  Ruler::Register<String>();
809 
virtual StringUTF8 GetValue() const override
Gets the content of the node.
Definition: CRNXml.cpp:888
wint_t ToWLower(wint_t c)
Transforms a character to its lower case.
Definition: CRNString.h:47
static Prop3 Unknown()
Definition: CRNProp3.h:90
size_t BackwardFind(const String &s, size_t last_pos=NPos()) const
Finds the last occurrence of a string.
Definition: CRNString.cpp:651
wint_t ToWUpper(wint_t c)
Transforms a character to its upper case.
Definition: CRNString.h:45
static crn::StringUTF8 CreateUniqueId(size_t len=8)
Generates an almost unique id.
std::vector< String > Split(const String &sep) const
Splits the string in multiple strings delimited by a set of separators.
Definition: CRNString.cpp:772
size_t Size() const noexcept
Returns the length of the string.
Definition: CRNString.h:160
XML element.
Definition: CRNXml.h:135
StringUTF8 GetName() const
Gets the label of the element.
Definition: CRNXml.h:146
#define _(String)
Definition: CRNi18n.h:51
bool StartsWith(const String &s) const
Check if string has a given prefix.
Definition: CRNString.cpp:533
const char * CStr() const
Conversion to UTF8 cstring.
Definition: CRNString.cpp:167
size_t BackwardFindNotOf(const String &s, size_t from_pos=NPos()) const
Finds the last occurrence of character not in a list.
Definition: CRNString.cpp:695
bool IsEmpty() const noexcept
Checks if the string is empty.
Definition: CRNString.h:166
size_t Find(const String &s, size_t from_pos=0) const
Finds the first occurrence of a string.
Definition: CRNString.cpp:509
String()=default
Default constructor (empty string)
bool IsFalse() const noexcept
Is false?
Definition: CRNProp3.cpp:177
#define CRN_END_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
Definition: CRNObject.h:198
bool IsTrue() const noexcept
Is true?
Definition: CRNProp3.cpp:175
std::string ToUtf8(const std::basic_string< char32_t > &widestring)
std::basic_string< char32_t > FromUtf8(const std::string &utf8string)
A UTF32 character string class.
Definition: CRNString.h:61
void Deserialize(xml::Element &el)
Initializes the object from an XML element. Unsafe.
Definition: CRNString.cpp:738
static int & Precision() noexcept
Precision of the floating point conversion.
Definition: CRNString.cpp:38
Text PushBackText(const StringUTF8 &text, bool cdata=false)
Adds a text at the end of the children list.
Definition: CRNXml.cpp:467
static Prop3 False()
Definition: CRNProp3.h:89
void ShrinkToFit()
Optimizes the memory usage.
Definition: CRNString.cpp:788
A generic domain error.
Definition: CRNException.h:83
String & ReplaceSuffix(const String &old_suffix, const String &new_suffix)
Replaces suffix by another pattern if present.
Definition: CRNString.cpp:304
String SubString(size_t pos, size_t n=0) const
Extracts a part of the string.
Definition: CRNString.cpp:220
char32_t & operator[](size_t index)
Access to a character.
Definition: CRNString.cpp:137
std::vector< String > WhichSuffixes(const std::vector< String > &suffixes) const
Returns suffixes found within a collection.
Definition: CRNString.cpp:380
XML text.
Definition: CRNXml.h:394
#define CRN_DATA_FACTORY_REGISTER(elemname, classname)
Registers a class to the data factory.
String & FirstCharacterToUpper()
Converts the first character of string to uppercase.
Definition: CRNString.cpp:481
static String CreateUniqueId(size_t len=8)
Generates an almost unique id.
Definition: CRNString.cpp:799
size_t Length() const noexcept
Returns the length of the string.
Definition: CRNString.h:162
Prop3 ToProp3() const
Conversion to Prop3.
Definition: CRNString.cpp:176
size_t BackwardFindAnyOf(const String &s, size_t from_pos=NPos()) const
Finds the last occurrence of character in a list.
Definition: CRNString.cpp:674
Node GetFirstChild()
Gets the first child node.
Definition: CRNXml.cpp:303
std::vector< String > WhichPrefixes(const std::vector< String > &prefixes) const
Returns prefixes found within a collection.
Definition: CRNString.cpp:405
size_t FindAnyOf(const String &s, size_t from_pos=0) const
Finds the first occurrence of character in a list.
Definition: CRNString.cpp:611
String & ToLower()
Converts the string to lowercase.
Definition: CRNString.cpp:493
bool EndsWith(const String &s) const
Check if string has a given suffix.
Definition: CRNString.cpp:562
std::vector< String > WhichSubstrings(const std::vector< String > &substrings) const
Returns substring found within a collection.
Definition: CRNString.cpp:430
xml::Element Serialize(xml::Element &parent) const
Dumps the object to an XML element. Unsafe.
Definition: CRNString.cpp:759
String & Replace(const String &s, size_t pos, size_t n=0)
Replaces a part of the string.
Definition: CRNString.cpp:286
std::string & Std()&noexcept
Conversion to std string.
static Prop3 True()
Definition: CRNProp3.h:88
A character string class.
Definition: CRNStringUTF8.h:49
A ternary proposition.
Definition: CRNProp3.h:40
int EditDistance(const String &s) const
Edit distance.
Definition: CRNString.cpp:714
String & Erase(size_t pos, size_t n=0)
Erases a part of the string.
Definition: CRNString.cpp:262
String & ToUpper()
Converts the string to uppercase.
Definition: CRNString.cpp:469
Element PushBackElement(const StringUTF8 &name)
Adds an element at the end of the children list.
Definition: CRNXml.cpp:355
std::u32string & Std()&noexcept
Conversion to std u32string.
Definition: CRNString.h:122
String & Insert(size_t pos, const String &s)
Inserts a string.
Definition: CRNString.cpp:195
XML node.
Definition: CRNXml.h:60
Invalid argument error (e.g.: null pointer)
Definition: CRNException.h:107
static size_t NPos() noexcept
Last position in a string.
Definition: CRNString.cpp:47
String & DeleteSuffix(const String &suffix)
Delete suffix if found.
Definition: CRNString.cpp:354
size_t FindNotOf(const String &s, size_t from_pos=0) const
Finds the first occurrence of character not in a list.
Definition: CRNString.cpp:631
String & Crop(size_t pos, size_t n=0)
Crops the string.
Definition: CRNString.cpp:247
#define CRN_BEGIN_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
Definition: CRNObject.h:185