libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNAltoTextBlock.cpp
Go to the documentation of this file.
1 /* Copyright 2011-2016 CoReNum
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNAltoTextBlock.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include <CRNXml/CRNAlto.h>
23 #include <CRNException.h>
24 #include <CRNString.h>
25 #include <algorithm>
26 #include <CRNi18n.h>
27 
28 using namespace crn;
29 using namespace xml;
30 
31 
33 // TextBlock
35 
41 Alto::Layout::Page::Space::TextBlock::TextBlock(const Element &el):
42  Block(el)
43 {
44  update_subelements();
45 }
46 
48 void Alto::Layout::Page::Space::TextBlock::update_subelements()
49 {
50  lines.clear();
51  id_lines.clear();
52  Element cel = GetFirstChildElement("TextLine");
53  while (cel)
54  {
55  lines.push_back(std::shared_ptr<TextLine>(new TextLine(cel)));
56  id_lines[lines.back()->GetId()] = lines.back();
57  cel = cel.GetNextSiblingElement("TextLine");
58  }
59 }
60 
70 Alto::Layout::Page::Space::TextBlock::TextBlock(const Element &el, const Id &id_, int x, int y, int w, int h):
71  Block(el, id_, x, y, w, h)
72 { }
73 
75 Option<StringUTF8> Alto::Layout::Page::Space::TextBlock::GetLanguage() const
76 {
77  Option<StringUTF8> language;
78  StringUTF8 str = GetAttribute<StringUTF8>("language");
79  if (str.IsNotEmpty())
80  language = str;
81  return language;
82 }
83 
85 std::vector<Alto::Layout::Page::Space::TextBlock::TextLinePtr> Alto::Layout::Page::Space::TextBlock::GetTextLines() const
86 {
87  if (GetNbSubelements() != lines.size())
88  const_cast<TextBlock*>(this)->update_subelements();
89  return std::vector<TextLinePtr>(lines.begin(), lines.end());
90 }
91 
97 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::GetTextLine(const Id &id_)
98 {
99  if (GetNbSubelements() != lines.size())
100  const_cast<TextBlock*>(this)->update_subelements();
101 
102  std::map<Id, TextLinePtr>::iterator it(id_lines.find(id_));
103  if ((it != id_lines.end()) && !it->second.expired())
104  return *(it->second.lock());
105  for (const std::shared_ptr<TextLine> &tl : lines)
106  {
107  if (tl->GetId() == id_)
108  {
109  id_lines[id_] = tl;
110  return *tl;
111  }
112  }
113  throw ExceptionNotFound(_("The block contains no text line with this id."));
114 }
115 
123 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::AddTextLine(const Id &id_, double x, double y, double w, double h)
124 {
125  lines.push_back(std::shared_ptr<TextLine>(new TextLine(PushBackElement("TextLine"), id_, x, y, w, h)));
126  id_lines[id_] = lines.back();
127  return *lines.back();
128 }
129 
139 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::AddTextLineAfter(const Id &pred, const Id &id_, double x, double y, double w, double h)
140 {
141  for (std::vector<std::shared_ptr<TextLine> >::iterator it = lines.begin(); it != lines.end(); ++it)
142  {
143  if ((*it)->GetId() == pred)
144  {
145  Element pel(**it);
146  ++it;
147  if (it == lines.end())
148  return AddTextLine(id_, x, y, w, h);
149  else
150  {
151  std::shared_ptr<TextLine> tl(new TextLine(InsertElement(pel, "TextLine"), id_, x, y, w, h));
152  lines.insert(it, tl);
153  id_lines[id_] = tl;
154  return *tl;
155  }
156  }
157  }
158  throw crn::ExceptionNotFound(_("Page not found."));
159 }
160 
170 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::AddTextLineBefore(const Id &next, const Id &id_, double x, double y, double w, double h)
171 {
172  for (std::vector<std::shared_ptr<TextLine> >::iterator it = lines.begin(); it != lines.end(); ++it)
173  {
174  if ((*it)->GetId() == next)
175  {
176  std::shared_ptr<TextLine> newline;
177  if (it == lines.begin())
178  newline.reset(new TextLine(PushFrontElement("TextLine"), id_, x, y, w, h));
179  else
180  newline.reset(new TextLine(InsertElement(**(it - 1), "TextLine"), id_, x, y, w, h));
181  lines.insert(it, newline);
182  id_lines[id_] = newline;
183  return *newline;
184  }
185  }
186  throw crn::ExceptionNotFound(_("Page not found."));
187 }
188 
193 void Alto::Layout::Page::Space::TextBlock::RemoveTextLine(const Id &tid)
194 {
195  for (std::vector<std::shared_ptr<TextLine> >::iterator it = lines.begin(); it != lines.end(); ++it)
196  {
197  if ((*it)->GetId() == tid)
198  {
199  RemoveChild(**it);
200  lines.erase(it);
201  id_lines.erase(tid);
202  }
203  }
204  throw crn::ExceptionNotFound(_("Page not found."));
205 }
206 
208 // TextLine
210 
216 Alto::Layout::Page::Space::TextBlock::TextLine::TextLine(const Element &el):
217  Element(el)
218 {
219  id = GetAttribute<StringUTF8>("ID", false); // may throw;
220  GetAttribute<double>("HEIGHT", false); // may throw
221  GetAttribute<double>("WIDTH", false); // may throw
222  GetAttribute<double>("HPOS", false); // may throw
223  GetAttribute<double>("VPOS", false); // may throw
224 
225  update_subelements();
226 }
227 
229 void Alto::Layout::Page::Space::TextBlock::TextLine::update_subelements()
230 {
231  lineElements.clear();
232  words.clear();
233  id_words.clear();
234 
235  for (Element cel = BeginElement(); cel != EndElement(); ++cel)
236  {
237  std::shared_ptr<LineElement> newnode(NULL);
238  String elname(cel.GetName());
239  if (elname == "String")
240  {
241  newnode.reset(new Word(cel));
242  words.push_back(std::static_pointer_cast<Word>(newnode));
243  if (words.back().lock()->GetId())
244  id_words[words.back().lock()->GetId().Get()] = words.back();
245  }
246  else if (elname == "SP")
247  {
248  newnode.reset(new WhiteSpace(cel));
249  }
250  else if (elname == "HYP")
251  {
252  newnode.reset(new Hyphen(cel));
253  }
254  if (newnode)
255  lineElements.push_back(newnode);
256  }
257 }
258 
268 Alto::Layout::Page::Space::TextBlock::TextLine::TextLine(const Element &el, const Id &id_, double x, double y, double w, double h):
269  Element(el),
270  id(id_)
271 {
272  SetAttribute("ID", id);
273  SetAttribute("HPOS", x);
274  SetAttribute("VPOS", y);
275  SetAttribute("WIDTH", w);
276  SetAttribute("HEIGHT", h);
277 }
278 
280 std::vector<Id> Alto::Layout::Page::Space::TextBlock::TextLine::GetStyles() const
281 {
282  return GetStyleRefs(*this);
283 }
284 
288 void Alto::Layout::Page::Space::TextBlock::TextLine::AddStyle(const Id &styleid)
289 {
290  AddStyleRef(*this, styleid);
291 }
292 
296 void Alto::Layout::Page::Space::TextBlock::TextLine::RemoveStyle(const Id &styleid)
297 {
298  RemoveStyleRef(*this, styleid);
299 }
300 
302 double Alto::Layout::Page::Space::TextBlock::TextLine::GetWidth() const
303 {
304  return GetAttribute<double>("WIDTH", false); // should not throw
305 }
306 
310 void Alto::Layout::Page::Space::TextBlock::TextLine::SetWidth(double d)
311 {
312  SetAttribute("WIDTH", d);
313 }
314 
316 double Alto::Layout::Page::Space::TextBlock::TextLine::GetHeight() const
317 {
318  return GetAttribute<double>("HEIGHT", false); // should not throw
319 }
320 
324 void Alto::Layout::Page::Space::TextBlock::TextLine::SetHeight(double d)
325 {
326  SetAttribute("HEIGHT", d);
327 }
328 
330 double Alto::Layout::Page::Space::TextBlock::TextLine::GetHPos() const
331 {
332  return GetAttribute<double>("HPOS", false); // should not throw
333 }
334 
338 void Alto::Layout::Page::Space::TextBlock::TextLine::SetHPos(double d)
339 {
340  SetAttribute("HPOS", d);
341 }
342 
344 double Alto::Layout::Page::Space::TextBlock::TextLine::GetVPos() const
345 {
346  return GetAttribute<double>("VPOS", false); // should not throw
347 }
348 
352 void Alto::Layout::Page::Space::TextBlock::TextLine::SetVPos(double d)
353 {
354  SetAttribute("VPOS", d);
355 }
356 
358 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::GetBaseline() const
359 {
360  Option<double> baseline;
361  try { baseline = GetAttribute<double>("BASELINE", false); } catch (...) { }
362  return baseline;
363 }
364 
369 void Alto::Layout::Page::Space::TextBlock::TextLine::SetBaseline(double d, bool check_bounds)
370 {
371  if (check_bounds && ((d < GetVPos()) || (d > GetVPos() + GetHeight())))
372  throw ExceptionDomain(_("The baseline is out of the line's bounds"));
373  SetAttribute("BASELINE", d);
374 }
375 
377 void Alto::Layout::Page::Space::TextBlock::TextLine::UnsetBaseline()
378 {
379  RemoveAttribute("BASELINE");
380 }
381 
383 Option<bool> Alto::Layout::Page::Space::TextBlock::TextLine::GetManuallyCorrected() const
384 {
385  Option<bool> manuallyCorrected;
386  try { manuallyCorrected = GetAttribute<bool>("CS", false); } catch (...) { }
387  return manuallyCorrected;
388 }
389 
393 void Alto::Layout::Page::Space::TextBlock::TextLine::SetManuallyCorrected(bool c)
394 {
395  SetAttribute("CS", c);
396 }
397 
399 void Alto::Layout::Page::Space::TextBlock::TextLine::UnsetManuallyCorrected()
400 {
401  RemoveAttribute("CS");
402 }
403 
405 std::vector<Alto::Layout::Page::Space::TextBlock::TextLine::LineElementPtr> Alto::Layout::Page::Space::TextBlock::TextLine::GetLineElements() const
406 {
407  if (GetNbSubelements() != lineElements.size())
408  const_cast<TextLine*>(this)->update_subelements();
409  return std::vector<LineElementPtr>(lineElements.begin(), lineElements.end());
410 }
411 
413 const std::vector<Alto::Layout::Page::Space::TextBlock::TextLine::WordPtr>& Alto::Layout::Page::Space::TextBlock::TextLine::GetWords() const
414 {
415  if (GetNbSubelements() != lineElements.size())
416  const_cast<TextLine*>(this)->update_subelements();
417  return words;
418 }
419 
425 Alto::Layout::Page::Space::TextBlock::TextLine::Word& Alto::Layout::Page::Space::TextBlock::TextLine::GetWord(const Id &id_)
426 {
427  if (GetNbSubelements() != lineElements.size())
428  update_subelements();
429  std::map<Id, WordPtr>::iterator it(id_words.find(id_));
430  if ((it != id_words.end()) && !it->second.expired())
431  return *(it->second.lock());
432  for (const WordPtr &word : words)
433  {
434  const std::shared_ptr<Word> sword(word.lock());
435  if (sword->GetId() && (sword->GetId().Get() == id_))
436  {
437  id_words[id_] = word;
438  return *sword;
439  }
440  }
441  throw ExceptionNotFound(_("The line contains no word with this id."));
442 }
443 
452 Alto::Layout::Page::Space::TextBlock::TextLine::Word& Alto::Layout::Page::Space::TextBlock::TextLine::AddWord(const Id &id_, const StringUTF8 &text, const Option<double> &x, const Option<double> &y, const Option<double> &w, const Option<double> &h)
453 {
454  std::shared_ptr<Word> word(new Word(PushBackElement("String"), id_, text, x, y, w, h));
455  words.push_back(word);
456  id_words[id_] = word;
457  lineElements.push_back(word);
458  return *word;
459 }
460 
471 Alto::Layout::Page::Space::TextBlock::TextLine::Word& Alto::Layout::Page::Space::TextBlock::TextLine::AddWordAfter(const Id &pred, const Id &id_, const StringUTF8 &text, const Option<double> &x, const Option<double> &y, const Option<double> &w, const Option<double> &h)
472 {
473  for (std::vector<WordPtr>::iterator it = words.begin(); it != words.end(); ++it)
474  {
475  std::shared_ptr<Word> sw(it->lock());
476  if (sw->GetId())
477  {
478  if (sw->GetId().Get() == pred)
479  {
480  Element &pw(*sw);
481  ++it;
482  if (it == words.end())
483  return AddWord(id_, text, x, y, w, h);
484  else
485  {
486  std::shared_ptr<Word> nw(new Word(InsertElement(pw, "String"), id_, text, x, y, w, h));
487  words.insert(it, nw);
488  id_words[id_] = nw;
489  sw = it->lock();
490  std::vector<std::shared_ptr<LineElement> >::iterator lit = std::find(lineElements.begin(), lineElements.end(), sw);
491  if (lit == lineElements.end())
492  lineElements.push_back(nw);
493  else
494  {
495  ++lit;
496  if (lit == lineElements.end())
497  lineElements.push_back(nw);
498  else
499  lineElements.insert(lit, nw);
500  }
501  return *nw;
502  }
503  } // found predecessor
504  } // has an Id
505  }
506  throw ExceptionNotFound(_("Cannot find element."));
507 }
508 
519 Alto::Layout::Page::Space::TextBlock::TextLine::Word& Alto::Layout::Page::Space::TextBlock::TextLine::AddWordBefore(const Id &next, const Id &id_, const StringUTF8 &text, const Option<double> &x, const Option<double> &y, const Option<double> &w, const Option<double> &h)
520 {
521  for (std::vector<WordPtr>::iterator it = words.begin(); it != words.end(); ++it)
522  {
523  const std::shared_ptr<Word> sw(it->lock());
524  if (sw->GetId())
525  {
526  if (sw->GetId().Get() == next)
527  {
528  std::shared_ptr<Word> nw;
529  if (it == words.begin())
530  nw.reset(new Word(PushFrontElement("String"), id_, text, x, y, w, h));
531  else
532  nw.reset(new Word(InsertElement(*(it - 1)->lock(), "String"), id_, text, x, y, w, h));
533  words.insert(it, nw);
534  id_words[id_] = nw;
535  std::vector<std::shared_ptr<LineElement> >::iterator lit = std::find(lineElements.begin(), lineElements.end(), sw);
536  if (lit == lineElements.end())
537  lineElements.push_back(nw);
538  else
539  lineElements.insert(lit, nw);
540  return *nw;
541  } // found successor
542  } // has an Id
543  }
544  throw ExceptionNotFound(_("Cannot find element."));
545 }
546 
551 void Alto::Layout::Page::Space::TextBlock::TextLine::RemoveWord(const Id &wid)
552 {
553  for (std::vector<WordPtr>::iterator it = words.begin(); it != words.end(); ++it)
554  {
555  const std::shared_ptr<Word> sw(it->lock());
556  if (sw->GetId())
557  {
558  if (sw->GetId().Get() == wid)
559  {
560  RemoveChild(*sw);
561  std::vector<std::shared_ptr<LineElement> >::iterator lit = std::find(lineElements.begin(), lineElements.end(), sw);
562  words.erase(it);
563  id_words.erase(wid);
564  if (lit != lineElements.end())
565  lineElements.erase(lit);
566  return;
567  }
568  }
569  }
570  throw ExceptionNotFound(_("Cannot find word."));
571 }
572 
574 // LineElement
576 
581 Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::LineElement(const Element &el):
582  Element(el)
583 {
584  if (!el)
585  throw ExceptionInvalidArgument(_("Null node."));
586 }
587 
595 Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::LineElement(const Element &el, const Option<double> &x, const Option<double> &y, const Option<double> &w):
596  Element(el)
597 {
598  if (!el)
599  throw ExceptionInvalidArgument(_("Null node."));
600  if (x)
601  SetAttribute("HPOS", *x);
602  if (y)
603  SetAttribute("VPOS", *y);
604  if (w)
605  SetAttribute("WIDTH", *w);
606 }
607 
609 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::GetWidth() const
610 {
611  Option<double> width;
612  try { width = GetAttribute<double>("WIDTH", false); } catch (...) { }
613  return width;
614 }
615 
619 void Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::SetWidth(double d)
620 {
621  SetAttribute("WIDTH", d);
622 }
623 
625 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::GetHPos() const
626 {
627  Option<double> hpos;
628  try { hpos = GetAttribute<double>("HPOS", false); } catch (...) { }
629  return hpos;
630 }
631 
635 void Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::SetHPos(double d)
636 {
637  SetAttribute("HPOS", d);
638 }
639 
641 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::GetVPos() const
642 {
643  Option<double> vpos;
644  try { vpos = GetAttribute<double>("VPOS", false); } catch (...) { }
645  return vpos;
646 }
647 
651 void Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::SetVPos(double d)
652 {
653  SetAttribute("VPOS", d);
654 }
655 
657 // Word
659 
665 Alto::Layout::Page::Space::TextBlock::TextLine::Word::Word(const Element &el):
666  LineElement(el)
667 {
668  StringUTF8 str = GetAttribute<StringUTF8>("ID");
669  if (str.IsNotEmpty())
670  id = str;
671  GetAttribute<StringUTF8>("CONTENT", false); // may throw
672 
673  /*
674  for (Element cel = BeginElement(); cel != EndElement(); ++cel)
675  {
676  // TODO alternatives
677  }
678  */
679 }
680 
690 Alto::Layout::Page::Space::TextBlock::TextLine::Word::Word(const Element &el, const Id &id_, const StringUTF8 &text, const Option<double> &x, const Option<double> &y, const Option<double> &w, const Option<double> &h):
691  LineElement(el, x, y, w),
692  id(id_)
693 {
694  SetAttribute("ID", *id);
695  SetAttribute("CONTENT", text);
696  if (h)
697  SetAttribute("HEIGHT", *h);
698 }
699 
701 std::vector<Id> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetStyles() const
702 {
703  return GetStyleRefs(*this);
704 }
705 
709 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::AddStyle(const Id &styleid)
710 {
711  AddStyleRef(*this, styleid);
712 }
713 
717 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::RemoveStyle(const Id &styleid)
718 {
719  RemoveStyleRef(*this, styleid);
720 }
721 
723 StringUTF8 Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetContent() const
724 {
725  return GetAttribute<StringUTF8>("CONTENT", false); // should not throw
726 }
727 
731 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetContent(const StringUTF8 &s)
732 {
733  SetAttribute("CONTENT", s);
734 }
735 
737 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetHeight() const
738 {
739  Option<double> height;
740  try { height = GetAttribute<double>("HEIGHT", false); } catch (...) { }
741  return height;
742 }
743 
747 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetHeight(double d)
748 {
749  SetAttribute("HEIGHT", d);
750 }
751 
754 Option<Alto::Styles::Text::FontStyle> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetFontStyle() const
755 {
757  StringUTF8 str = GetAttribute<StringUTF8>("STYLE");
758  if (str.IsNotEmpty())
759  {
760  Alto::Styles::Text::FontStyle val = Alto::Styles::Text::FontStyle::Undef;
761  if (str.Find("bold") != StringUTF8::NPos())
762  val |= Alto::Styles::Text::FontStyle::Bold;
763  if (str.Find("italics") != StringUTF8::NPos())
764  val |= Alto::Styles::Text::FontStyle::Italics;
765  if (str.Find("subscript") != StringUTF8::NPos())
766  val |= Alto::Styles::Text::FontStyle::Subscript;
767  if (str.Find("superscript") != StringUTF8::NPos())
768  val |= Alto::Styles::Text::FontStyle::Superscript;
769  if (str.Find("smallcaps") != StringUTF8::NPos())
770  val |= Alto::Styles::Text::FontStyle::SmallCaps;
771  if (str.Find("underline") != StringUTF8::NPos())
772  val |= Alto::Styles::Text::FontStyle::Underline;
773  if (val != Alto::Styles::Text::FontStyle::Undef)
774  fontStyle = val;
775  }
776  return fontStyle;
777 }
778 
782 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetFontStyle(Alto::Styles::Text::FontStyle fs)
783 {
784  StringUTF8 attr;
785  if (!!(fs & Alto::Styles::Text::FontStyle::Bold))
786  attr += "bold ";
787  if (!!(fs & Alto::Styles::Text::FontStyle::Italics))
788  attr += "italics ";
789  if (!!(fs & Alto::Styles::Text::FontStyle::Subscript))
790  attr += "subscript ";
791  if (!!(fs & Alto::Styles::Text::FontStyle::Superscript))
792  attr += "superscript ";
793  if (!!(fs & Alto::Styles::Text::FontStyle::SmallCaps))
794  attr += "smallcaps ";
795  if (!!(fs & Alto::Styles::Text::FontStyle::Underline))
796  attr += "underline ";
797  SetAttribute("STYLE", attr);
798 }
799 
802 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::UnsetFontStyle()
803 {
804  RemoveAttribute("STYLE");
805 }
806 
808 Option<Alto::Layout::Page::Space::TextBlock::TextLine::Word::SubstitutionType> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetSubstitutionType() const
809 {
810  Option<SubstitutionType> substitutionType;
811  StringUTF8 str = GetAttribute<StringUTF8>("SUBS_TYPE");
812  if (str.IsNotEmpty())
813  {
814  if (str == "HypPart1")
815  substitutionType = SubstitutionType::HypPart1;
816  else if (str == "HypPart2")
817  substitutionType = SubstitutionType::HypPart2;
818  else if (str == "Abbreviation")
819  substitutionType = SubstitutionType::Abbreviation;
820  }
821  return substitutionType;
822 }
823 
825 Option<StringUTF8> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetSubstitutionContent() const
826 {
827  Option<StringUTF8> substitutionContent;
828  StringUTF8 str = GetAttribute<StringUTF8>("SUBS_CONTENT");
829  if (str.IsNotEmpty())
830  substitutionContent = str;
831  return substitutionContent;
832 }
833 
839 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetSubstitution(SubstitutionType stype, const StringUTF8 &scontent)
840 {
841  StringUTF8 atyp;
842  switch (stype)
843  {
844  case SubstitutionType::HypPart1:
845  atyp = "HypPart1";
846  break;
847  case SubstitutionType::HypPart2:
848  atyp = "HypPart2";
849  break;
850  case SubstitutionType::Abbreviation:
851  atyp = "Abbreviation";
852  break;
853  default:
854  throw crn::ExceptionInvalidArgument(_("Wrong type of substitution."));
855  }
856  SetAttribute("SUBS_TYPE", atyp);
857  SetAttribute("SUBS_CONTENT", scontent);
858 }
859 
861 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetWC() const
862 {
863  Option<double> wordConfidence;
864  try { wordConfidence = GetAttribute<double>("WC", false); } catch (...) { }
865  return wordConfidence;
866 }
867 
872 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetWC(double conf)
873 {
874  if ((conf < 0.0) || (conf > 1.0))
875  throw crn::ExceptionDomain(_("The word confidence must be in [0, 1]."));
876  SetAttribute("WC", conf);
877 }
878 
882 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::UnsetWC()
883 {
884  RemoveAttribute("WC");
885 }
886 
887 
889 Option<StringUTF8> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetCC() const
890 {
891  Option<StringUTF8> charactersConfidence;
892  StringUTF8 str = GetAttribute<StringUTF8>("CC");
893  if (str.IsNotEmpty())
894  charactersConfidence = str;
895  return charactersConfidence;
896 }
897 
899 // WhiteSpace
901 
907 Alto::Layout::Page::Space::TextBlock::TextLine::WhiteSpace::WhiteSpace(const Element &el):
908  LineElement(el)
909 {
910  StringUTF8 str = GetAttribute<StringUTF8>("ID");
911  if (str.IsNotEmpty())
912  id = str;
913 }
914 
916 // Hyphen
918 
924 Alto::Layout::Page::Space::TextBlock::TextLine::Hyphen::Hyphen(const Element &el):
925  LineElement(el)
926 {
927  GetAttribute<StringUTF8>("CONTENT", false); // may throw
928 }
929 
931 StringUTF8 Alto::Layout::Page::Space::TextBlock::TextLine::Hyphen::GetContent() const
932 {
933  return GetAttribute<StringUTF8>("CONTENT", false); // should not throw
934 }
935 
939 void Alto::Layout::Page::Space::TextBlock::TextLine::Hyphen::SetContent(const StringUTF8 &s)
940 {
941  SetAttribute("CONTENT", s);
942 }
943 
XML element.
Definition: CRNXml.h:135
StringUTF8 GetName() const
Gets the label of the element.
Definition: CRNXml.h:146
#define _(String)
Definition: CRNi18n.h:51
std::vector< Id > GetStyleRefs(const Element &el)
Gets the list of style references.
bool IsNotEmpty() const noexcept
Checks if the string is not empty.
Element GetNextSiblingElement(const StringUTF8 &name="")
Gets the next sibling element.
Definition: CRNXml.cpp:247
A UTF32 character string class.
Definition: CRNString.h:61
void AddStyleRef(Element &el, const Id &id)
Adds a style reference to an element.
A generic domain error.
Definition: CRNException.h:83
A block.
Definition: CRNBlock.h:52
void RemoveStyleRef(Element &el, const Id &id)
Removes a style reference to an element.
A character string class.
Definition: CRNStringUTF8.h:49
size_t Find(const StringUTF8 &s, size_t from_pos=0) const
Finds the first occurrence of a string.
A class to store an optional value.
Definition: CRNOption.h:33
Invalid argument error (e.g., a null pointer)
Definition: CRNException.h:107
An item was not found in a container.
Definition: CRNException.h:95