41 Alto::Layout::Page::Space::TextBlock::TextBlock(
const Element &el):
48 void Alto::Layout::Page::Space::TextBlock::update_subelements()
52 Element cel = GetFirstChildElement(
"TextLine");
55 lines.push_back(std::shared_ptr<TextLine>(
new TextLine(cel)));
56 id_lines[lines.back()->GetId()] = lines.back();
70 Alto::Layout::Page::Space::TextBlock::TextBlock(
const Element &el,
const Id &id_,
int x,
int y,
int w,
int h):
71 Block(el, id_, x, y, w, h)
78 StringUTF8 str = GetAttribute<StringUTF8>(
"language");
85 std::vector<Alto::Layout::Page::Space::TextBlock::TextLinePtr> Alto::Layout::Page::Space::TextBlock::GetTextLines()
const
87 if (GetNbSubelements() != lines.size())
88 const_cast<TextBlock*>(
this)->update_subelements();
89 return std::vector<TextLinePtr>(lines.begin(), lines.end());
97 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::GetTextLine(
const Id &id_)
99 if (GetNbSubelements() != lines.size())
100 const_cast<TextBlock*>(
this)->update_subelements();
102 std::map<Id, TextLinePtr>::iterator it(id_lines.find(id_));
103 if ((it != id_lines.end()) && !it->second.expired())
104 return *(it->second.lock());
105 for (
const std::shared_ptr<TextLine> &tl : lines)
107 if (tl->GetId() == id_)
123 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::AddTextLine(
const Id &id_,
double x,
double y,
double w,
double h)
125 lines.push_back(std::shared_ptr<TextLine>(
new TextLine(PushBackElement(
"TextLine"), id_, x, y, w, h)));
126 id_lines[id_] = lines.back();
127 return *lines.back();
139 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::AddTextLineAfter(
const Id &pred,
const Id &id_,
double x,
double y,
double w,
double h)
141 for (std::vector<std::shared_ptr<TextLine> >::iterator it = lines.begin(); it != lines.end(); ++it)
143 if ((*it)->GetId() == pred)
147 if (it == lines.end())
148 return AddTextLine(id_, x, y, w, h);
151 std::shared_ptr<TextLine> tl(
new TextLine(InsertElement(pel,
"TextLine"), id_, x, y, w, h));
152 lines.insert(it, tl);
170 Alto::Layout::Page::Space::TextBlock::TextLine& Alto::Layout::Page::Space::TextBlock::AddTextLineBefore(
const Id &next,
const Id &id_,
double x,
double y,
double w,
double h)
172 for (std::vector<std::shared_ptr<TextLine> >::iterator it = lines.begin(); it != lines.end(); ++it)
174 if ((*it)->GetId() == next)
176 std::shared_ptr<TextLine> newline;
177 if (it == lines.begin())
178 newline.reset(
new TextLine(PushFrontElement(
"TextLine"), id_, x, y, w, h));
180 newline.reset(
new TextLine(InsertElement(**(it - 1),
"TextLine"), id_, x, y, w, h));
181 lines.insert(it, newline);
182 id_lines[id_] = newline;
193 void Alto::Layout::Page::Space::TextBlock::RemoveTextLine(
const Id &tid)
195 for (std::vector<std::shared_ptr<TextLine> >::iterator it = lines.begin(); it != lines.end(); ++it)
197 if ((*it)->GetId() == tid)
216 Alto::Layout::Page::Space::TextBlock::TextLine::TextLine(
const Element &el):
219 id = GetAttribute<StringUTF8>(
"ID",
false);
220 GetAttribute<double>(
"HEIGHT",
false);
221 GetAttribute<double>(
"WIDTH",
false);
222 GetAttribute<double>(
"HPOS",
false);
223 GetAttribute<double>(
"VPOS",
false);
225 update_subelements();
229 void Alto::Layout::Page::Space::TextBlock::TextLine::update_subelements()
231 lineElements.clear();
235 for (
Element cel = BeginElement(); cel != EndElement(); ++cel)
237 std::shared_ptr<LineElement> newnode(NULL);
239 if (elname ==
"String")
241 newnode.reset(
new Word(cel));
242 words.push_back(std::static_pointer_cast<Word>(newnode));
243 if (words.back().lock()->GetId())
244 id_words[words.back().lock()->GetId().Get()] = words.back();
246 else if (elname ==
"SP")
248 newnode.reset(
new WhiteSpace(cel));
250 else if (elname ==
"HYP")
252 newnode.reset(
new Hyphen(cel));
255 lineElements.push_back(newnode);
268 Alto::Layout::Page::Space::TextBlock::TextLine::TextLine(
const Element &el,
const Id &id_,
double x,
double y,
double w,
double h):
272 SetAttribute(
"ID",
id);
273 SetAttribute(
"HPOS", x);
274 SetAttribute(
"VPOS", y);
275 SetAttribute(
"WIDTH", w);
276 SetAttribute(
"HEIGHT", h);
280 std::vector<Id> Alto::Layout::Page::Space::TextBlock::TextLine::GetStyles()
const
288 void Alto::Layout::Page::Space::TextBlock::TextLine::AddStyle(
const Id &styleid)
296 void Alto::Layout::Page::Space::TextBlock::TextLine::RemoveStyle(
const Id &styleid)
302 double Alto::Layout::Page::Space::TextBlock::TextLine::GetWidth()
const
304 return GetAttribute<double>(
"WIDTH",
false);
310 void Alto::Layout::Page::Space::TextBlock::TextLine::SetWidth(
double d)
312 SetAttribute(
"WIDTH", d);
316 double Alto::Layout::Page::Space::TextBlock::TextLine::GetHeight()
const
318 return GetAttribute<double>(
"HEIGHT",
false);
324 void Alto::Layout::Page::Space::TextBlock::TextLine::SetHeight(
double d)
326 SetAttribute(
"HEIGHT", d);
330 double Alto::Layout::Page::Space::TextBlock::TextLine::GetHPos()
const
332 return GetAttribute<double>(
"HPOS",
false);
338 void Alto::Layout::Page::Space::TextBlock::TextLine::SetHPos(
double d)
340 SetAttribute(
"HPOS", d);
344 double Alto::Layout::Page::Space::TextBlock::TextLine::GetVPos()
const
346 return GetAttribute<double>(
"VPOS",
false);
352 void Alto::Layout::Page::Space::TextBlock::TextLine::SetVPos(
double d)
354 SetAttribute(
"VPOS", d);
358 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::GetBaseline()
const
361 try { baseline = GetAttribute<double>(
"BASELINE",
false); }
catch (...) { }
369 void Alto::Layout::Page::Space::TextBlock::TextLine::SetBaseline(
double d,
bool check_bounds)
371 if (check_bounds && ((d < GetVPos()) || (d > GetVPos() + GetHeight())))
373 SetAttribute(
"BASELINE", d);
377 void Alto::Layout::Page::Space::TextBlock::TextLine::UnsetBaseline()
379 RemoveAttribute(
"BASELINE");
383 Option<bool> Alto::Layout::Page::Space::TextBlock::TextLine::GetManuallyCorrected()
const
386 try { manuallyCorrected = GetAttribute<bool>(
"CS",
false); }
catch (...) { }
387 return manuallyCorrected;
393 void Alto::Layout::Page::Space::TextBlock::TextLine::SetManuallyCorrected(
bool c)
395 SetAttribute(
"CS", c);
399 void Alto::Layout::Page::Space::TextBlock::TextLine::UnsetManuallyCorrected()
401 RemoveAttribute(
"CS");
405 std::vector<Alto::Layout::Page::Space::TextBlock::TextLine::LineElementPtr> Alto::Layout::Page::Space::TextBlock::TextLine::GetLineElements()
const
407 if (GetNbSubelements() != lineElements.size())
408 const_cast<TextLine*>(
this)->update_subelements();
409 return std::vector<LineElementPtr>(lineElements.begin(), lineElements.end());
413 const std::vector<Alto::Layout::Page::Space::TextBlock::TextLine::WordPtr>& Alto::Layout::Page::Space::TextBlock::TextLine::GetWords()
const
415 if (GetNbSubelements() != lineElements.size())
416 const_cast<TextLine*>(
this)->update_subelements();
425 Alto::Layout::Page::Space::TextBlock::TextLine::Word& Alto::Layout::Page::Space::TextBlock::TextLine::GetWord(
const Id &id_)
427 if (GetNbSubelements() != lineElements.size())
428 update_subelements();
429 std::map<Id, WordPtr>::iterator it(id_words.find(id_));
430 if ((it != id_words.end()) && !it->second.expired())
431 return *(it->second.lock());
432 for (
const WordPtr &word : words)
434 const std::shared_ptr<Word> sword(word.lock());
435 if (sword->GetId() && (sword->GetId().Get() == id_))
437 id_words[id_] = word;
454 std::shared_ptr<Word> word(
new Word(PushBackElement(
"String"), id_, text, x, y, w, h));
455 words.push_back(word);
456 id_words[id_] = word;
457 lineElements.push_back(word);
473 for (std::vector<WordPtr>::iterator it = words.begin(); it != words.end(); ++it)
475 std::shared_ptr<Word> sw(it->lock());
478 if (sw->GetId().Get() == pred)
482 if (it == words.end())
483 return AddWord(id_, text, x, y, w, h);
486 std::shared_ptr<Word> nw(
new Word(InsertElement(pw,
"String"), id_, text, x, y, w, h));
487 words.insert(it, nw);
490 std::vector<std::shared_ptr<LineElement> >::iterator lit = std::find(lineElements.begin(), lineElements.end(), sw);
491 if (lit == lineElements.end())
492 lineElements.push_back(nw);
496 if (lit == lineElements.end())
497 lineElements.push_back(nw);
499 lineElements.insert(lit, nw);
521 for (std::vector<WordPtr>::iterator it = words.begin(); it != words.end(); ++it)
523 const std::shared_ptr<Word> sw(it->lock());
526 if (sw->GetId().Get() == next)
528 std::shared_ptr<Word> nw;
529 if (it == words.begin())
530 nw.reset(
new Word(PushFrontElement(
"String"), id_, text, x, y, w, h));
532 nw.reset(
new Word(InsertElement(*(it - 1)->lock(),
"String"), id_, text, x, y, w, h));
533 words.insert(it, nw);
535 std::vector<std::shared_ptr<LineElement> >::iterator lit = std::find(lineElements.begin(), lineElements.end(), sw);
536 if (lit == lineElements.end())
537 lineElements.push_back(nw);
539 lineElements.insert(lit, nw);
551 void Alto::Layout::Page::Space::TextBlock::TextLine::RemoveWord(
const Id &wid)
553 for (std::vector<WordPtr>::iterator it = words.begin(); it != words.end(); ++it)
555 const std::shared_ptr<Word> sw(it->lock());
558 if (sw->GetId().Get() == wid)
561 std::vector<std::shared_ptr<LineElement> >::iterator lit = std::find(lineElements.begin(), lineElements.end(), sw);
564 if (lit != lineElements.end())
565 lineElements.erase(lit);
581 Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::LineElement(
const Element &el):
601 SetAttribute(
"HPOS", *x);
603 SetAttribute(
"VPOS", *y);
605 SetAttribute(
"WIDTH", *w);
609 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::GetWidth()
const
612 try { width = GetAttribute<double>(
"WIDTH",
false); }
catch (...) { }
619 void Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::SetWidth(
double d)
621 SetAttribute(
"WIDTH", d);
625 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::GetHPos()
const
628 try { hpos = GetAttribute<double>(
"HPOS",
false); }
catch (...) { }
635 void Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::SetHPos(
double d)
637 SetAttribute(
"HPOS", d);
641 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::GetVPos()
const
644 try { vpos = GetAttribute<double>(
"VPOS",
false); }
catch (...) { }
651 void Alto::Layout::Page::Space::TextBlock::TextLine::LineElement::SetVPos(
double d)
653 SetAttribute(
"VPOS", d);
665 Alto::Layout::Page::Space::TextBlock::TextLine::Word::Word(
const Element &el):
668 StringUTF8 str = GetAttribute<StringUTF8>(
"ID");
671 GetAttribute<StringUTF8>(
"CONTENT",
false);
691 LineElement(el, x, y, w),
694 SetAttribute(
"ID", *
id);
695 SetAttribute(
"CONTENT", text);
697 SetAttribute(
"HEIGHT", *h);
701 std::vector<Id> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetStyles()
const
709 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::AddStyle(
const Id &styleid)
717 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::RemoveStyle(
const Id &styleid)
723 StringUTF8 Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetContent()
const
725 return GetAttribute<StringUTF8>(
"CONTENT",
false);
731 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetContent(
const StringUTF8 &s)
733 SetAttribute(
"CONTENT", s);
737 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetHeight()
const
740 try { height = GetAttribute<double>(
"HEIGHT",
false); }
catch (...) { }
747 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetHeight(
double d)
749 SetAttribute(
"HEIGHT", d);
757 StringUTF8 str = GetAttribute<StringUTF8>(
"STYLE");
760 Alto::Styles::Text::FontStyle val = Alto::Styles::Text::FontStyle::Undef;
761 if (str.
Find(
"bold") != StringUTF8::NPos())
762 val |= Alto::Styles::Text::FontStyle::Bold;
763 if (str.
Find(
"italics") != StringUTF8::NPos())
764 val |= Alto::Styles::Text::FontStyle::Italics;
765 if (str.
Find(
"subscript") != StringUTF8::NPos())
766 val |= Alto::Styles::Text::FontStyle::Subscript;
767 if (str.
Find(
"superscript") != StringUTF8::NPos())
768 val |= Alto::Styles::Text::FontStyle::Superscript;
769 if (str.
Find(
"smallcaps") != StringUTF8::NPos())
770 val |= Alto::Styles::Text::FontStyle::SmallCaps;
771 if (str.
Find(
"underline") != StringUTF8::NPos())
772 val |= Alto::Styles::Text::FontStyle::Underline;
773 if (val != Alto::Styles::Text::FontStyle::Undef)
782 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetFontStyle(Alto::Styles::Text::FontStyle fs)
785 if (!!(fs & Alto::Styles::Text::FontStyle::Bold))
787 if (!!(fs & Alto::Styles::Text::FontStyle::Italics))
789 if (!!(fs & Alto::Styles::Text::FontStyle::Subscript))
790 attr +=
"subscript ";
791 if (!!(fs & Alto::Styles::Text::FontStyle::Superscript))
792 attr +=
"superscript ";
793 if (!!(fs & Alto::Styles::Text::FontStyle::SmallCaps))
794 attr +=
"smallcaps ";
795 if (!!(fs & Alto::Styles::Text::FontStyle::Underline))
796 attr +=
"underline ";
797 SetAttribute(
"STYLE", attr);
802 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::UnsetFontStyle()
804 RemoveAttribute(
"STYLE");
811 StringUTF8 str = GetAttribute<StringUTF8>(
"SUBS_TYPE");
814 if (str ==
"HypPart1")
815 substitutionType = SubstitutionType::HypPart1;
816 else if (str ==
"HypPart2")
817 substitutionType = SubstitutionType::HypPart2;
818 else if (str ==
"Abbreviation")
819 substitutionType = SubstitutionType::Abbreviation;
821 return substitutionType;
825 Option<StringUTF8> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetSubstitutionContent()
const
828 StringUTF8 str = GetAttribute<StringUTF8>(
"SUBS_CONTENT");
830 substitutionContent = str;
831 return substitutionContent;
839 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetSubstitution(SubstitutionType stype,
const StringUTF8 &scontent)
844 case SubstitutionType::HypPart1:
847 case SubstitutionType::HypPart2:
850 case SubstitutionType::Abbreviation:
851 atyp =
"Abbreviation";
856 SetAttribute(
"SUBS_TYPE", atyp);
857 SetAttribute(
"SUBS_CONTENT", scontent);
861 Option<double> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetWC()
const
864 try { wordConfidence = GetAttribute<double>(
"WC",
false); }
catch (...) { }
865 return wordConfidence;
872 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::SetWC(
double conf)
874 if ((conf < 0.0) || (conf > 1.0))
876 SetAttribute(
"WC", conf);
882 void Alto::Layout::Page::Space::TextBlock::TextLine::Word::UnsetWC()
884 RemoveAttribute(
"WC");
889 Option<StringUTF8> Alto::Layout::Page::Space::TextBlock::TextLine::Word::GetCC()
const
892 StringUTF8 str = GetAttribute<StringUTF8>(
"CC");
894 charactersConfidence = str;
895 return charactersConfidence;
907 Alto::Layout::Page::Space::TextBlock::TextLine::WhiteSpace::WhiteSpace(
const Element &el):
910 StringUTF8 str = GetAttribute<StringUTF8>(
"ID");
924 Alto::Layout::Page::Space::TextBlock::TextLine::Hyphen::Hyphen(
const Element &el):
927 GetAttribute<StringUTF8>(
"CONTENT",
false);
931 StringUTF8 Alto::Layout::Page::Space::TextBlock::TextLine::Hyphen::GetContent()
const
933 return GetAttribute<StringUTF8>(
"CONTENT",
false);
939 void Alto::Layout::Page::Space::TextBlock::TextLine::Hyphen::SetContent(
const StringUTF8 &s)
941 SetAttribute(
"CONTENT", s);
StringUTF8 GetName() const
Gets the label of the element.
std::vector< Id > GetStyleRefs(const Element &el)
Gets the list of style references.
bool IsNotEmpty() const noexcept
Checks if the string is not empty.
Element GetNextSiblingElement(const StringUTF8 &name="")
Gets the next sibling element.
A UTF32 character string class.
void AddStyleRef(Element &el, const Id &id)
Adds a style reference to an element.
void RemoveStyleRef(Element &el, const Id &id)
Removes a style reference to an element.
A character string class.
size_t Find(const StringUTF8 &s, size_t from_pos=0) const
Finds the first occurrence of a string.
A class to store an optional value.
Invalid argument error (e.g., null pointer)
An item was not found in a container.