37 Document(fname, char_conversion_throws)
39 root.reset(
new Root(
GetRoot()));
51 Document(encoding, version, char_conversion_throws)
65 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
68 const std::shared_ptr<Layout::Page> sp(p.lock());
69 if (sp->GetId() == id)
83 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
86 const std::shared_ptr<const Layout::Page> sp(p.lock());
87 if (sp->GetId() == id)
101 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
104 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
107 const std::shared_ptr<Layout::Page::Space> ss(s.lock());
109 if (ss->GetId().Get() == id)
124 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
127 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
130 const std::shared_ptr<const Layout::Page::Space> ss(s.lock());
132 if (ss->GetId().Get() == id)
147 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
150 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
153 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
154 for (
const Layout::Page::Space::BlockPtr &b : blocks)
156 const std::shared_ptr<Layout::Page::Space::Block> sb(b.lock());
157 if (sb->GetId() == id)
173 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
176 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
179 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
180 for (
const Layout::Page::Space::BlockPtr &b : blocks)
182 const std::shared_ptr<const Layout::Page::Space::Block> sb(b.lock());
183 if (sb->GetId() == id)
199 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
202 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
205 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
206 for (
const Layout::Page::Space::BlockPtr &b : blocks)
208 const std::shared_ptr<Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(b.lock()));
211 if (tb->GetId() == id)
228 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
231 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
234 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
235 for (
const Layout::Page::Space::BlockPtr &b : blocks)
237 const std::shared_ptr<const Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(b.lock()));
240 if (tb->GetId() == id)
257 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
260 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
263 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
264 for (
const Layout::Page::Space::BlockPtr &b : blocks)
266 const std::shared_ptr<Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(b.lock()));
269 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
270 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
272 const std::shared_ptr<Layout::Page::Space::TextBlock::TextLine> sl(l.lock());
273 if (sl->GetId() == id)
291 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
294 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
297 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
298 for (
const Layout::Page::Space::BlockPtr &b : blocks)
300 const std::shared_ptr<const Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(b.lock()));
303 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
304 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
306 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine> sl(l.lock());
307 if (sl->GetId() == id)
325 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
328 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
331 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
332 for (
const Layout::Page::Space::BlockPtr &b : blocks)
334 const std::shared_ptr<Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(b.lock()));
337 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
338 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
340 std::vector<Layout::Page::Space::TextBlock::TextLine::LineElementPtr> lels(l.lock()->GetLineElements());
341 for (
const Layout::Page::Space::TextBlock::TextLine::LineElementPtr lel : lels)
343 const std::shared_ptr<Layout::Page::Space::TextBlock::TextLine::Word> w(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::Word>(lel.lock()));
347 if (w->GetId().Get() == id)
365 const Alto::Layout::Page::Space::TextBlock::TextLine::Word&
Alto::GetWord(
const Id &
id)
const
367 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
370 std::vector<Layout::Page::SpacePtr> spaces(p.lock()->GetSpaces());
373 std::vector<Layout::Page::Space::BlockPtr> blocks(s.lock()->GetBlocks());
374 for (
const Layout::Page::Space::BlockPtr &b : blocks)
376 const std::shared_ptr<const Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(b.lock()));
379 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
380 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
382 std::vector<Layout::Page::Space::TextBlock::TextLine::LineElementPtr> lels(l.lock()->GetLineElements());
383 for (
const Layout::Page::Space::TextBlock::TextLine::LineElementPtr lel : lels)
385 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine::Word> w(std::dynamic_pointer_cast<const Layout::Page::Space::TextBlock::TextLine::Word>(lel.lock()));
389 if (w->GetId().Get() == id)
409 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
412 const std::shared_ptr<Layout::Page> sp(p.lock());
413 if (sp->GetId() == id)
415 std::vector<Layout::Page::SpacePtr> spaces(sp->GetSpaces());
418 const std::shared_ptr<Layout::Page::Space> ss(s.lock());
420 if (ss->GetId().Get() == id)
422 std::vector<Layout::Page::Space::BlockPtr> blocks(ss->GetBlocks());
423 for (Layout::Page::Space::BlockPtr &b : blocks)
425 const std::shared_ptr<Layout::Page::Space::Block> sb(b.lock());
426 if (sb->GetId() == id)
428 const std::shared_ptr<Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock>(sb));
431 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
432 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
434 const std::shared_ptr<Layout::Page::Space::TextBlock::TextLine> sl(l.lock());
435 if (sl->GetId() == id)
437 std::vector<Layout::Page::Space::TextBlock::TextLine::LineElementPtr> lels(sl->GetLineElements());
438 for (
const Layout::Page::Space::TextBlock::TextLine::LineElementPtr lel : lels)
440 const std::shared_ptr<Layout::Page::Space::TextBlock::TextLine::Word> w(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::Word>(lel.lock()));
444 if (w->GetId().Get() == id)
449 const std::shared_ptr<Layout::Page::Space::TextBlock::TextLine::WhiteSpace> s(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::WhiteSpace>(lel.lock()));
453 if (s->GetId().Get() == id)
474 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
477 const std::shared_ptr<const Layout::Page> sp(p);
478 if (sp->GetId() == id)
480 std::vector<Layout::Page::SpacePtr> spaces(sp->GetSpaces());
483 const std::shared_ptr<const Layout::Page::Space> ss(s.lock());
485 if (ss->GetId().Get() == id)
487 std::vector<Layout::Page::Space::BlockPtr> blocks(ss->GetBlocks());
488 for (
const Layout::Page::Space::BlockPtr &b : blocks)
490 const std::shared_ptr<const Layout::Page::Space::Block> sb(b.lock());
491 if (sb->GetId() == id)
493 const std::shared_ptr<const Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<const Layout::Page::Space::TextBlock>(sb));
496 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
497 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
499 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine> sl(l.lock());
500 if (sl->GetId() == id)
502 std::vector<Layout::Page::Space::TextBlock::TextLine::LineElementPtr> lels(sl->GetLineElements());
503 for (
const Layout::Page::Space::TextBlock::TextLine::LineElementPtr lel : lels)
505 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine::Word> w(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::Word>(lel.lock()));
509 if (w->GetId().Get() == id)
514 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine::WhiteSpace> s(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::WhiteSpace>(lel.lock()));
518 if (s->GetId().Get() == id)
532 void Alto::register_ids()
534 std::vector<Id> lst(
GetStyles().GetTextStyles());
535 ids.insert(lst.begin(), lst.end());
537 ids.insert(lst.begin(), lst.end());
538 std::vector<Layout::PagePtr> pages(
GetLayout().GetPages());
541 const std::shared_ptr<const Layout::Page> sp(p.lock());
542 ids.insert(sp->GetId());
543 std::vector<Layout::Page::SpacePtr> spaces(sp->GetSpaces());
546 const std::shared_ptr<const Layout::Page::Space> ss(s.lock());
548 ids.insert(ss->GetId().Get());
549 std::vector<Layout::Page::Space::BlockPtr> blocks(ss->GetBlocks());
550 for (
const Layout::Page::Space::BlockPtr &b : blocks)
552 const std::shared_ptr<const Layout::Page::Space::Block> sb(b.lock());
553 ids.insert(sb->GetId());
554 const std::shared_ptr<const Layout::Page::Space::TextBlock> tb(std::dynamic_pointer_cast<const Layout::Page::Space::TextBlock>(sb));
557 std::vector<Layout::Page::Space::TextBlock::TextLinePtr> lines(tb->GetTextLines());
558 for (
const Layout::Page::Space::TextBlock::TextLinePtr &l : lines)
560 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine> sl(l.lock());
561 ids.insert(sl->GetId());
562 std::vector<Layout::Page::Space::TextBlock::TextLine::LineElementPtr> lels(sl->GetLineElements());
563 for (
const Layout::Page::Space::TextBlock::TextLine::LineElementPtr lel : lels)
565 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine::Word> w(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::Word>(lel.lock()));
569 ids.insert(w->GetId().Get());
573 const std::shared_ptr<const Layout::Page::Space::TextBlock::TextLine::WhiteSpace> s(std::dynamic_pointer_cast<Layout::Page::Space::TextBlock::TextLine::WhiteSpace>(lel.lock()));
577 ids.insert(s->GetId().Get());
598 if (ids.find(
id) == ids.end())
613 return ids.find(
id) == ids.end();
645 Alto::Root::Root(
const Element &el):
653 if (elname ==
"Description")
657 else if (elname ==
"Styles")
659 styles.reset(
new Styles(cel));
661 else if (elname ==
"Layout")
663 layout.reset(
new Layout(cel));
668 init(
"file:///dev/null");
682 SetAttribute(
"xmlns:xsi",
"http://www.w3.org/2001/XMLSchema-instance");
683 SetAttribute(
"xmlns", ns);
686 nsloc +=
"http://www.loc.gov/standards/alto/alto-v2.0.xsd";
687 SetAttribute(
"xsi:schemaLocation", nsloc);
688 SetAttribute(
"xmlns:xlink",
"http://www.w3.org/TR/xlink");
695 void Alto::Root::init(
const StringUTF8 &imgname)
698 description.reset(
new Description(PushBackElement(
"Description"), imgname));
700 styles.reset(
new Styles(PushBackElement(
"Styles")));
702 layout.reset(
new Layout(PushBackElement(
"Layout")));
Id CreateId()
Creates a new id for the document.
Layout::Page::Space::TextBlock::TextLine & GetTextLine(const Id &id)
List of styles used in the document.
Styles & GetStyles()
Gets the styles description part of the Alto (may be null)
Comment PushBackComment(const StringUTF8 &text)
Adds a comment at the end of the children list.
static crn::StringUTF8 CreateUniqueId(size_t len=8)
Generates an almost unique id.
Element EndElement()
Gets a null node.
Element GetRoot()
Gets the first element.
Element PushBackElement(const StringUTF8 &name)
Adds an element at the end of the children list.
std::vector< Id > GetParagraphStyles() const
Returns the ids of the paragraph styles.
Layout::Page::Space::Block & GetBlock(const Id &id)
bool IsEmpty() const noexcept
Checks if the string is empty.
bool CheckId(const Id &id) const
Checks if an id already exists in the document.
A convenience class for file paths.
Id AddId(Element &el)
Adds an id to an element.
std::weak_ptr< Space > SpacePtr
Element BeginElement()
Gets the first child element.
void SetAttribute(const StringUTF8 &name, const StringUTF8 &value)
Sets the value of an attribute.
Layout & GetLayout()
Gets the layout description part of the Alto.
Layout::Page & GetPage(const Id &id)
T GetAttribute(const StringUTF8 &name, bool silent=true) const
Gets an attribute.
Element & GetElement(const Id &id)
Layout::Page::Space & GetSpace(const Id &id)
std::weak_ptr< Page > PagePtr
A character string class.
Alto(const Path &fname, bool char_conversion_throws=true)
Constructor from a file.
Layout::Page::Space::TextBlock & GetTextBlock(const Id &id)
Invalid argument error (e.g.: nullptr pointer)
Layout::Page::Space::TextBlock::TextLine::Word & GetWord(const Id &id)
An item was not found in a container.