22 #ifndef CRNAltoWrapper_HEADER
23 #define CRNAltoWrapper_HEADER
80 {
if (PagePath::operator<(other))
return true;
105 {
if (SpacePath::operator<(other))
return true;
130 {
if (BlockPath::operator<(other))
return true;
155 {
if (TextLinePath::operator<(other))
return true;
/*! \brief Creates a wrapper from a crn::Document that was created by Nimrod.
 *
 * \param[in] document the document to wrap
 * \param[in] create_altos defaults to false; presumably controls whether missing Alto files are created — TODO confirm against the implementation
 * \param[in] throw_exceptions defaults to true; presumably selects throwing over silent failure — TODO confirm
 * \return the new wrapper, held by a std::unique_ptr
 */
182 static std::unique_ptr<AltoWrapper>
NewFromDocument(
const crn::SDocument &document,
bool create_altos =
false,
bool throw_exceptions =
true);
/*! \brief Overload of NewFromDocument that takes a path instead of a loaded document.
 *
 * \param[in] documentname path identifying the document (presumably the saved crn::Document file — TODO confirm)
 * \param[in] create_altos defaults to false; presumably controls whether missing Alto files are created — TODO confirm
 * \param[in] throw_exceptions defaults to true; presumably selects throwing over silent failure — TODO confirm
 * \return the new wrapper, held by a std::unique_ptr
 */
184 static std::unique_ptr<AltoWrapper>
NewFromDocument(
const crn::Path &documentname,
bool create_altos =
false,
bool throw_exceptions =
true);
/*! \brief Creates a wrapper from a list of images.
 *
 * \param[in] begin_ iterator on the first element of the range
 * \param[in] end_ iterator past the last element of the range
 * \param[in] documentname path under which the freshly built document is saved
 * \param[in] throw_exceptions defaults to true; how it is used is not visible in this listing
 */
191 template<
typename Iter>
static std::unique_ptr<AltoWrapper>
NewFromImages(Iter begin_, Iter end_,
const crn::Path &documentname,
bool throw_exceptions =
true)
// Start from a default-constructed crn::Document.
193 auto doc = std::make_shared<crn::Document>();
// Walk the range [begin_, end_).
// NOTE(review): the statement executed for each element is not visible in this listing.
194 for (Iter it = begin_; it != end_; ++it)
// Save the document under documentname.
// NOTE(review): the return statement is not visible in this listing.
196 doc->Save(documentname);
/*! \brief Creates a wrapper from a list of image and Alto paths (such as a map or any container of pairs).
 *
 * \param[in] begin_ iterator on the first element; each element must expose .first and .second
 * \param[in] end_ iterator past the last element
 * \param[in] documentname forwarded unchanged to newFromList
 * \param[in] prog optional progress display (may be nullptr), forwarded unchanged to newFromList
 * \param[in] throw_exceptions forwarded unchanged to newFromList
 * \return whatever newFromList returns
 */
206 template<
typename Iter>
static std::unique_ptr<AltoWrapper>
NewFromList(Iter begin_, Iter end_,
const crn::Path &documentname,
crn::Progress *prog =
nullptr,
bool throw_exceptions =
true)
// Copy the range into a concrete vector of (Path, Path) pairs so the
// non-template newFromList can consume it.
208 std::vector<std::pair<crn::Path, crn::Path>> imgxml;
209 for (Iter it = begin_; it != end_; ++it)
210 imgxml.push_back(std::make_pair(it->first, it->second));
211 return newFromList(imgxml, documentname, prog, throw_exceptions);
/*! \brief Destructor: calls Save() on the block, then on the alto, skipping whichever handle is null. */
233 ~ViewLock() {
if (block) block->Save();
if (alto) alto->Save(); }
/*! \brief Constructor: keeps a copy of the block handle and of the alto handle.
 *
 * \param[in] b the block (image) handle
 * \param[in] a the alto handle
 */
243 ViewLock(
const SBlock &b,
const SAlto &a):block(b),alto(a) { }
/*! \brief Constructor of the word proxy.
 *
 * \param[in] b the block handle, copied into the proxy
 * \param[in] w the alto word element; its address is stored, so it must outlive the proxy
 * \param[in] l the shared view lock, copied into the proxy
 * \param[in] t path of the parent text line; combined with the word's id to build the word path
 */
257 Word(
const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine::Word &w,
const std::shared_ptr<ViewLock> &l,
const TextLinePath &t):block(b),word(&w),lock(l),path(t, w.
GetId().Get()) { }
/*! \brief Returns the alto (read-only handle obtained from the view lock). */
267 SCAlto
GetAlto()
const {
return lock->GetAlto(); }
/*! \brief Gets the alto element (mutable reference to the wrapped word). */
269 Alto::Layout::Page::Space::TextBlock::TextLine::Word&
GetElement() {
return *word; }
/*! \brief Gets the alto element (read-only reference to the wrapped word). */
271 const Alto::Layout::Page::Space::TextBlock::TextLine::Word&
GetElement()
const {
return *word; }
/*! \brief Returns the id of the element.
 *
 * The element's GetId() result is unwrapped with Get().
 * NOTE(review): a reference is returned — confirm that word->GetId() does not
 * return its wrapper by value, otherwise the reference would dangle.
 */
277 const Id&
GetId()
const {
return word->GetId().Get(); }
/*! \brief Returns the list of style references of the word. */
279 std::vector<Id>
GetStyles()
const {
return word->GetStyles(); }
/*! \brief Adds a reference to a style.
 *
 * \param[in] styleid id of the style, forwarded to the wrapped word
 */
285 void AddStyle(
const Id &styleid) { word->AddStyle(styleid); }
/*! \brief Sets the font style of the word.
 *
 * \param[in] fs the font style, forwarded to the wrapped word
 */
299 void SetFontStyle(Alto::Styles::Text::FontStyle fs) { word->SetFontStyle(fs); }
/*! \brief Sets the OCR confidence of the word [0, 1].
 *
 * \param[in] conf the confidence, forwarded to the wrapped word (no range check is done here)
 */
316 void SetWC(
double conf) { word->SetWC(conf); }
// Address of the alto word element that was passed by reference to the constructor.
320 Alto::Layout::Page::Space::TextBlock::TextLine::Word *word;
// Shared handle on the view lock; GetAlto() is served through it.
321 std::shared_ptr<ViewLock> lock;
/*! \brief Constructor of the text line proxy.
 *
 * \param[in] b the block handle, copied into the proxy
 * \param[in] t the alto text line element; its address is stored, so it must outlive the proxy
 * \param[in] l the shared view lock, copied into the proxy
 * \param[in] bp path of the parent text block; combined with the line's id to build the line path
 */
334 TextLine(
const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine &t,
const std::shared_ptr<ViewLock> &l,
const BlockPath &bp):block(b),textline(&t),lock(l),path(bp, t.
GetId()) { }
/*! \brief Returns the alto (read-only handle obtained from the view lock). */
344 SCAlto
GetAlto()
const {
return lock->GetAlto(); }
/*! \brief Gets the alto element (mutable reference to the wrapped text line). */
346 Alto::Layout::Page::Space::TextBlock::TextLine&
GetElement() {
return *textline; }
/*! \brief Gets the alto element (read-only reference to the wrapped text line). */
348 const Alto::Layout::Page::Space::TextBlock::TextLine&
GetElement()
const {
return *textline; }
/*! \brief Returns the id of the element (as reported by the wrapped text line). */
354 const Id&
GetId()
const {
return textline->GetId(); }
/*! \brief Returns the list of style references of the text line. */
356 std::vector<Id>
GetStyles()
const {
return textline->GetStyles(); }
/*! \brief Adds a reference to a style.
 *
 * \param[in] styleid id of the style, forwarded to the wrapped text line
 */
362 void AddStyle(
const Id &styleid) { textline->AddStyle(styleid); }
/*! \brief Sets the baseline ordinate.
 *
 * \param[in] d the baseline ordinate, forwarded to the wrapped text line
 * \param[in] check_bounds defaults to true and is forwarded as is; presumably validates d against the line's bounding box — TODO confirm
 */
372 void SetBaseline(
double d,
bool check_bounds =
true) { textline->SetBaseline(d, check_bounds); }
// Address of the alto text line element that was passed by reference to the constructor.
403 Alto::Layout::Page::Space::TextBlock::TextLine *textline;
// Shared handle on the view lock; GetAlto() is served through it.
404 std::shared_ptr<ViewLock> lock;
/*! \brief Constructor of the text block proxy.
 *
 * \param[in] b the block handle, copied into the proxy
 * \param[in] t the alto text block element; its address is stored, so it must outlive the proxy
 * \param[in] l the shared view lock, copied into the proxy
 * \param[in] s path of the parent space; combined with the text block's id to build the block path
 */
417 TextBlock(
const SBlock &b, Alto::Layout::Page::Space::TextBlock &t,
const std::shared_ptr<ViewLock> &l,
const SpacePath &s):block(b),textblock(&t),lock(l),path(s, t.
GetId()) { }
/*! \brief Returns the alto (read-only handle obtained from the view lock). */
427 SCAlto
GetAlto()
const {
return lock->GetAlto(); }
/*! \brief Gets the alto element (mutable reference to the wrapped text block). */
429 Alto::Layout::Page::Space::TextBlock&
GetElement() {
return *textblock; }
/*! \brief Gets the alto element (read-only reference to the wrapped text block). */
431 const Alto::Layout::Page::Space::TextBlock&
GetElement()
const {
return *textblock; }
/*! \brief Returns the id of the element (as reported by the wrapped text block). */
437 const Id&
GetId()
const {
return textblock->GetId(); }
/*! \brief Returns the list of style references of the text block. */
439 std::vector<Id>
GetStyles()
const {
return textblock->GetStyles(); }
/*! \brief Adds a reference to a style.
 *
 * \param[in] styleid id of the style, forwarded to the wrapped text block
 */
445 void AddStyle(
const Id &styleid) { textblock->AddStyle(styleid); }
// Address of the alto text block element that was passed by reference to the constructor.
478 Alto::Layout::Page::Space::TextBlock *textblock;
// Shared handle on the view lock; GetAlto() is served through it.
479 std::shared_ptr<ViewLock> lock;
/*! \brief Constructor of the space proxy.
 *
 * \param[in] b the block handle, copied into the proxy
 * \param[in] s the alto space element; its address is stored, so it must outlive the proxy
 * \param[in] l the shared view lock, copied into the proxy
 * \param[in] p path of the parent page; combined with the space's id (unwrapped with Get()) to build the space path
 */
492 Space(
const SBlock &b, Alto::Layout::Page::Space &s,
const std::shared_ptr<ViewLock> &l,
const PagePath &p):block(b),space(&s),lock(l),path(p, s.
GetId().Get()) { }
/*! \brief Returns the alto (read-only handle obtained from the view lock). */
502 SCAlto
GetAlto()
const {
return lock->GetAlto(); }
/*! \brief Gets the alto element (read-only reference to the wrapped space). */
506 const Alto::Layout::Page::Space&
GetElement()
const {
return *space; }
/*! \brief Returns the id of the element.
 *
 * The element's GetId() result is unwrapped with Get().
 * NOTE(review): a reference is returned — confirm that space->GetId() does not
 * return its wrapper by value, otherwise the reference would dangle.
 */
512 const Id&
GetId()
const {
return space->GetId().Get(); }
/*! \brief Returns the list of style references of the space. */
514 std::vector<Id>
GetStyles()
const {
return space->GetStyles(); }
/*! \brief Adds a reference to a style.
 *
 * \param[in] styleid id of the style, forwarded to the wrapped space
 */
520 void AddStyle(
const Id &styleid) { space->AddStyle(styleid); }
// Address of the alto space element that was passed by reference to the constructor.
549 Alto::Layout::Page::Space *space;
// Shared handle on the view lock; GetAlto() is served through it.
550 std::shared_ptr<ViewLock> lock;
/*! \brief Constructor of the page proxy.
 *
 * \param[in] b the block handle, copied into the proxy
 * \param[in] p the alto page element; its address is stored, so it must outlive the proxy
 * \param[in] l the shared view lock, copied into the proxy
 * \param[in] vid id of the view; combined with the page's id to build the page path
 */
563 Page(
const SBlock &b, Alto::Layout::Page &p,
const std::shared_ptr<ViewLock> &l,
const String &vid):block(b),page(&p),lock(l),path(vid, p.
GetId()) { }
/*! \brief Returns the alto (read-only handle obtained from the view lock). */
573 SCAlto
GetAlto()
const {
return lock->GetAlto(); }
/*! \brief Gets the alto element (read-only reference to the wrapped page). */
577 const Alto::Layout::Page&
GetElement()
const {
return *page; }
/*! \brief Returns the id of the element (as reported by the wrapped page). */
583 const Id&
GetId()
const {
return page->GetId(); }
/*! \brief Returns the list of style references of the page. */
589 std::vector<Id>
GetStyles()
const {
return page->GetStyles(); }
/*! \brief Adds a reference to a style.
 *
 * \param[in] styleid id of the style, forwarded to the wrapped page
 */
595 void AddStyle(
const Id &styleid) { page->AddStyle(styleid); }
/*! \brief Sets the quality of the original page.
 *
 * \param[in] q the quality, forwarded to the wrapped page
 */
613 void SetQuality(AltoPage::Quality q) { page->SetQuality(q); }
// Address of the alto page element that was passed by reference to the constructor.
669 Alto::Layout::Page *page;
// Shared handle on the view lock; GetAlto() is served through it.
670 std::shared_ptr<ViewLock> lock;
/*! \brief Constructor.
 *
 * \param[in] l the shared view lock, copied into the view
 * \param[in] view_id the id of the view, copied into the view
 */
684 View(
const std::shared_ptr<ViewLock> &l,
const String &view_id):lock(l), id(view_id) { }
/*! \brief Returns the image (read-only block handle obtained from the view lock). */
696 SCBlock
GetBlock()
const {
return lock->GetBlock(); }
/*! \brief Returns the alto (read-only handle obtained from the view lock). */
700 SCAlto
GetAlto()
const {
return lock->GetAlto(); }
/*! \brief Gets the global description part of the Alto.
 *
 * The alto handle returned by the lock is dereferenced without a null check.
 */
703 Alto::Description&
GetDescription() {
return lock->GetAlto()->GetDescription(); }
/*! \brief Gets the styles description part of the Alto.
 *
 * The alto handle returned by the lock is dereferenced without a null check.
 */
706 Alto::Styles&
GetStyles() {
return lock->GetAlto()->GetStyles(); }
// Shared handle on the view lock; GetBlock(), GetAlto(), GetDescription() and GetStyles() are served through it.
731 std::shared_ptr<ViewLock> lock;
/*! \brief Gets the list of the view ids of the document (delegates to the wrapped document). */
735 std::vector<String>
GetViewIds()
const {
return doc->GetViewIds(); }
/*! \brief Non-template helper that the NewFromList template forwards to.
 *
 * \param[in] filelist pairs of paths; presumably (image, Alto) in that order — TODO confirm against the definition
 * \param[in] documentname path of the document
 * \param[in] prog progress display (NewFromList may pass nullptr)
 * \param[in] throw_exceptions passed through from NewFromList
 * \return the new wrapper, held by a std::unique_ptr
 */
764 static std::unique_ptr<AltoWrapper> newFromList(
const std::vector<std::pair<crn::Path, crn::Path> > &filelist,
const crn::Path &documentname,
crn::Progress *prog,
bool throw_exceptions);
/*! \brief Returns a shared ViewLock handle for a view.
 *
 * \param[in] view_id the id of the view
 *
 * NOTE(review): declared const; presumably reads and updates the mutable
 * viewLocks map — the definition is not visible here, confirm.
 */
770 std::shared_ptr<ViewLock> getLock(
const String &view_id)
const;
// Weak references on ViewLock objects, keyed by a String (presumably the view id — TODO confirm).
// Declared mutable so that const member functions can modify the map.
773 mutable std::map<String, std::weak_ptr<ViewLock> > viewLocks;
ViewLock & operator=(const ViewLock &)=delete
SBlock GetBlock()
Returns the image.
static const String & GraphicalElementKey()
const Rect & GetBBox() const
Gets the coordinates of the text block.
TextBlock AddTextBlockAfter(const Id &pred, const crn::Rect &bbox)
Adds a text block to the space.
void SetWC(double conf)
Sets the OCR confidence of the word [0, 1].
bool operator<(const SpacePath &other) const
SpacePath(const PagePath &p, const Id &s)
SAlto GetAlto()
Gets the toplevel alto file.
void RemovePage(const Id &pageId)
Removes a page.
TextLine GetTextLine(const TextLinePath &p)
Gets a TextLine by path.
const Alto::Layout::Page::Space & GetElement() const
Gets the alto element.
Space AddPrintSpace(const crn::Rect &bbox)
Adds the print space.
const Id & GetId() const
Returns the id of the element.
Alto::Description & GetDescription()
Gets the global description part of the Alto (may be null)
AltoWrapper(const AltoWrapper &)=delete
bool operator!=(const WordPath &other) const
const String & GetId() const
Returns the id of the view.
std::vector< String > GetViewIds() const
Gets the list of the view ids of the document.
Page AddPageAfter(const Id &pred, int image_number, int w, int h, Option< AltoPage::Position > pos=Option< AltoPage::Position >())
Adds a page.
virtual ~SpacePath() override
const Rect & GetBBox() const
Gets the coordinates of the word.
Word & operator=(const Word &)=default
void ResizeTextLine(const TextLinePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a line and all its parents if needed.
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
bool operator==(const WordPath &other) const
TextBlock AddTextBlockBefore(const Id &next, const crn::Rect &bbox)
Adds a text block to the space.
const Id & GetId() const
Returns the id of the element.
const Id & GetId() const
Returns the id of the element.
AltoWrapper & operator=(const AltoWrapper &)=delete
static std::unique_ptr< AltoWrapper > NewFromDir(const crn::Path &directory, const crn::Path &documentname, const crn::Path &imagedirectory="", crn::Progress *prog=nullptr, bool throw_exceptions=true)
Creates a wrapper from a directory containing Altos.
Option< StringUTF8 > GetQualityDetail() const
Returns details on the quality of the original page.
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
Base class for a progress display.
SCAlto GetAlto() const
Returns the alto.
Page GetPage(const PagePath &p)
Gets a Page by path.
TextLine AddTextLineBefore(const Id &next, const crn::Rect &bbox)
Adds a text line to the block.
void ResizeTextLine(const TextLinePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a line and all its parents if needed.
std::vector< Id > GetStyles() const
Returns the list of style references.
SpacePath & operator=(const SpacePath &)=default
bool operator<(const BlockPath &other) const
TextLinePath & operator=(const TextLinePath &)=default
const Alto::Layout::Page & GetElement() const
Gets the alto element.
StringUTF8 GetContent() const
Returns the transcription of the word.
bool operator!=(const SpacePath &other) const
SCAlto GetAlto() const
Returns the alto.
Space AddBottomMargin(const crn::Rect &bbox)
Adds a bottom margin.
Option< StringUTF8 > GetPrintedImageNumber() const
Returns the page number that is printed on the document.
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
Alto::Layout::Page::Space::TextBlock::TextLine & GetElement()
Gets the alto element.
Page GetPage(const Id &pageId)
Gets a page.
Option< double > GetWC() const
Returns the OCR confidence of the word [0, 1].
ViewLock(const ViewLock &)=delete
Space GetRightMargin()
Gets the space proxy on the right margin.
const BlockPath & GetPath() const
Returns the path to the text block.
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
Word AddWordAfter(const Id &pred, const StringUTF8 &text, const crn::Rect &bbox)
Adds a word to the line.
Word AddWordBefore(const Id &next, const StringUTF8 &text, const crn::Rect &bbox)
Adds a word to the line.
size_t GetNbTextLines() const
Returns the number of text lines in the page space.
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the space.
void ResizeSpace(const SpacePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a space and all its parents if needed.
SAlto GetAlto()
Gets the toplevel alto file.
void AddStyle(const Id &styleid)
Adds a reference to a style.
std::vector< Id > GetWords() const
Returns the ids of the words in the line.
virtual String ToString() const override
Word(const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine::Word &w, const std::shared_ptr< ViewLock > &l, const TextLinePath &t)
static const String & TextBlockKey()
static std::unique_ptr< AltoWrapper > NewFromList(Iter begin_, Iter end_, const crn::Path &documentname, crn::Progress *prog=nullptr, bool throw_exceptions=true)
Creates a wrapper from a list of image and Alto paths (such as a map or any container<pair>) ...
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
void RemoveSpace(const Id &sid)
Removes a space.
Page AddPageBefore(const Id &next, int image_number, int w, int h, Option< AltoPage::Position > pos=Option< AltoPage::Position >())
Adds a page.
void AddStyle(const Id &styleid)
Adds a reference to a style.
static const String & ComposedBlockKey()
const Alto::Layout::Page::Space::TextBlock & GetElement() const
Gets the alto element.
WordPath & operator=(const WordPath &)=default
static const String & WordKey()
void RemoveWord(const Id &wid)
Removes a word from the line.
Alto::Layout::Page & GetElement()
Gets the alto element.
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
Space GetLeftMargin()
Gets the space proxy on the left margin.
const WordPath & GetPath() const
Returns the path to the string.
SAlto GetAlto()
Gets the toplevel alto file.
SAlto GetAlto()
Gets the toplevel alto file.
const TextLinePath & GetPath() const
Returns the path to the text line.
SCDocument GetDocument() const
void Synchronize(bool reset=false)
Creates CRNBlocks and ids where there is none.
static const String & IllustrationKey()
WordPath(const TextLinePath &l, const Id &w)
void SetQualityDetail(const StringUTF8 &s)
Sets the details on the quality of the original page.
void SetContent(const StringUTF8 &s)
Sets the transcription of the word.
View GetView(const String &view_id)
Gets a view by id.
int GetPhysicalImageNumber() const
Returns the number of the page within the document.
void UnsetBaseline()
Unsets the baseline ordinate.
ViewLock & operator=(ViewLock &&v)
void SetPrintedImageNumber(const StringUTF8 &s)
Sets the page number that is printed on the document.
A UTF32 character string class.
SBlock GetBlock()
Returns the image.
const String AddView(const Path &imagename, const Path &altoname="")
Adds a view to the document.
void AddStyle(const Id &styleid)
Adds a reference to a style.
Option< AltoWord::SubstitutionType > GetSubstitutionType() const
Returns the substitution type of the word.
void ResizeWord(const WordPath &p, const crn::Rect &r)
Changes the size of a word and all its parents if needed.
TextBlock GetTextBlock(const Id &id)
Gets a text block proxy.
void SetQuality(AltoPage::Quality q)
Sets the quality of the original page.
Space GetBottomMargin()
Gets the space proxy on the bottom margin.
bool operator!=(const PagePath &other) const
Option< StringUTF8 > GetPageClass() const
Returns the class of the page (user defined class such as "title")
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
void SetAccuracy(double acc)
Sets the estimated % of OCR accuracy on the page [0, 100].
TextLinePath(const BlockPath &b, const Id &l)
SBlock GetBlock()
Returns the image.
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
const PagePath & GetPath() const
Returns the path to the page.
std::vector< Id > GetStyles() const
Returns the list of style references.
TextBlock AddTextBlock(const crn::Rect &bbox)
Adds a text block to the space.
std::vector< Id > GetTextBlocks() const
Returns the ids of the text blocks in the page space.
Page(const SBlock &b, Alto::Layout::Page &p, const std::shared_ptr< ViewLock > &l, const String &vid)
SBlock GetBlock()
Returns the image.
static const crn::String & Separator()
Space GetSpace(const SpacePath &p)
Gets a Space by path.
bool operator!=(const TextLinePath &other) const
Word AddWord(const StringUTF8 &text, const crn::Rect &bbox)
Adds a word to the line.
static const TextLinePath & NullPath()
A convenience class for file paths.
Option< AltoPage::Quality > GetQuality() const
Returns the quality of the original page.
void ResizeWord(const WordPath &p, const crn::Rect &r)
Changes the size of a word and all its parents if needed.
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
virtual ~BlockPath() override
const Id & GetId() const
Returns the id of the element.
bool operator==(const SpacePath &other) const
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the text line.
Alto::Styles & GetStyles()
Gets the styles description part of the Alto (may be null)
virtual String ToString() const override
const Rect & GetBBox() const
Gets the coordinates of the page.
static const BlockPath & NullPath()
static const WordPath & NullPath()
Alto::Layout::Page::Space::TextBlock & GetElement()
Gets the alto element.
std::vector< Id > GetStyles() const
Returns the list of style references.
TextBlock(const SBlock &b, Alto::Layout::Page::Space::TextBlock &t, const std::shared_ptr< ViewLock > &l, const SpacePath &s)
Option< double > GetAccuracy() const
Returns the estimated % of OCR accuracy on the page [0, 100].
Option< bool > GetManuallyCorrected()
Returns whether the line was manually corrected or not.
SCAlto GetAlto() const
Returns the alto.
Alto::Layout::Page::Space::TextBlock::TextLine::Word & GetElement()
Gets the alto element.
virtual ~TextLinePath() override
const Rect & GetBBox() const
Gets the coordinates of the space.
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the page.
Word GetWord(const Id &id)
Gets a word proxy.
void ResizeTextBlock(const BlockPath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a text block and all its parents if needed.
size_t GetNbWords() const
Returns the number of words in the line.
static const String & TextLineKey()
SCAlto GetAlto() const
Returns the alto.
Space GetTopMargin()
Gets the space proxy on the top margin.
static std::unique_ptr< AltoWrapper > NewFromDocument(const crn::SDocument &document, bool create_altos=false, bool throw_exceptions=true)
Creates a wrapper from a crn::Document that was created by Nimrod.
virtual String ToString() const override
std::vector< Id > GetTextLines() const
Returns the ids of the text lines in the page space.
Space AddRightMargin(const crn::Rect &bbox)
Adds a right margin.
Option< Id > GetProcessing() const
Returns the id of the processing applied to the page.
static std::unique_ptr< AltoWrapper > NewFromDirs(const crn::Path &image_directory, const crn::Path &xml_directory, const crn::Path &documentname, crn::Progress *prog=nullptr, bool throw_exceptions=true)
Creates a wrapper from two directories containing images and Altos with the same base names...
Option< double > GetPageConfidence() const
Returns the confidence of OCR on the page [0, 1].
View & operator=(const View &)=default
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the text block.
void AddStyle(const Id &styleid)
Adds a reference to a style.
Space(const SBlock &b, Alto::Layout::Page::Space &s, const std::shared_ptr< ViewLock > &l, const PagePath &p)
bool operator==(const BlockPath &other) const
static const String & PageKey()
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
TextLine GetTextLine(const Id &id)
Gets a text line proxy.
View(const std::shared_ptr< ViewLock > &l, const String &view_id)
Constructor.
void SetPageClass(const StringUTF8 &s)
Sets the class of the page (user defined class such as "title")
SCBlock GetBlock() const
Returns the image.
XML Alto file wrapper to crn::Document.
bool operator<(const PagePath &other) const
bool operator!=(const BlockPath &other) const
void AddStyle(const Id &styleid)
Adds a reference to a style.
void SetPhysicalImageNumber(int pnum)
Sets the number of the page within the document.
bool operator==(const TextLinePath &other) const
BlockPath(const SpacePath &s, const Id &b)
Option< StringUTF8 > GetSubstitutionContent()
Returns the substitution of the word.
SCAlto GetAlto() const
Returns the alto.
SCBlock GetBlock() const
Returns the image.
TextBlock & operator=(const TextBlock &)=default
Space AddTopMargin(const crn::Rect &bbox)
Adds a top margin.
size_t GetNbViews() const
Returns the number of views.
const SpacePath & GetPath() const
Returns the path to the space.
Option< StringUTF8 > GetLanguage() const
Returns the language of the text inside the block.
SCBlock GetBlock() const
Returns the image.
static const String & AltoPathKey()
Space AddLeftMargin(const crn::Rect &bbox)
Adds a left margin.
Word GetWord(const WordPath &p)
Gets a Word by path.
Space GetPrintSpace()
Gets the space proxy on the print space.
SBlock GetBlock()
Returns the image.
Page AddPage(int image_number, int w, int h, Option< AltoPage::Position > pos=Option< AltoPage::Position >())
Adds a page.
SAlto GetAlto()
Returns the alto.
TextLine AddTextLineAfter(const Id &pred, const crn::Rect &bbox)
Adds a text line to the block.
const Alto::Layout::Page::Space::TextBlock::TextLine & GetElement() const
Gets the alto element.
BlockPath & operator=(const BlockPath &)=default
TextLine(const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine &t, const std::shared_ptr< ViewLock > &l, const BlockPath &bp)
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
void SetPageConfidence(double c)
Sets the confidence of OCR on the page [0, 1].
TextLine AddTextLine(const crn::Rect &bbox)
Adds a text line to the block.
PagePath & operator=(const PagePath &)=default
PagePath(const String &v, const Id &p)
SCBlock GetBlock() const
Returns the image.
SCAlto GetAlto() const
Returns the alto.
Page & operator=(const Page &)=default
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
Option< AltoPage::Position > GetPosition() const
Returns the position of the page.
void ResizeTextBlock(const BlockPath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a text block and all its parents if needed.
SAlto GetAlto()
Gets the toplevel alto file.
TextBlock GetTextBlock(const BlockPath &p)
Gets a TextBlock by path.
CRN_ALIAS_SMART_PTR(ImageBW)
A character string class.
void UnsetFontStyle()
Unsets the font style of the word.
const Id & GetId() const
Returns the id of the element.
SCBlock GetBlock() const
Returns the image.
void SetBBox(const crn::Rect &r)
Sets the coordinates of the word.
void RemoveBlock(const Id &bid)
Removes a block.
std::vector< Id > GetSpaces() const
Returns the ids of the spaces in the page.
A class to store an optional value.
virtual ~WordPath() override
static const String & SpaceKey()
static std::unique_ptr< AltoWrapper > NewFromImages(Iter begin_, Iter end_, const crn::Path &documentname, bool throw_exceptions=true)
Creates a wrapper from a list of images.
Internal class used to save modifications at the right time.
Option< double > GetBaseline() const
Returns the ordinate of the baseline.
Alto::Layout::Page::Space & GetElement()
Gets the alto element.
SBlock GetBlock()
Returns the image.
std::vector< Id > GetStyles() const
Returns the list of style references.
std::vector< Id > GetStyles() const
Returns the list of style references.
bool operator<(const TextLinePath &other) const
Option< Alto::Styles::Text::FontStyle > GetFontStyle() const
Returns the font style of the word.
std::vector< Id > GetPages()
Gets the list of page ids.
void SetBaseline(double d, bool check_bounds=true)
Sets the baseline ordinate.
void ResizeSpace(const SpacePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a space and all its parents if needed.
void SetSubstitution(AltoWord::SubstitutionType stype, const StringUTF8 &scontent)
Sets the substitution of the word.
bool operator==(const PagePath &other) const
Space GetSpace(const Id &spaceid)
Gets a space proxy.
Space & operator=(const Space &)=default
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
void SetFontStyle(Alto::Styles::Text::FontStyle fs)
Sets the font style of the word.
const Rect & GetBBox() const
Gets the coordinates of the text line.
const Alto::Layout::Page::Space::TextBlock::TextLine::Word & GetElement() const
Gets the alto element.
void RemoveTextLine(const Id &tid)
Removes a text line.
static const PagePath & NullPath()
static const SpacePath & NullPath()
TextLine & operator=(const TextLine &)=default
virtual String ToString() const override
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
SCBlock GetBlock() const
Returns the image.
virtual String ToString() const
bool operator<(const WordPath &other) const