libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNAltoWrapper.h
Go to the documentation of this file.
1 /* Copyright 2011-2016 CoReNum
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNAltoWrapper.h
19  * \author Yann LEYDIER
20  */
21 
22 #ifndef CRNAltoWrapper_HEADER
23 #define CRNAltoWrapper_HEADER
24 
25 #include <CRNXml/CRNAlto.h>
26 #include <CRNDocument.h>
27 #include <CRNUtils/CRNProgress.h>
28 #include <vector>
29 #include <utility>
30 
31 namespace crn
32 {
33  namespace xml
34  {
39  struct PagePath
40  {
41  PagePath() { }
42  PagePath(const String &v, const Id &p):view_id(v),page_id(p) { }
43  PagePath(const String &p);
44  PagePath(const PagePath&) = default;
45  PagePath(PagePath&&) = default;
46  virtual ~PagePath() {}
47  PagePath& operator=(const PagePath&) = default;
48  PagePath& operator=(PagePath&&) = default;
49 
50  inline bool operator==(const PagePath &other) const { return (view_id == other.view_id) && (page_id == other.page_id); }
51  inline bool operator!=(const PagePath &other) const { return !operator==(other); }
52  inline bool operator<(const PagePath &other) const // lexicographic order: view_id first, then page_id
53  { if (view_id < other.view_id) return true; // a smaller view id decides the order on its own
54  else if ((view_id == other.view_id) && (page_id < other.page_id)) return true; // same view: the page id breaks the tie
55  return false; }
58  virtual String ToString() const { return view_id + Separator() + page_id; }
59  static const crn::String& Separator();
60  static const PagePath& NullPath();
61  };
66  struct SpacePath: public PagePath
67  {
68  SpacePath() { }
69  SpacePath(const PagePath &p, const Id &s):PagePath(p),space_id(s) { }
70  SpacePath(const String &p);
71  SpacePath(const SpacePath&) = default;
72  SpacePath(SpacePath&&) = default;
73  virtual ~SpacePath() override {}
74  SpacePath& operator=(const SpacePath&) = default;
75  SpacePath& operator=(SpacePath&&) = default;
76 
77  inline bool operator==(const SpacePath &other) const { return PagePath::operator==(other) && (space_id == other.space_id); }
78  inline bool operator!=(const SpacePath &other) const { return !operator==(other); }
79  inline bool operator<(const SpacePath &other) const // lexicographic order: page path (view, page) first, then space_id
80  { if (PagePath::operator<(other)) return true; // a smaller view/page prefix decides the order on its own
81  else if (PagePath::operator==(other) && (space_id < other.space_id)) return true; // space_id breaks the tie only when the whole page path is equal; matching page_id alone (with different views) would let both a<b and b<a hold
82  return false; }
84  virtual String ToString() const override { return PagePath::ToString() + Separator() + space_id; }
85  static const SpacePath& NullPath();
86  };
91  struct BlockPath: public SpacePath
92  {
93  BlockPath() { }
94  BlockPath(const SpacePath &s, const Id &b):SpacePath(s),block_id(b) { }
95  BlockPath(const String &p);
96  BlockPath(const BlockPath&) = default;
97  BlockPath(BlockPath&&) = default;
98  virtual ~BlockPath() override { }
99  BlockPath& operator=(const BlockPath&) = default;
100  BlockPath& operator=(BlockPath&&) = default;
101 
102  inline bool operator==(const BlockPath &other) const { return SpacePath::operator==(other) && (block_id == other.block_id); }
103  inline bool operator!=(const BlockPath &other) const { return !operator==(other); }
104  inline bool operator<(const BlockPath &other) const // lexicographic order: space path (view, page, space) first, then block_id
105  { if (SpacePath::operator<(other)) return true; // a smaller view/page/space prefix decides the order on its own
106  else if (SpacePath::operator==(other) && (block_id < other.block_id)) return true; // block_id breaks the tie only when the whole space path is equal; matching space_id alone is not enough for a strict weak ordering
107  return false; }
109  virtual String ToString() const override { return SpacePath::ToString() + Separator() + block_id; }
110  static const BlockPath& NullPath();
111  };
116  struct TextLinePath: public BlockPath
117  {
119  TextLinePath(const BlockPath &b, const Id &l):BlockPath(b),textline_id(l) { }
120  TextLinePath(const String &p);
121  TextLinePath(const TextLinePath&) = default;
122  TextLinePath(TextLinePath&&) = default;
123  virtual ~TextLinePath() override { }
124  TextLinePath& operator=(const TextLinePath&) = default;
125  TextLinePath& operator=(TextLinePath&&) = default;
126 
127  inline bool operator==(const TextLinePath &other) const { return BlockPath::operator==(other) && (textline_id == other.textline_id); }
128  inline bool operator!=(const TextLinePath &other) const { return !operator==(other); }
129  inline bool operator<(const TextLinePath &other) const // lexicographic order: block path first, then textline_id
130  { if (BlockPath::operator<(other)) return true; // a smaller view/page/space/block prefix decides the order on its own
131  else if (BlockPath::operator==(other) && (textline_id < other.textline_id)) return true; // textline_id breaks the tie only when the whole block path is equal; matching block_id alone is not enough for a strict weak ordering
132  return false; }
134  virtual String ToString() const override { return BlockPath::ToString() + Separator() + textline_id; }
135  static const TextLinePath& NullPath();
136  };
141  struct WordPath: public TextLinePath
142  {
143  WordPath() { }
144  WordPath(const TextLinePath &l, const Id &w):TextLinePath(l),word_id(w) { }
145  WordPath(const String &p);
146  WordPath(const WordPath&) = default;
147  WordPath(WordPath&&) = default;
148  virtual ~WordPath() override { }
149  WordPath& operator=(const WordPath&) = default;
150  WordPath& operator=(WordPath&&) = default;
151 
152  inline bool operator==(const WordPath &other) const { return TextLinePath::operator==(other) && (word_id == other.word_id); }
153  inline bool operator!=(const WordPath &other) const { return !operator==(other); }
154  inline bool operator<(const WordPath &other) const // lexicographic order: text line path first, then word_id
155  { if (TextLinePath::operator<(other)) return true; // a smaller view/page/space/block/line prefix decides the order on its own
156  else if (TextLinePath::operator==(other) && (word_id < other.word_id)) return true; // word_id breaks the tie only when the whole text line path is equal; matching textline_id alone is not enough for a strict weak ordering
157  return false; }
159  virtual String ToString() const override { return TextLinePath::ToString() + Separator() + word_id; }
160  static const WordPath& NullPath();
161  };
169  {
170  public:
171  AltoWrapper(const AltoWrapper&) = delete;
172  AltoWrapper(AltoWrapper&&) = default;
174  AltoWrapper& operator=(const AltoWrapper&) = delete;
175  AltoWrapper& operator=(AltoWrapper&&) = default;
176 
178  static std::unique_ptr<AltoWrapper> NewFromDir(const crn::Path &directory, const crn::Path &documentname, const crn::Path &imagedirectory = "", crn::Progress *prog = nullptr, bool throw_exceptions = true);
180  static std::unique_ptr<AltoWrapper> NewFromDirs(const crn::Path &image_directory, const crn::Path &xml_directory, const crn::Path &documentname, crn::Progress *prog = nullptr, bool throw_exceptions = true);
182  static std::unique_ptr<AltoWrapper> NewFromDocument(const crn::SDocument &document, bool create_altos = false, bool throw_exceptions = true);
184  static std::unique_ptr<AltoWrapper> NewFromDocument(const crn::Path &documentname, bool create_altos = false, bool throw_exceptions = true);
191  template<typename Iter> static std::unique_ptr<AltoWrapper> NewFromImages(Iter begin_, Iter end_, const crn::Path &documentname, bool throw_exceptions = true)
192  { // adds one view per element of [begin_, end_) to a new document, saves it under documentname, then wraps it
193  auto document = std::make_shared<crn::Document>();
194  for (; begin_ != end_; ++begin_) // begin_ was received by value, so it can serve as the cursor
195  document->AddView(*begin_);
196  document->Save(documentname);
197  return NewFromDocument(document, /*create_altos=*/true, throw_exceptions);
198  }
206  template<typename Iter> static std::unique_ptr<AltoWrapper> NewFromList(Iter begin_, Iter end_, const crn::Path &documentname, crn::Progress *prog = nullptr, bool throw_exceptions = true)
207  { // copies the (first, second) pairs of [begin_, end_) into a vector and forwards it to the private newFromList
208  std::vector<std::pair<crn::Path, crn::Path>> files;
209  for (; begin_ != end_; ++begin_) // begin_ was received by value, so it can serve as the cursor
210  files.emplace_back(begin_->first, begin_->second);
211  return newFromList(files, documentname, prog, throw_exceptions);
212  }
213 
215  void Synchronize(bool reset = false);
216 
218  const String AddView(const Path &imagename, const Path &altoname = "");
219 
220  SDocument GetDocument() { return doc; }
221  SCDocument GetDocument() const { return doc; }
222 
228  class ViewLock // holds a block and an alto together and saves both when the lock is destroyed
229  {
230  public:
231  ViewLock(const ViewLock&) = delete; // not copyable
232  ViewLock(ViewLock &&v):block(std::move(v.block)),alto(std::move(v.alto)) {} // takes over the block and alto held by v
233  ~ViewLock() { if (block) block->Save(); if (alto) alto->Save(); } // saves whichever of the two pointers is non-null
234  ViewLock& operator=(const ViewLock&) = delete; // not copy-assignable
235  ViewLock& operator=(ViewLock &&v) { block = std::move(v.block); alto = std::move(v.alto); return *this; } // NOTE(review): the previously held block/alto are replaced without Save() being called here — confirm this is intended
236 
237  SBlock GetBlock() { return block; } // returns the held block pointer
238  SCBlock GetBlock() const { return block; } // const overload
239  SAlto GetAlto() { return alto; } // returns the held alto pointer
240  SCAlto GetAlto() const { return alto; } // const overload
241 
242  private:
243  ViewLock(const SBlock &b, const SAlto &a):block(b),alto(a) { } // private: only the friend class AltoWrapper can create a lock
244 
245  SBlock block;
246  SAlto alto;
247 
248  friend class AltoWrapper;
249  };
254  class Word
255  {
256  public:
257  Word(const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine::Word &w, const std::shared_ptr<ViewLock> &l, const TextLinePath &t):block(b),word(&w),lock(l),path(t, w.GetId().Get()) { }
258  Word(const Word&) = default;
259  Word(Word&&) = default;
260  ~Word() { }
261  Word& operator=(const Word&) = default;
262  Word& operator=(Word&&) = default;
263 
265  SAlto GetAlto() { return lock->GetAlto(); }
267  SCAlto GetAlto() const { return lock->GetAlto(); }
269  Alto::Layout::Page::Space::TextBlock::TextLine::Word& GetElement() { return *word; }
271  const Alto::Layout::Page::Space::TextBlock::TextLine::Word& GetElement() const { return *word; }
272 
274  const WordPath& GetPath() const { return path; }
275 
277  const Id& GetId() const { return word->GetId().Get(); }
279  std::vector<Id> GetStyles() const { return word->GetStyles(); }
281  std::vector<Alto::Styles::Text> GetTextStyles();
283  std::vector<Alto::Styles::Paragraph> GetParagraphStyles();
285  void AddStyle(const Id &styleid) { word->AddStyle(styleid); }
287  void RemoveStyle(const Id &styleid) { word->RemoveStyle(styleid); }
289  const Rect& GetBBox() const { return block->GetAbsoluteBBox(); }
291  void SetBBox(const crn::Rect &r);
293  StringUTF8 GetContent() const { return word->GetContent(); }
295  void SetContent(const StringUTF8 &s) { word->SetContent(s); }
297  Option<Alto::Styles::Text::FontStyle> GetFontStyle() const { return word->GetFontStyle(); }
299  void SetFontStyle(Alto::Styles::Text::FontStyle fs) { word->SetFontStyle(fs); }
301  void UnsetFontStyle() { word->UnsetFontStyle(); }
303  Option<AltoWord::SubstitutionType> GetSubstitutionType() const { return word->GetSubstitutionType(); }
305  Option<StringUTF8> GetSubstitutionContent() { return word->GetSubstitutionContent(); }
307  void SetSubstitution(AltoWord::SubstitutionType stype, const StringUTF8 &scontent) { word->SetSubstitution(stype, scontent); }
309  SBlock GetBlock() { return block; }
311  SCBlock GetBlock() const { return block; }
312 
314  Option<double> GetWC() const { return word->GetWC(); }
316  void SetWC(double conf) { word->SetWC(conf); }
317 
318  private:
319  SBlock block;
320  Alto::Layout::Page::Space::TextBlock::TextLine::Word *word;
321  std::shared_ptr<ViewLock> lock;
322  WordPath path;
323  };
325  Word GetWord(const WordPath &p);
326 
331  class TextLine
332  {
333  public:
334  TextLine(const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine &t, const std::shared_ptr<ViewLock> &l, const BlockPath &bp):block(b),textline(&t),lock(l),path(bp, t.GetId()) { }
335  TextLine(const TextLine&) = default;
336  TextLine(TextLine&&) = default;
337  ~TextLine() { }
338  TextLine& operator=(const TextLine&) = default;
339  TextLine& operator=(TextLine&&) = default;
340 
342  SAlto GetAlto() { return lock->GetAlto(); }
344  SCAlto GetAlto() const { return lock->GetAlto(); }
346  Alto::Layout::Page::Space::TextBlock::TextLine& GetElement() { return *textline; }
348  const Alto::Layout::Page::Space::TextBlock::TextLine& GetElement() const { return *textline; }
349 
351  const TextLinePath& GetPath() const { return path; }
352 
354  const Id& GetId() const { return textline->GetId(); }
356  std::vector<Id> GetStyles() const { return textline->GetStyles(); }
358  std::vector<Alto::Styles::Text> GetTextStyles();
360  std::vector<Alto::Styles::Paragraph> GetParagraphStyles();
362  void AddStyle(const Id &styleid) { textline->AddStyle(styleid); }
364  void RemoveStyle(const Id &styleid) { textline->RemoveStyle(styleid); }
366  const Rect& GetBBox() const { return block->GetAbsoluteBBox(); }
368  void SetBBox(const crn::Rect &r, bool erase_oob);
370  Option<double> GetBaseline() const { return textline->GetBaseline(); }
372  void SetBaseline(double d, bool check_bounds = true) { textline->SetBaseline(d, check_bounds); }
374  void UnsetBaseline() { textline->UnsetBaseline(); }
376  Option<bool> GetManuallyCorrected() { return textline->GetManuallyCorrected(); }// CS element
377 
379  SBlock GetBlock() { return block; }
381  SCBlock GetBlock() const { return block; }
382 
384  std::vector<Id> GetWords() const;
386  size_t GetNbWords() const { return textline->GetNbWords(); }
388  Word GetWord(const Id &id);
390  const Word GetWord(const Id &id) const;
392  Word AddWord(const StringUTF8 &text, const crn::Rect &bbox);
394  Word AddWordAfter(const Id &pred, const StringUTF8 &text, const crn::Rect &bbox);
396  Word AddWordBefore(const Id &next, const StringUTF8 &text, const crn::Rect &bbox);
397 
399  void RemoveWord(const Id &wid);
400 
401  private:
402  SBlock block;
403  Alto::Layout::Page::Space::TextBlock::TextLine *textline;
404  std::shared_ptr<ViewLock> lock;
405  TextLinePath path;
406  };
408  TextLine GetTextLine(const TextLinePath &p);
409 
414  class TextBlock
415  {
416  public:
417  TextBlock(const SBlock &b, Alto::Layout::Page::Space::TextBlock &t, const std::shared_ptr<ViewLock> &l, const SpacePath &s):block(b),textblock(&t),lock(l),path(s, t.GetId()) { }
418  TextBlock(const TextBlock&) = default;
419  TextBlock(TextBlock&&) = default;
421  TextBlock& operator=(const TextBlock&) = default;
422  TextBlock& operator=(TextBlock&&) = default;
423 
425  SAlto GetAlto() { return lock->GetAlto(); }
427  SCAlto GetAlto() const { return lock->GetAlto(); }
429  Alto::Layout::Page::Space::TextBlock& GetElement() { return *textblock; }
431  const Alto::Layout::Page::Space::TextBlock& GetElement() const { return *textblock; }
432 
434  const BlockPath& GetPath() const { return path; }
435 
437  const Id& GetId() const { return textblock->GetId(); }
439  std::vector<Id> GetStyles() const { return textblock->GetStyles(); }
441  std::vector<Alto::Styles::Text> GetTextStyles();
443  std::vector<Alto::Styles::Paragraph> GetParagraphStyles();
445  void AddStyle(const Id &styleid) { textblock->AddStyle(styleid); }
447  void RemoveStyle(const Id &styleid) { textblock->RemoveStyle(styleid); }
449  const Rect& GetBBox() const { return block->GetAbsoluteBBox(); }
451  void SetBBox(const crn::Rect &r, bool erase_oob);
453  Option<StringUTF8> GetLanguage() const { return textblock->GetLanguage(); }
454 
456  SBlock GetBlock() { return block; }
458  SCBlock GetBlock() const { return block; }
459 
461  std::vector<Id> GetTextLines() const;
463  size_t GetNbTextLines() const { return textblock->GetNbTextLines(); }
465  TextLine GetTextLine(const Id &id);
467  TextLine AddTextLine(const crn::Rect &bbox);
469  TextLine AddTextLineAfter(const Id &pred, const crn::Rect &bbox);
471  TextLine AddTextLineBefore(const Id &next, const crn::Rect &bbox);
472 
474  void RemoveTextLine(const Id &tid);
475 
476  private:
477  SBlock block;
478  Alto::Layout::Page::Space::TextBlock *textblock;
479  std::shared_ptr<ViewLock> lock;
480  BlockPath path;
481  };
483  TextBlock GetTextBlock(const BlockPath &p);
484 
489  class Space
490  {
491  public:
492  Space(const SBlock &b, Alto::Layout::Page::Space &s, const std::shared_ptr<ViewLock> &l, const PagePath &p):block(b),space(&s),lock(l),path(p, s.GetId().Get()) { }
493  Space(const Space&) = default;
494  Space(Space&&) = default;
495  ~Space() { }
496  Space& operator=(const Space&) = default;
497  Space& operator=(Space&&) = default;
498 
500  SAlto GetAlto() { return lock->GetAlto(); }
502  SCAlto GetAlto() const { return lock->GetAlto(); }
504  Alto::Layout::Page::Space& GetElement() { return *space; }
506  const Alto::Layout::Page::Space& GetElement() const { return *space; }
507 
509  const SpacePath& GetPath() const { return path; }
510 
512  const Id& GetId() const { return space->GetId().Get(); }
514  std::vector<Id> GetStyles() const { return space->GetStyles(); }
516  std::vector<Alto::Styles::Text> GetTextStyles();
518  std::vector<Alto::Styles::Paragraph> GetParagraphStyles();
520  void AddStyle(const Id &styleid) { space->AddStyle(styleid); }
522  void RemoveStyle(const Id &styleid) { space->RemoveStyle(styleid); }
524  const Rect& GetBBox() const { return block->GetAbsoluteBBox(); }
526  void SetBBox(const crn::Rect &r, bool erase_oob);
527 
529  SBlock GetBlock() { return block; }
531  SCBlock GetBlock() const { return block; }
532 
534  std::vector<Id> GetTextBlocks() const;
536  TextBlock GetTextBlock(const Id &id);
538  TextBlock AddTextBlock(const crn::Rect &bbox);
540  TextBlock AddTextBlockAfter(const Id &pred, const crn::Rect &bbox);
542  TextBlock AddTextBlockBefore(const Id &next, const crn::Rect &bbox);
543 
545  void RemoveBlock(const Id &bid);
546 
547  private:
548  SBlock block;
549  Alto::Layout::Page::Space *space;
550  std::shared_ptr<ViewLock> lock;
551  SpacePath path;
552  };
554  Space GetSpace(const SpacePath &p);
555 
560  class Page
561  {
562  public:
563  Page(const SBlock &b, Alto::Layout::Page &p, const std::shared_ptr<ViewLock> &l, const String &vid):block(b),page(&p),lock(l),path(vid, p.GetId()) { }
564  Page(const Page&) = default;
565  Page(Page&&) = default;
566  ~Page() {}
567  Page& operator=(const Page&) = default;
568  Page& operator=(Page&&) = default;
569 
571  SAlto GetAlto() { return lock->GetAlto(); }
573  SCAlto GetAlto() const { return lock->GetAlto(); }
575  Alto::Layout::Page& GetElement() { return *page; }
577  const Alto::Layout::Page& GetElement() const { return *page; }
578 
580  const PagePath& GetPath() const { return path; }
581 
583  const Id& GetId() const { return page->GetId(); }
585  Option<StringUTF8> GetPageClass() const { return page->GetPageClass(); }
587  void SetPageClass(const StringUTF8 &s) { page->SetPageClass(s); }
589  std::vector<Id> GetStyles() const { return page->GetStyles(); }
591  std::vector<Alto::Styles::Text> GetTextStyles();
593  std::vector<Alto::Styles::Paragraph> GetParagraphStyles();
595  void AddStyle(const Id &styleid) { page->AddStyle(styleid); }
597  void RemoveStyle(const Id &styleid) { page->RemoveStyle(styleid); }
599  const Rect& GetBBox() const { return block->GetAbsoluteBBox(); }
601  void SetBBox(const crn::Rect &r, bool erase_oob);
603  int GetPhysicalImageNumber() const { return page->GetPhysicalImageNumber(); }
605  void SetPhysicalImageNumber(int pnum) { page->SetPhysicalImageNumber(pnum); }
607  Option<StringUTF8> GetPrintedImageNumber() const { return page->GetPrintedImageNumber(); }
609  void SetPrintedImageNumber(const StringUTF8 &s) { page->SetPrintedImageNumber(s); }
611  Option<AltoPage::Quality> GetQuality() const { return page->GetQuality(); }
613  void SetQuality(AltoPage::Quality q) { page->SetQuality(q); }
615  Option<StringUTF8> GetQualityDetail() const { return page->GetQualityDetail(); }
617  void SetQualityDetail(const StringUTF8 &s) { page->SetQualityDetail(s); }
619  Option<AltoPage::Position> GetPosition() const { return page->GetPosition(); }
621  Option<Id> GetProcessing() const { return page->GetProcessing(); }
623  Option<double> GetAccuracy() const { return page->GetAccuracy(); }
625  void SetAccuracy(double acc) { page->SetAccuracy(acc); }
627  Option<double> GetPageConfidence() const { return page->GetPageConfidence(); }
629  void SetPageConfidence(double c) { page->SetPageConfidence(c); }
630 
632  SBlock GetBlock() { return block; }
634  SCBlock GetBlock() const { return block; }
635 
637  std::vector<Id> GetSpaces() const;
639  Space GetSpace(const Id &spaceid);
643  Space AddTopMargin(const crn::Rect &bbox);
647  Space AddLeftMargin(const crn::Rect &bbox);
651  Space AddBottomMargin(const crn::Rect &bbox);
655  Space AddRightMargin(const crn::Rect &bbox);
659  Space AddPrintSpace(const crn::Rect &bbox);
660 
662  void RemoveSpace(const Id &sid);
663 
665  //void ImportContent(Page &other, const crn::Rect &crop = crn::Rect());
666 
667  private:
668  SBlock block;
669  Alto::Layout::Page *page;
670  std::shared_ptr<ViewLock> lock;
671  PagePath path;
672  };
674  Page GetPage(const PagePath &p);
675 
680  class View
681  {
682  public:
684  View(const std::shared_ptr<ViewLock> &l, const String &view_id):lock(l), id(view_id) { }
685  View(const View&) = default;
686  View(View&&) = default;
687  ~View() { }
688  View& operator=(const View&) = default;
689  View& operator=(View&&) = default;
690 
692  const String& GetId() const { return id; }
694  SBlock GetBlock() { return lock->GetBlock(); }
696  SCBlock GetBlock() const { return lock->GetBlock(); }
698  SAlto GetAlto() { return lock->GetAlto(); }
700  SCAlto GetAlto() const { return lock->GetAlto(); }
701 
703  Alto::Description& GetDescription() { return lock->GetAlto()->GetDescription(); }
704 
706  Alto::Styles& GetStyles() { return lock->GetAlto()->GetStyles(); }
707 
709  std::vector<Id> GetPages();
711  Page GetPage(const Id &pageId);
713  Page AddPage(int image_number, int w, int h, Option<AltoPage::Position> pos = Option<AltoPage::Position>());
715  Page AddPageAfter(const Id &pred, int image_number, int w, int h, Option<AltoPage::Position> pos = Option<AltoPage::Position>());
717  Page AddPageBefore(const Id &next, int image_number, int w, int h, Option<AltoPage::Position> pos = Option<AltoPage::Position>());
719  void RemovePage(const Id &pageId);
720 
722  void ResizeWord(const WordPath &p, const crn::Rect &r);
724  void ResizeTextLine(const TextLinePath &p, const crn::Rect &r, bool erase_oob);
726  void ResizeTextBlock(const BlockPath &p, const crn::Rect &r, bool erase_oob);
728  void ResizeSpace(const SpacePath &p, const crn::Rect &r, bool erase_oob);
729  private:
730 
731  std::shared_ptr<ViewLock> lock;
732  String id;
733  };
735  std::vector<String> GetViewIds() const { return doc->GetViewIds(); }
737  View GetView(const String &view_id);
739  size_t GetNbViews() const { return doc->GetNbViews(); }
741  View GetView(size_t index);
742 
744  void ResizeWord(const WordPath &p, const crn::Rect &r);
746  void ResizeTextLine(const TextLinePath &p, const crn::Rect &r, bool erase_oob);
748  void ResizeTextBlock(const BlockPath &p, const crn::Rect &r, bool erase_oob);
750  void ResizeSpace(const SpacePath &p, const crn::Rect &r, bool erase_oob);
751 
752  static const String& AltoPathKey();
753  static const String& PageKey();
754  static const String& SpaceKey();
755  static const String& TextBlockKey();
756  static const String& IllustrationKey();
757  static const String& GraphicalElementKey();
758  static const String& ComposedBlockKey();
759  static const String& TextLineKey();
760  static const String& WordKey();
762  private:
764  static std::unique_ptr<AltoWrapper> newFromList(const std::vector<std::pair<crn::Path, crn::Path> > &filelist, const crn::Path &documentname, crn::Progress *prog, bool throw_exceptions);
766  AltoWrapper(bool throw_exceptions);
768  void createAltos();
769 
770  std::shared_ptr<ViewLock> getLock(const String &view_id) const;
771 
772  SDocument doc;
773  mutable std::map<String, std::weak_ptr<ViewLock> > viewLocks;
774  bool throws;
775  };
776  CRN_ALIAS_SMART_PTR(AltoWrapper)
777  }
778 }
779 
780 
781 #endif
782 
ViewLock & operator=(const ViewLock &)=delete
SBlock GetBlock()
Returns the image.
static const String & GraphicalElementKey()
const Rect & GetBBox() const
Gets the coordinates of the text block.
TextBlock AddTextBlockAfter(const Id &pred, const crn::Rect &bbox)
Adds a text block to the space.
void SetWC(double conf)
Sets the OCR confidence of the word [0, 1].
bool operator<(const SpacePath &other) const
SpacePath(const PagePath &p, const Id &s)
SAlto GetAlto()
Gets the toplevel alto file.
void RemovePage(const Id &pageId)
Removes a page.
TextLine GetTextLine(const TextLinePath &p)
Gets a TextLine by path.
const Alto::Layout::Page::Space & GetElement() const
Gets the alto element.
Space AddPrintSpace(const crn::Rect &bbox)
Adds the print space.
const Id & GetId() const
Returns the id of the element.
Alto::Description & GetDescription()
Gets the global description part of the Alto (may be null)
AltoWrapper(const AltoWrapper &)=delete
A print space on a page.
bool operator!=(const WordPath &other) const
const String & GetId() const
Returns the id of the view.
std::vector< String > GetViewIds() const
Gets the list of the view ids of the document.
Page AddPageAfter(const Id &pred, int image_number, int w, int h, Option< AltoPage::Position > pos=Option< AltoPage::Position >())
Adds a page.
virtual ~SpacePath() override
const Rect & GetBBox() const
Gets the coordinates of the word.
Word & operator=(const Word &)=default
void ResizeTextLine(const TextLinePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a line and all its parents if needed.
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
bool operator==(const WordPath &other) const
TextBlock AddTextBlockBefore(const Id &next, const crn::Rect &bbox)
Adds a text block to the space.
const Id & GetId() const
Returns the id of the element.
const Id & GetId() const
Returns the id of the element.
AltoWrapper & operator=(const AltoWrapper &)=delete
static std::unique_ptr< AltoWrapper > NewFromDir(const crn::Path &directory, const crn::Path &documentname, const crn::Path &imagedirectory="", crn::Progress *prog=nullptr, bool throw_exceptions=true)
Creates a wrapper from a directory containing Altos.
Option< StringUTF8 > GetQualityDetail() const
Returns details on the quality of the original page.
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
Base class for a progress display.
Definition: CRNProgress.h:39
SCAlto GetAlto() const
Returns the alto.
Page GetPage(const PagePath &p)
Gets a Page by path.
TextLine AddTextLineBefore(const Id &next, const crn::Rect &bbox)
Adds a text line to the block.
void ResizeTextLine(const TextLinePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a line and all its parents if needed.
std::vector< Id > GetStyles() const
Returns the list of style references.
SpacePath & operator=(const SpacePath &)=default
bool operator<(const BlockPath &other) const
TextLinePath & operator=(const TextLinePath &)=default
const Alto::Layout::Page & GetElement() const
Gets the alto element.
StringUTF8 GetContent() const
Returns the transcription of the word.
bool operator!=(const SpacePath &other) const
SCAlto GetAlto() const
Returns the alto.
Space AddBottomMargin(const crn::Rect &bbox)
Adds a bottom margin.
Option< StringUTF8 > GetPrintedImageNumber() const
Returns the page number that is printed on the document.
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
Alto::Layout::Page::Space::TextBlock::TextLine & GetElement()
Gets the alto element.
Page GetPage(const Id &pageId)
Gets a page.
Option< double > GetWC() const
Returns the OCR confidence of the word [0, 1].
ViewLock(const ViewLock &)=delete
Space GetRightMargin()
Gets the space proxy on the right margin.
const BlockPath & GetPath() const
Returns the path to the text block.
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
Word AddWordAfter(const Id &pred, const StringUTF8 &text, const crn::Rect &bbox)
Adds a word to the line.
Word AddWordBefore(const Id &next, const StringUTF8 &text, const crn::Rect &bbox)
Adds a word to the line.
size_t GetNbTextLines() const
Returns the number of text lines in the text block.
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the space.
void ResizeSpace(const SpacePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a space and all its parents if needed.
SAlto GetAlto()
Gets the toplevel alto file.
void AddStyle(const Id &styleid)
Adds a reference to a style.
std::vector< Id > GetWords() const
Returns the ids of the words in the line.
virtual String ToString() const override
Word(const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine::Word &w, const std::shared_ptr< ViewLock > &l, const TextLinePath &t)
static const String & TextBlockKey()
static std::unique_ptr< AltoWrapper > NewFromList(Iter begin_, Iter end_, const crn::Path &documentname, crn::Progress *prog=nullptr, bool throw_exceptions=true)
Creates a wrapper from list of images and Alto paths (such as a map or any container<pair>) ...
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
void RemoveSpace(const Id &sid)
Removes a space.
Page AddPageBefore(const Id &next, int image_number, int w, int h, Option< AltoPage::Position > pos=Option< AltoPage::Position >())
Adds a page.
void AddStyle(const Id &styleid)
Adds a reference to a style.
static const String & ComposedBlockKey()
const Alto::Layout::Page::Space::TextBlock & GetElement() const
Gets the alto element.
WordPath & operator=(const WordPath &)=default
static const String & WordKey()
void RemoveWord(const Id &wid)
Removes a line element.
Alto::Layout::Page & GetElement()
Gets the alto element.
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
Space GetLeftMargin()
Gets the space proxy on the left margin.
const WordPath & GetPath() const
Returns the path to the string.
SAlto GetAlto()
Gets the toplevel alto file.
SAlto GetAlto()
Gets the toplevel alto file.
const TextLinePath & GetPath() const
Returns the path to the text line.
SCDocument GetDocument() const
void Synchronize(bool reset=false)
Creates CRNBlocks and ids where there is none.
static const String & IllustrationKey()
WordPath(const TextLinePath &l, const Id &w)
void SetQualityDetail(const StringUTF8 &s)
Sets the details on the quality of the original page.
void SetContent(const StringUTF8 &s)
Sets the transcription of the word.
View GetView(const String &view_id)
Gets a view by id.
int GetPhysicalImageNumber() const
Returns the number of the page within the document.
void UnsetBaseline()
Unsets the baseline ordinate.
ViewLock & operator=(ViewLock &&v)
void SetPrintedImageNumber(const StringUTF8 &s)
Sets the page number that is printed on the document.
A UTF32 character string class.
Definition: CRNString.h:61
SBlock GetBlock()
Returns the image.
const String AddView(const Path &imagename, const Path &altoname="")
Adds a view to the document.
void AddStyle(const Id &styleid)
Adds a reference to a style.
Option< AltoWord::SubstitutionType > GetSubstitutionType() const
Returns the substitution type of the word.
void ResizeWord(const WordPath &p, const crn::Rect &r)
Changes the size of a word and all its parents if needed.
TextBlock GetTextBlock(const Id &id)
Gets a text block proxy.
void SetQuality(AltoPage::Quality q)
Sets the quality of the original page.
Space GetBottomMargin()
Gets the space proxy on the bottom margin.
bool operator!=(const PagePath &other) const
Option< StringUTF8 > GetPageClass() const
Returns the class of the page (user defined class such as "title")
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
void SetAccuracy(double acc)
Sets the estimated % of OCR accuracy on the page [0, 100].
TextLinePath(const BlockPath &b, const Id &l)
SBlock GetBlock()
Returns the image.
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
const PagePath & GetPath() const
Returns the path to the page.
std::vector< Id > GetStyles() const
Returns the list of style references.
TextBlock AddTextBlock(const crn::Rect &bbox)
Adds a text block to the space.
std::vector< Id > GetTextBlocks() const
Returns the ids of the text blocks in the page space.
Page(const SBlock &b, Alto::Layout::Page &p, const std::shared_ptr< ViewLock > &l, const String &vid)
SBlock GetBlock()
Returns the image.
static const crn::String & Separator()
Space GetSpace(const SpacePath &p)
Gets a Space by path.
bool operator!=(const TextLinePath &other) const
Word AddWord(const StringUTF8 &text, const crn::Rect &bbox)
Adds a word to the line.
static const TextLinePath & NullPath()
A convenience class for file paths.
Definition: CRNPath.h:39
Option< AltoPage::Quality > GetQuality() const
Returns the quality of the original page.
void ResizeWord(const WordPath &p, const crn::Rect &r)
Changes the size of a word and all its parents if needed.
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
virtual ~BlockPath() override
const Id & GetId() const
Returns the id of the element.
bool operator==(const SpacePath &other) const
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the text line.
Alto::Styles & GetStyles()
Gets the styles description part of the Alto (may be null)
virtual String ToString() const override
const Rect & GetBBox() const
Gets the coordinates of the page.
static const BlockPath & NullPath()
static const WordPath & NullPath()
Alto::Layout::Page::Space::TextBlock & GetElement()
Gets the alto element.
std::vector< Id > GetStyles() const
Returns the list of style references.
TextBlock(const SBlock &b, Alto::Layout::Page::Space::TextBlock &t, const std::shared_ptr< ViewLock > &l, const SpacePath &s)
Option< double > GetAccuracy() const
Returns the estimated % of OCR accuracy on the page [0, 100].
Option< bool > GetManuallyCorrected()
Returns whether the line was manually corrected or not.
SCAlto GetAlto() const
Returns the alto.
Alto::Layout::Page::Space::TextBlock::TextLine::Word & GetElement()
Gets the alto element.
virtual ~TextLinePath() override
const Rect & GetBBox() const
Gets the coordinates of the space.
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the page.
Word GetWord(const Id &id)
Gets a word proxy.
void ResizeTextBlock(const BlockPath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a text block and all its parents if needed.
size_t GetNbWords() const
Returns the number of words in the line.
static const String & TextLineKey()
SCAlto GetAlto() const
Returns the alto.
Space GetTopMargin()
Gets the space proxy on the top margin.
static std::unique_ptr< AltoWrapper > NewFromDocument(const crn::SDocument &document, bool create_altos=false, bool throw_exceptions=true)
Creates a wrapper from a crn::Document that was created by Nimrod.
virtual String ToString() const override
std::vector< Id > GetTextLines() const
Returns the ids of the text lines in the page space.
Space AddRightMargin(const crn::Rect &bbox)
Adds a right margin.
Option< Id > GetProcessing() const
Returns the id of the processing applied to the page.
static std::unique_ptr< AltoWrapper > NewFromDirs(const crn::Path &image_directory, const crn::Path &xml_directory, const crn::Path &documentname, crn::Progress *prog=nullptr, bool throw_exceptions=true)
Creates a wrapper from two directories containing images and Altos with the same base names...
Option< double > GetPageConfidence() const
Returns the confidence of OCR on the page [0, 1].
View & operator=(const View &)=default
void SetBBox(const crn::Rect &r, bool erase_oob)
Sets the coordinates of the text block.
void AddStyle(const Id &styleid)
Adds a reference to a style.
Space(const SBlock &b, Alto::Layout::Page::Space &s, const std::shared_ptr< ViewLock > &l, const PagePath &p)
bool operator==(const BlockPath &other) const
static const String & PageKey()
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
TextLine GetTextLine(const Id &id)
Gets a text line proxy.
View(const std::shared_ptr< ViewLock > &l, const String &view_id)
Constructor.
void SetPageClass(const StringUTF8 &s)
Sets the class of the page (user defined class such as "title")
SCBlock GetBlock() const
Returns the image.
XML Alto file wrapper to crn::Document.
bool operator<(const PagePath &other) const
bool operator!=(const BlockPath &other) const
void AddStyle(const Id &styleid)
Adds a reference to a style.
void SetPhysicalImageNumber(int pnum)
Sets the number of the page within the document.
bool operator==(const TextLinePath &other) const
BlockPath(const SpacePath &s, const Id &b)
Option< StringUTF8 > GetSubstitutionContent()
Returns the substitution of the word.
SCAlto GetAlto() const
Returns the alto.
SCBlock GetBlock() const
Returns the image.
TextBlock & operator=(const TextBlock &)=default
Space AddTopMargin(const crn::Rect &bbox)
Adds a top margin.
size_t GetNbViews() const
Returns the number of views.
const SpacePath & GetPath() const
Returns the path to the space.
Option< StringUTF8 > GetLanguage() const
Returns the language of the text inside the block.
SCBlock GetBlock() const
Returns the image.
static const String & AltoPathKey()
Space AddLeftMargin(const crn::Rect &bbox)
Adds a left margin.
Word GetWord(const WordPath &p)
Gets a Word by path.
Space GetPrintSpace()
Gets the space proxy on the print space.
SBlock GetBlock()
Returns the image.
Page AddPage(int image_number, int w, int h, Option< AltoPage::Position > pos=Option< AltoPage::Position >())
Adds a page.
SAlto GetAlto()
Returns the alto.
TextLine AddTextLineAfter(const Id &pred, const crn::Rect &bbox)
Adds a text line to the block.
const Alto::Layout::Page::Space::TextBlock::TextLine & GetElement() const
Gets the alto element.
Alto text block.
BlockPath & operator=(const BlockPath &)=default
TextLine(const SBlock &b, Alto::Layout::Page::Space::TextBlock::TextLine &t, const std::shared_ptr< ViewLock > &l, const BlockPath &bp)
std::vector< Alto::Styles::Paragraph > GetParagraphStyles()
Returns the paragraph styles.
void SetPageConfidence(double c)
Sets the confidence of OCR on the page [0, 1].
TextLine AddTextLine(const crn::Rect &bbox)
Adds a text line to the block.
PagePath & operator=(const PagePath &)=default
PagePath(const String &v, const Id &p)
SCBlock GetBlock() const
Returns the image.
SCAlto GetAlto() const
Returns the alto.
Page & operator=(const Page &)=default
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
Option< AltoPage::Position > GetPosition() const
Returns the position of the page.
void ResizeTextBlock(const BlockPath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a text block and all its parents if needed.
SAlto GetAlto()
Gets the toplevel alto file.
TextBlock GetTextBlock(const BlockPath &p)
Gets a TextBlock by path.
CRN_ALIAS_SMART_PTR(ImageBW)
A character string class.
Definition: CRNStringUTF8.h:49
void UnsetFontStyle()
Unsets the font style of the word.
const Id & GetId() const
Returns the id of the element.
SCBlock GetBlock() const
Returns the image.
void SetBBox(const crn::Rect &r)
Sets the coordinates of the word.
void RemoveBlock(const Id &bid)
Removes a block.
std::vector< Id > GetSpaces() const
Returns the ids of the spaces in the page.
A class to store an optional value.
Definition: CRNOption.h:33
virtual ~WordPath() override
static const String & SpaceKey()
static std::unique_ptr< AltoWrapper > NewFromImages(Iter begin_, Iter end_, const crn::Path &documentname, bool throw_exceptions=true)
Creates a wrapper from a list of images.
Internal class used to save modifications at the right time.
Option< double > GetBaseline() const
Returns the ordinate of the baseline.
Alto::Layout::Page::Space & GetElement()
Gets the alto element.
SBlock GetBlock()
Returns the image.
std::vector< Id > GetStyles() const
Returns the list of style references.
std::vector< Id > GetStyles() const
Returns the list of style references.
bool operator<(const TextLinePath &other) const
Option< Alto::Styles::Text::FontStyle > GetFontStyle() const
Returns the font style of the word.
std::vector< Id > GetPages()
Gets the list of page ids.
void SetBaseline(double d, bool check_bounds=true)
Sets the baseline ordinate.
void ResizeSpace(const SpacePath &p, const crn::Rect &r, bool erase_oob)
Changes the size of a space and all its parents if needed.
void SetSubstitution(AltoWord::SubstitutionType stype, const StringUTF8 &scontent)
Sets the substitution of the word.
bool operator==(const PagePath &other) const
Space GetSpace(const Id &spaceid)
Gets a space proxy.
Space & operator=(const Space &)=default
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
void SetFontStyle(Alto::Styles::Text::FontStyle fs)
Sets the font style of the word.
const Rect & GetBBox() const
Gets the coordinates of the text line.
const Alto::Layout::Page::Space::TextBlock::TextLine::Word & GetElement() const
Gets the alto element.
void RemoveTextLine(const Id &tid)
Removes a text line.
static const PagePath & NullPath()
static const SpacePath & NullPath()
TextLine & operator=(const TextLine &)=default
virtual String ToString() const override
std::vector< Alto::Styles::Text > GetTextStyles()
Returns the text styles.
SCBlock GetBlock() const
Returns the image.
virtual String ToString() const
bool operator<(const WordPath &other) const
A rectangle class.
Definition: CRNRect.h:46