libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNAltoTextBlock.hpp
Go to the documentation of this file.
1 /* Copyright 2011-2016 CoReNum
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNAltoTextBlock.hpp
19  * \author Yann LEYDIER
20  */
21 
23 #ifdef CRNAlto_HEADER
24 
30 class TextBlock: public Block
31 {
32  public:
33  TextBlock(const TextBlock&) = delete;
34  TextBlock(TextBlock&&) = default;
35  virtual ~TextBlock() override {}
36  TextBlock& operator=(const TextBlock&) = delete;
37  TextBlock& operator=(TextBlock&&) = default;
38 
40  Option<StringUTF8> GetLanguage() const;
41 
46  class TextLine: public Element
47  {
48  public:
49  TextLine(const TextLine&) = delete;
50  TextLine(TextLine&&) = default;
51  virtual ~TextLine() override {}
52  TextLine& operator=(const TextLine&) = delete;
53  TextLine& operator=(TextLine&&) = default;
54 
56  const Id& GetId() const { return id; }
57 
59  std::vector<Id> GetStyles() const;
61  void AddStyle(const Id &styleid);
63  void RemoveStyle(const Id &styleid);
64 
66  double GetWidth() const;
68  void SetWidth(double d);
70  double GetHeight() const;
72  void SetHeight(double d);
74  double GetHPos() const;
76  void SetHPos(double d);
78  double GetVPos() const;
80  void SetVPos(double d);
82  Option<double> GetBaseline() const;
84  void SetBaseline(double d, bool check_bounds = true);
86  void UnsetBaseline();
88  Option<bool> GetManuallyCorrected() const;
90  void SetManuallyCorrected(bool c);
93 
98  class LineElement: public Element
99  {
100  public:
101  LineElement(const LineElement&) = delete;
102  LineElement(LineElement&&) = default;
103  virtual ~LineElement() override {}
104  LineElement& operator=(const LineElement&) = delete;
105  LineElement& operator=(LineElement&&) = default;
106 
108  Option<double> GetWidth() const;
110  void SetWidth(double d);
112  Option<double> GetHPos() const;
114  void SetHPos(double d);
116  Option<double> GetVPos() const;
118  void SetVPos(double d);
119 
120  protected:
122  LineElement(const Element &el);
124  LineElement(const Element &el, const Option<double> &x, const Option<double> &y, const Option<double> &w);
125  };
126  typedef std::weak_ptr<LineElement> LineElementPtr;
127 
129  std::vector<LineElementPtr> GetLineElements() const;
131  size_t GetNbLineElements() const { return lineElements.size(); }
132 
137  class Word: public LineElement
138  {
139  public:
140  Word(const Word&) = delete;
141  Word(Word&&) = default;
142  virtual ~Word() override {}
143  Word& operator=(const Word&) = delete;
144  Word& operator=(Word&&) = default;
145 
147  const Option<Id>& GetId() const { return id; }
148 
150  std::vector<Id> GetStyles() const;
152  void AddStyle(const Id &styleid);
154  void RemoveStyle(const Id &styleid);
155 
157  StringUTF8 GetContent() const;
159  void SetContent(const StringUTF8 &s);
161  Option<double> GetHeight() const;
163  void SetHeight(double d);
165  Option<Alto::Styles::Text::FontStyle> GetFontStyle() const;
167  void SetFontStyle(Alto::Styles::Text::FontStyle fs);
169  void UnsetFontStyle();
172  Option<SubstitutionType> GetSubstitutionType() const;
174  Option<StringUTF8> GetSubstitutionContent() const;
176  void SetSubstitution(SubstitutionType stype, const StringUTF8 &scontent);
178  Option<double> GetWC() const;
180  void SetWC(double conf);
182  Option<StringUTF8> GetCC() const;
184  void UnsetWC();
185  // TODO alternatives
186 
187  private:
189  Word(const Element &el);
191  Word(const Element &el, const Id &id_, const StringUTF8 &text, const Option<double> &x, const Option<double> &y, const Option<double> &w, const Option<double> &h);
192 
193  Option<Id> id;
194  friend class TextLine;
195  };
196  typedef std::weak_ptr<Word> WordPtr;
197 
199  const std::vector<WordPtr>& GetWords() const;
201  size_t GetNbWords() const { return words.size(); }
203  Word& GetWord(const Id &id_);
205  Word& AddWord(const Id &id_, const StringUTF8 &text, const Option<double> &x = Option<double>(), const Option<double> &y = Option<double>(), const Option<double> &w = Option<double>(), const Option<double> &h = Option<double>());
207  Word& AddWordAfter(const Id &pred, const Id &id_, const StringUTF8 &text, const Option<double> &x = Option<double>(), const Option<double> &y = Option<double>(), const Option<double> &w = Option<double>(), const Option<double> &h = Option<double>());
209  Word& AddWordBefore(const Id &next, const Id &id_, const StringUTF8 &text, const Option<double> &x = Option<double>(), const Option<double> &y = Option<double>(), const Option<double> &w = Option<double>(), const Option<double> &h = Option<double>());
210 
212  void RemoveWord(const Id &wid);
213 
218  class WhiteSpace: public LineElement
219  {
220  public:
221  WhiteSpace(const WhiteSpace&) = delete;
222  WhiteSpace(WhiteSpace&&) = default;
223  virtual ~WhiteSpace() override {}
224  WhiteSpace& operator=(const WhiteSpace&) = delete;
225  WhiteSpace& operator=(WhiteSpace&&) = default;
226 
228  const Option<Id>& GetId() const { return id; }
229 
230  private:
232  WhiteSpace(const Element &el);
233 
234  Option<Id> id;
235 
236  friend class TextLine;
237  };
238  typedef std::weak_ptr<WhiteSpace> WhiteSpacePtr;
239 
244  class Hyphen: public LineElement
245  {
246  public:
247  Hyphen(const Hyphen&) = delete;
248  Hyphen(Hyphen&&) = default;
249  virtual ~Hyphen() override {}
250  Hyphen& operator=(const Hyphen&) = delete;
251  Hyphen& operator=(Hyphen&&) = default;
252 
254  StringUTF8 GetContent() const;
256  void SetContent(const StringUTF8 &s);
257 
258  private:
260  Hyphen(const Element &el);
261 
262  friend class TextLine;
263  };
264  typedef std::weak_ptr<Hyphen> HyphenPtr;
265 
266  private:
268  TextLine(const Element &el);
270  TextLine(const Element &el, const Id &id_, double x, double y, double w, double h);
272  void update_subelements();
273 
274  Id id;
275  mutable std::vector<std::shared_ptr<LineElement> > lineElements;
276  mutable std::vector<WordPtr> words;
277  mutable std::map<Id, WordPtr> id_words;
278 
279  friend class TextBlock;
280  };
281  typedef std::weak_ptr<TextLine> TextLinePtr;
282 
284  std::vector<TextLinePtr> GetTextLines() const;
286  size_t GetNbTextLines() const { return lines.size(); }
288  TextLine& GetTextLine(const Id &id_);
290  TextLine& AddTextLine(const Id &id_, double x, double y, double w, double h);
292  TextLine& AddTextLineAfter(const Id &pred, const Id &id_, double x, double y, double w, double h);
294  TextLine& AddTextLineBefore(const Id &next, const Id &id_, double x, double y, double w, double h);
295 
297  void RemoveTextLine(const Id &tid);
298 
299  private:
301  TextBlock(const Element &el);
303  TextBlock(const Element &el, const Id &id_, int x, int y, int w, int h);
305  void update_subelements();
306 
307  std::vector<std::shared_ptr<TextLine> > lines;
308  std::map<Id, TextLinePtr> id_lines;
309 
310  friend class Space;
311 };
312 #else
313 #error you cannot include this file directly
314 #endif
315 
TextLine & GetTextLine(const Id &id_)
Returns a text line contained in the block.
const std::vector< WordPtr > & GetWords() const
Returns the list of words in the line.
Base class for elements in a text line.
Option< double > GetVPos() const
Returns the ordinate of the element.
virtual ~TextBlock() override
A print space on a page.
const Id & GetId() const
Returns the id of the element.
TextLine & AddTextLine(const Id &id_, double x, double y, double w, double h)
Adds a text line in the block.
void AddStyle(const Id &styleid)
Adds a reference to a style.
StringUTF8 GetContent() const
Returns the transcription of the word.
Hyphen & operator=(const Hyphen &)=delete
double GetHPos() const
Returns the abscissa of the line.
crn::StringUTF8 Id
Definition: CRNAltoUtils.h:31
Option< Alto::Styles::Text::FontStyle > GetFontStyle() const
Returns the font style of the word.
Option< StringUTF8 > GetCC() const
Returns the OCR confidence of the characters.
Option< double > GetHeight() const
Returns the height of the word.
void SetWidth(double d)
Sets the width of the element.
Word & operator=(const Word &)=delete
void UnsetBaseline()
Unsets the baseline ordinate.
double GetHeight() const
Returns the height of the line.
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
std::weak_ptr< WhiteSpace > WhiteSpacePtr
TextLine & AddTextLineAfter(const Id &pred, const Id &id_, double x, double y, double w, double h)
Adds a text line in the block.
WhiteSpace(const WhiteSpace &)=delete
void SetHeight(double d)
Sets the height of the line.
Word & AddWord(const Id &id_, const StringUTF8 &text, const Option< double > &x=Option< double >(), const Option< double > &y=Option< double >(), const Option< double > &w=Option< double >(), const Option< double > &h=Option< double >())
Adds a word in the line.
TextLine & operator=(const TextLine &)=delete
void SetHPos(double d)
Sets the abscissa of the line.
void SetWidth(double d)
Sets the width of the line.
std::weak_ptr< Word > WordPtr
Word & AddWordBefore(const Id &next, const Id &id_, const StringUTF8 &text, const Option< double > &x=Option< double >(), const Option< double > &y=Option< double >(), const Option< double > &w=Option< double >(), const Option< double > &h=Option< double >())
Adds a word in the line.
void SetManuallyCorrected(bool c)
Sets whether the line was manually corrected or not.
Option< double > GetWC() const
Returns the OCR confidence of the word [0, 1].
std::weak_ptr< Hyphen > HyphenPtr
std::vector< Id > GetStyles() const
Returns the list of style references.
Option< bool > GetManuallyCorrected() const
Returns whether the line was manually corrected or not.
std::vector< Id > GetStyles() const
Returns the list of style references.
void SetWC(double conf)
Sets the OCR confidence of the word [0, 1].
Alto text line.
void RemoveStyle(const Id &styleid)
Removes a reference to a style.
void RemoveTextLine(const Id &tid)
Removes a text line.
Word(const Word &)=delete
TextLine(const TextLine &)=delete
Alto white space ("SP" element)
StringUTF8 GetContent() const
Returns hyphenation marker.
void UnsetFontStyle()
Unsets the font style of the word.
Option< double > GetWidth() const
Returns the width of the element.
TextBlock(const TextBlock &)=delete
TextBlock & operator=(const TextBlock &)=delete
Alto word ("String" element)
void SetHeight(double d)
Sets the height of the word.
const Option< Id > & GetId() const
Returns the id of the element.
size_t GetNbLineElements() const
Returns the number of elements in the line.
Option< double > GetHPos() const
Returns the abscissa of the element.
double GetVPos() const
Returns the ordinate of the line.
void SetHPos(double d)
Sets the abscissa of the element.
void SetContent(const StringUTF8 &s)
Sets the hyphenation marker.
Hyphen(const Hyphen &)=delete
void SetVPos(double d)
Sets the ordinate of the element.
LineElement(const LineElement &)=delete
void SetFontStyle(Alto::Styles::Text::FontStyle fs)
Sets the font style of the word.
const Option< Id > & GetId() const
Returns the id of the element.
Word & GetWord(const Id &id_)
Returns a word contained in the line.
Option< SubstitutionType > GetSubstitutionType() const
Returns the substitution type of the word.
TextLine & AddTextLineBefore(const Id &next, const Id &id_, double x, double y, double w, double h)
Adds a text line in the block.
size_t GetNbTextLines() const
Returns the number of text lines in the block.
std::weak_ptr< TextLine > TextLinePtr
void SetVPos(double d)
Sets the ordinate of the line.
Option< StringUTF8 > GetLanguage() const
Returns the language of the text inside the block.
WhiteSpace & operator=(const WhiteSpace &)=delete
std::vector< LineElementPtr > GetLineElements() const
Returns the list of elements in the line.
Option< StringUTF8 > GetSubstitutionContent() const
Returns the substitution of the word.
Alto text block.
std::vector< TextLinePtr > GetTextLines() const
Returns the list of text lines in the block.
Alto hyphen ("HYP" element)
void SetContent(const StringUTF8 &s)
Sets the transcription of the word.
size_t GetNbWords() const
Returns the number of words in the line.
Word & AddWordAfter(const Id &pred, const Id &id_, const StringUTF8 &text, const Option< double > &x=Option< double >(), const Option< double > &y=Option< double >(), const Option< double > &w=Option< double >(), const Option< double > &h=Option< double >())
Adds a word in the line.
virtual ~TextLine() override
double GetWidth() const
Returns the width of the line.
void UnsetManuallyCorrected()
Unsets whether the line was manually corrected or not.
Option< double > GetBaseline() const
Returns the ordinate of the baseline.
void UnsetWC()
Unsets the OCR confidence of the word.
void SetBaseline(double d, bool check_bounds=true)
Sets the baseline ordinate.
void SetSubstitution(SubstitutionType stype, const StringUTF8 &scontent)
Sets the substitution of the word.
std::weak_ptr< LineElement > LineElementPtr
void RemoveWord(const Id &wid)
Removes a word.
LineElement & operator=(const LineElement &)=delete
void AddStyle(const Id &styleid)
Adds a reference to a style.