libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNTextFile.h
Go to the documentation of this file.
1 /* Copyright 2006-2016 Yann LEYDIER, CoReNum, INSA-Lyon, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNTextFile.h
19  * \author Yann LEYDIER
20  */
21 
22 #ifndef CRNTEXTFILE_HEADER
23 #define CRNTEXTFILE_HEADER
24 
25 #include <CRNString.h>
26 #include <CRNIO/CRNPath.h>
27 #include <vector>
28 #include <set>
29 #include <map>
30 
31 namespace crn
32 {
33  /****************************************************************************/
    /*! \brief A text file.
     *
     * Exposes its content as a String through GetText() and keeps a file Path and an
     * Encoding as private state. The class is move-only: copy construction and copy
     * assignment are deleted, move construction and move assignment are defaulted.
     *
     * NOTE(review): this listing skips some original lines (its numbering has gaps),
     * so declarations may be missing here; in particular the member \c text used by
     * GetText() and SplitText() is not declared in the visible part. Confirm against
     * the real header.
     */
43  class TextFile
44  {
45  public:
    /*! \brief Allowed text encoding */
47  enum class Encoding { UTF_8, UTF_32 };
48 
    /*! \brief Constructor from a file name
     * \param[in] fname path of the text file (presumably read at construction; confirm in CRNTextFile.cpp)
     * \param[in] enc encoding to use, UTF-8 when omitted
     */
52  TextFile(const Path &fname, Encoding enc = Encoding::UTF_8);
    /* Copy construction is disabled. */
53  TextFile(const TextFile&) = delete;
    /* Move construction uses the compiler-generated implementation. */
54  TextFile(TextFile&&) = default;
    /*! \brief Destructor */
56  ~TextFile() = default;
57 
    /* Copy assignment is disabled. */
58  TextFile& operator=(const TextFile&) = delete;
    /* Move assignment uses the compiler-generated implementation. */
59  TextFile& operator=(TextFile&&) = default;
60 
    /*! \brief Saves to the given path
     * \param[in] fname destination path
     * NOTE(review): the encoding used is not visible here; presumably the one stored in original_encoding. Confirm.
     */
62  void Save(const Path &fname);
    /*! \brief Saves to the given path with an explicit encoding
     * \param[in] fname destination path
     * \param[in] enc encoding to write with (presumably; confirm in CRNTextFile.cpp)
     */
64  void Save(const Path &fname, Encoding enc);
    /*! \brief Overwrites the loaded file */
66  void Save();
67 
    /*! \brief Get text content
     * \return a constant reference to the stored text; never throws (noexcept)
     */
69  const String& GetText() const noexcept {return text;};
70 
    /*! \brief Extract words
     *
     * Splits the text with String::Split, using whitespace characters (space, CR, LF, tab)
     * and the punctuation/symbol characters listed in the literal below as the separator set.
     * \return the resulting pieces, in a vector
     */
72  std::vector<String> SplitText() const { return text.Split(U" \r\n\t,.!?:;&\"\'({[|`_\\^@)]}=+^$£%*/<>-"); }
    /*! \brief Extract unique words
     * \param[in] case_sensitive false by default (presumably words are then compared ignoring case; confirm)
     * \return a set of words
     */
74  std::set<String> ExtractWords(bool case_sensitive = false) const;
    /*! \brief Extract unique words sorted by frequency
     * \param[in] case_sensitive false by default (presumably words are then compared ignoring case; confirm)
     * \return a map from int to String
     *
     * NOTE(review): a std::map holds a single value per key. If the int key is the word
     * frequency, two words with the same frequency cannot both be stored. Check the
     * implementation in CRNTextFile.cpp.
     */
76  std::map<int, String> ExtractWordsByFrequency(bool case_sensitive = false) const;
77 
80  private:
    /* Path associated with this text file (presumably the target of the argument-less Save(); confirm). */
81  Path filename;
    /* Encoding associated with the file (presumably the one given to the constructor; confirm). */
82  Encoding original_encoding;
83  };
84 
86 }
87 
88 
89 #endif
const String & GetText() const noexcept
Get text content.
Definition: CRNTextFile.h:69
A text file.
Definition: CRNTextFile.h:43
std::vector< String > Split(const String &sep) const
Splits the string into multiple strings delimited by a set of separators.
Definition: CRNString.cpp:772
Encoding
Allowed text encoding.
Definition: CRNTextFile.h:47
void Save()
Overwrites the loaded file.
A UTF32 character string class.
Definition: CRNString.h:61
TextFile(Encoding enc=Encoding::UTF_8)
Blank constructor.
Definition: CRNTextFile.cpp:35
TextFile & operator=(const TextFile &)=delete
~TextFile()=default
Destructor.
A convenience class for file paths.
Definition: CRNPath.h:39
std::vector< String > SplitText() const
Extract words.
Definition: CRNTextFile.h:72
std::set< String > ExtractWords(bool case_sensitive=false) const
Extract unique words.
CRN_ALIAS_SMART_PTR(ImageBW)
std::map< int, String > ExtractWordsByFrequency(bool case_sensitive=false) const
Extract unique words sorted by frequency.