libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNCharsetConverter.cpp
Go to the documentation of this file.
1 /* Copyright 2012-2014 CoReNum
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNCharsetConverter.cpp
19  * \author Yann LEYDIER
20  */
21 
23 #include <errno.h>
24 #include <CRNStringUTF8.h>
25 #include <CRNi18n.h>
26 
27 using namespace crn;
28 
35 CharsetConverter::CharsetConverter(const std::string &to_code, bool translit, bool throw_exceptions):
36  silent(true),
37  throws(throw_exceptions)
38 {
39  Reset(to_code, translit);
40 }
41 
44 {
45  if (!silent)
46  {
47  iconv_close(fromutf);
48  iconv_close(toutf);
49  }
50 }
51 
57 void CharsetConverter::Reset(const std::string &to_code, bool translit)
58 {
59  if (!silent)
60  {
61  iconv_close(fromutf);
62  iconv_close(toutf);
63  }
64  if (to_code.empty())
65  throw crn::ExceptionInvalidArgument(_("Null charset"));
66  if (to_code == "utf-8")
67  {
68  silent = true;
69  current_code = to_code;
70  return;
71  }
72  else
73  {
74  silent = false;
75  }
76  std::string to(to_code), from("utf-8");
77  if (translit)
78  to += "//TRANSLIT";
79  fromutf = iconv_open(to.c_str(), from.c_str());
80  if (fromutf == (iconv_t)(-1))
81  throw crn::ExceptionInvalidArgument(_("Cannot convert from ") + to_code);
82  to = "utf-8";
83  from = to_code;
84  if (translit)
85  to += "//TRANSLIT";
86  toutf = iconv_open(to.c_str(), from.c_str());
87  if (toutf == (iconv_t)(-1))
88  {
89  iconv_close(fromutf);
90  throw crn::ExceptionInvalidArgument(_("Cannot convert to ") + to_code);
91  }
92  current_code = to_code;
93 }
94 
102 std::string CharsetConverter::FromUTF8(const StringUTF8 &str, Status *stat) const
103 {
104  if (silent)
105  return str.Std();
106  if (str.IsEmpty())
107  return std::string();
108  return fromUTF8(str, str.Size() * 2, stat);
109 }
110 
119 std::string CharsetConverter::fromUTF8(const StringUTF8 &str, size_t buff, Status *stat) const
120 {
121  if (buff == 0)
122  return std::string();
123  const char *in(&str[0]);
124  size_t ins = str.Size();
125  if (ins == 0)
126  return std::string();
127  size_t outs = buff;
128  std::vector<char> ret(outs, '\0');
129  char *out = &ret.front();
130 #ifdef _MSC_VER
131  size_t ans = iconv(fromutf, (const char**)&in, &ins, &out, &outs);
132 #else
133  size_t ans = iconv(fromutf, (char**)&in, &ins, &out, &outs);
134 #endif
135  if (outs == 0)
136  return fromUTF8(str, buff * 2, stat);
137  if (ans == (size_t)(-1))
138  {
139  switch (errno)
140  {
141  case E2BIG:
142  return fromUTF8(str, buff * 2, stat);
143  case EILSEQ:
144  if (throws)
145  throw ExceptionInvalidCharacter(_("Invalid character."));
146  if (stat)
147  *stat = Status::INVALID; // modify after exception throw to preserve state
148  break;
149  case EINVAL:
150  if (throws)
151  throw ExceptionIncompleteCode(_("Incomplete multibyte character."));
152  if (stat)
153  *stat = Status::INCOMPLETE;
154  break;
155  }
156  }
157  return std::string(&ret.front());
158 }
159 
167 StringUTF8 CharsetConverter::ToUTF8(const std::string &str, Status *stat) const
168 {
169  if (silent)
170  return str;
171  if (str.empty())
172  return StringUTF8();
173  return toUTF8(str, str.size() * 2, stat);
174 }
175 
184 StringUTF8 CharsetConverter::toUTF8(const std::string &str, size_t buff, Status *stat) const
185 {
186  if (buff == 0)
187  return StringUTF8();
188  const char *in(str.c_str());
189  size_t ins = str.size();
190  if (ins == 0)
191  return StringUTF8();
192  size_t outs = buff;
193  std::vector<char> ret(outs, '\0');
194  char *out = &ret.front();
195 #ifdef _MSC_VER
196  size_t ans = iconv(toutf, (const char**)&in, &ins, &out, &outs);
197 #else
198  size_t ans = iconv(toutf, (char**)&in, &ins, &out, &outs);
199 #endif
200  if (outs == 0)
201  return toUTF8(str, buff * 2, stat);
202  if (ans == (size_t)(-1))
203  {
204  switch (errno)
205  {
206  case E2BIG:
207  return toUTF8(str, buff * 2, stat);
208  case EILSEQ:
209  if (throws)
210  throw ExceptionInvalidCharacter(_("Invalid character in: ") + str);
211  if (stat)
212  *stat = Status::INVALID;
213  break;
214  case EINVAL:
215  if (throws)
216  throw ExceptionIncompleteCode(_("Incomplete multibyte character in: ") + str);
217  if (stat)
218  *stat = Status::INCOMPLETE;
219  break;
220  }
221  }
222  return StringUTF8(&ret.front());
223 }
224 
231 
238 
245 
246 
std::string FromUTF8(const crn::StringUTF8 &str, Status *stat=nullptr) const
Converts from unicode to the selected charset.
#define _(String)
Definition: CRNi18n.h:51
void Reset(const std::string &to_code, bool translit=true)
Changes the charset to convert.
bool IsEmpty() const noexcept
Checks if the string is empty.
CharsetConverter(const std::string &to_code, bool translit=true, bool throw_exceptions=true)
Constructor.
#define true
Definition: ConvertUTF.cpp:57
ExceptionInvalidCharacter() noexcept
Default constructor.
crn::StringUTF8 ToUTF8(const std::string &str, Status *stat=nullptr) const
Converts to unicode.
std::string & Std()&noexcept
Conversion to std string.
size_t Size() const noexcept
Returns the number of bytes in the string.
A character string class.
Definition: CRNStringUTF8.h:49
Base class for exceptions.
Definition: CRNException.h:39
Exception() noexcept
Default constructor.
Invalid argument error (e.g., a null pointer)
Definition: CRNException.h:107