libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UtfConverter.cpp
Go to the documentation of this file.
1 /* Copyright 2008-2009 INSA Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: UtfConverter.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include "UtfConverter.h"
23 #include "ConvertUTF.h"
24 #include <CRNException.h>
25 #include <CRNIO/CRNIO.h>
26 #include <CRNi18n.h>
27 
28 namespace UtfConverter
29 {
30 
31  std::basic_string<char32_t> FromUtf8(const std::string& utf8string)
32  {
33 #ifdef CRN_NO_WSTRING
34  return utf8string;
35 #endif
36  if (utf8string.empty())
37  return std::basic_string<char32_t>();
38  size_t widesize = utf8string.length();
39  if (sizeof(char32_t) == 2)
40  {
41  std::basic_string<char32_t> resultstring;
42  resultstring.resize(widesize+1, U'\0');
43  const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
44  const UTF8* sourceend = sourcestart + widesize;
45  UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
46  UTF16* ts = targetstart;
47  UTF16* targetend = targetstart + widesize;
48  ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
49  switch (res)
50  {
51  case conversionOK:
52  break;
53  case sourceExhausted:
54  CRNWarning(U"FromUtf8: source exhausted.");
55  break;
56  case targetExhausted:
57  CRNWarning(U"FromUtf8: target exhausted.");
58  break;
59  case sourceIllegal:
60  CRNWarning(U"FromUtf8: source illegal.");
61  break;
62  }
63  *targetstart = 0;
64  return std::basic_string<char32_t>(resultstring, 0, targetstart - ts);
65  }
66  else if (sizeof(char32_t) == 4)
67  {
68  std::basic_string<char32_t> resultstring;
69  resultstring.resize(widesize+1, U'\0');
70  const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
71  const UTF8* sourceend = sourcestart + widesize;
72  UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
73  UTF32* ts = targetstart;
74  UTF32* targetend = targetstart + widesize;
75  ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
76  switch (res)
77  {
78  case conversionOK:
79  break;
80  case sourceExhausted:
81  CRNWarning(U"FromUtf8: source exhausted.");
82  break;
83  case targetExhausted:
84  CRNWarning(U"FromUtf8: target exhausted.");
85  break;
86  case sourceIllegal:
87  CRNWarning(U"FromUtf8: source illegal.");
88  break;
89  }
90  *targetstart = 0;
91  return std::basic_string<char32_t>(resultstring, 0, targetstart - ts);
92  }
93  else
94  {
95  throw crn::ExceptionRuntime(crn::StringUTF8("FromUtf8: ") + _("UTF not supported."));
96  }
97  return U"";
98  }
99 
100  std::string ToUtf8(const std::basic_string<char32_t>& widestring)
101  {
102 #ifdef CRN_NO_WSTRING
103  return widestring;
104 #endif
105  if (widestring.empty())
106  return std::string();
107 
108  size_t widesize = widestring.length();
109 
110  if (sizeof(char32_t) == 2)
111  {
112  size_t utf8size = 3 * widesize + 1;
113  std::string resultstring;
114  resultstring.resize(utf8size, '\0');
115  const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
116  const UTF16* sourceend = sourcestart + widesize;
117  UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
118  UTF8* ts = targetstart;
119  UTF8* targetend = targetstart + utf8size;
120  ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
121  switch (res)
122  {
123  case conversionOK:
124  break;
125  case sourceExhausted:
126  CRNWarning(U"ToUtf8: source exhausted.");
127  break;
128  case targetExhausted:
129  CRNWarning(U"ToUtf8: target exhausted.");
130  break;
131  case sourceIllegal:
132  CRNWarning(U"ToUtf8: source illegal.");
133  break;
134  }
135  *targetstart = 0;
136  return std::string(resultstring, 0, targetstart - ts);
137  }
138  else if (sizeof(char32_t) == 4)
139  {
140  size_t utf8size = 4 * widesize + 1;
141  std::string resultstring;
142  resultstring.resize(utf8size, '\0');
143  const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
144  const UTF32* sourceend = sourcestart + widesize;
145  UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
146  UTF8* ts = targetstart;
147  UTF8* targetend = targetstart + utf8size;
148  ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
149  switch (res)
150  {
151  case conversionOK:
152  break;
153  case sourceExhausted:
154  CRNWarning(U"ToUtf8: source exhausted.");
155  break;
156  case targetExhausted:
157  CRNWarning(U"ToUtf8: target exhausted.");
158  break;
159  case sourceIllegal:
160  CRNWarning(U"ToUtf8: source illegal.");
161  break;
162  }
163  *targetstart = 0;
164  return std::string(resultstring, 0, targetstart - ts);
165  }
166  else
167  {
168  throw crn::ExceptionRuntime(crn::StringUTF8("FromUtf8: ") + _("UTF not supported."));
169  }
170  return "";
171  }
172 }
173 
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
Definition: ConvertUTF.cpp:214
A generic runtime error.
Definition: CRNException.h:131
#define _(String)
Definition: CRNi18n.h:51
#define CRNWarning(x)
Definition: CRNIO.h:145
unsigned char UTF8
Definition: ConvertUTF.h:92
ConversionResult
Definition: ConvertUTF.h:102
std::string ToUtf8(const std::basic_string< char32_t > &widestring)
std::basic_string< char32_t > FromUtf8(const std::string &utf8string)
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Definition: ConvertUTF.cpp:462
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
Definition: ConvertUTF.cpp:409
unsigned int UTF32
Definition: ConvertUTF.h:90
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
Definition: ConvertUTF.cpp:336
unsigned short UTF16
Definition: ConvertUTF.h:91
A character string class.
Definition: CRNStringUTF8.h:49