1 /*
2 * Copyright Andrey Semashev 2007 - 2015.
3 * Distributed under the Boost Software License, Version 1.0.
4 * (See accompanying file LICENSE_1_0.txt or copy at
5 * http://www.boost.org/LICENSE_1_0.txt)
6 */
7 /*!
8 * \file code_conversion.cpp
9 * \author Andrey Semashev
10 * \date 08.11.2008
11 *
 * \brief This file is part of the Boost.Log library implementation, see the library documentation
13 * at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
14 */
15
16 #include <boost/log/detail/config.hpp>
17 #include <cstddef>
18 #include <locale>
19 #include <string>
20 #include <stdexcept>
21 #include <algorithm>
22 #include <boost/log/exceptions.hpp>
23 #include <boost/log/detail/code_conversion.hpp>
24 #if defined(BOOST_WINDOWS)
25 #include <cstring>
26 #include <limits>
27 #include <boost/winapi/get_last_error.hpp>
28 #include <boost/winapi/character_code_conversion.hpp>
29 #endif
30 #include <boost/log/detail/header.hpp>
31
32 namespace boost {
33
34 BOOST_LOG_OPEN_NAMESPACE
35
36 namespace aux {
37
38 BOOST_LOG_ANONYMOUS_NAMESPACE {
39
40 //! The function performs character conversion with the specified facet
41 template< typename LocalCharT >
42 inline std::codecvt_base::result convert(
43 std::codecvt< LocalCharT, char, std::mbstate_t > const& fac,
44 std::mbstate_t& state,
45 const char*& pSrcBegin,
46 const char* pSrcEnd,
47 LocalCharT*& pDstBegin,
48 LocalCharT* pDstEnd)
49 {
50 return fac.in(state, pSrcBegin, pSrcEnd, pSrcBegin, pDstBegin, pDstEnd, pDstBegin);
51 }
52
53 //! The function performs character conversion with the specified facet
54 template< typename LocalCharT >
55 inline std::codecvt_base::result convert(
56 std::codecvt< LocalCharT, char, std::mbstate_t > const& fac,
57 std::mbstate_t& state,
58 const LocalCharT*& pSrcBegin,
59 const LocalCharT* pSrcEnd,
60 char*& pDstBegin,
61 char* pDstEnd)
62 {
63 return fac.out(state, pSrcBegin, pSrcEnd, pSrcBegin, pDstBegin, pDstEnd, pDstBegin);
64 }
65
66 } // namespace
67
68 template< typename SourceCharT, typename TargetCharT, typename FacetT >
69 inline std::size_t code_convert(const SourceCharT* begin, const SourceCharT* end, std::basic_string< TargetCharT >& converted, std::size_t max_size, FacetT const& fac)
70 {
71 typedef typename FacetT::state_type state_type;
72 TargetCharT converted_buffer[256];
73
74 const SourceCharT* const original_begin = begin;
75 state_type state = state_type();
76 std::size_t buf_size = (std::min)(max_size, sizeof(converted_buffer) / sizeof(*converted_buffer));
77 while (begin != end && buf_size > 0u)
78 {
79 TargetCharT* dest = converted_buffer;
80 std::codecvt_base::result res = convert(
81 fac,
82 state,
83 begin,
84 end,
85 dest,
86 dest + buf_size);
87
88 switch (res)
89 {
90 case std::codecvt_base::ok:
91 // All characters were successfully converted
92 // NOTE: MSVC 11 also returns ok when the source buffer was only partially consumed, so we also check that the begin pointer has reached the end.
93 converted.append(converted_buffer, dest);
94 max_size -= dest - converted_buffer;
95 break;
96
97 case std::codecvt_base::noconv:
98 {
99 // Not possible, unless both character types are actually equivalent
100 const std::size_t size = (std::min)(max_size, static_cast< std::size_t >(end - begin));
101 converted.append(begin, begin + size);
102 begin += size;
103 max_size -= size;
104 }
105 goto done;
106
107 case std::codecvt_base::partial:
108 // Some characters were converted, some were not
109 if (dest != converted_buffer)
110 {
111 // Some conversion took place, so it seems like
112 // the destination buffer might not have been long enough
113 converted.append(converted_buffer, dest);
114 max_size -= dest - converted_buffer;
115
116 // ...and go on for the next part
117 break;
118 }
119 else
120 {
121 // Nothing was converted
122 if (begin == end)
123 goto done;
124
125 // Looks like the tail of the source buffer contains only part of the last character.
126 // In this case we intentionally fall through to throw an exception.
127 }
128 BOOST_FALLTHROUGH;
129
130 default: // std::codecvt_base::error
131 BOOST_LOG_THROW_DESCR(conversion_error, "Could not convert character encoding");
132 }
133
134 buf_size = (std::min)(max_size, sizeof(converted_buffer) / sizeof(*converted_buffer));
135 }
136
137 done:
138 return static_cast< std::size_t >(begin - original_begin);
139 }
140
141 //! The function converts one string to the character type of another
code_convert_impl(const wchar_t * str1,std::size_t len,std::string & str2,std::size_t max_size,std::locale const & loc)142 BOOST_LOG_API bool code_convert_impl(const wchar_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc)
143 {
144 return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == len;
145 }
146
147 //! The function converts one string to the character type of another
code_convert_impl(const char * str1,std::size_t len,std::wstring & str2,std::size_t max_size,std::locale const & loc)148 BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc)
149 {
150 return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == len;
151 }
152
153 #if !defined(BOOST_LOG_NO_CXX11_CODECVT_FACETS)
154
155 #if !defined(BOOST_NO_CXX11_CHAR16_T)
156
157 //! The function converts one string to the character type of another
code_convert_impl(const char16_t * str1,std::size_t len,std::string & str2,std::size_t max_size,std::locale const & loc)158 BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc)
159 {
160 return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == len;
161 }
162
163 //! The function converts one string to the character type of another
code_convert_impl(const char * str1,std::size_t len,std::u16string & str2,std::size_t max_size,std::locale const & loc)164 BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::u16string& str2, std::size_t max_size, std::locale const& loc)
165 {
166 return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == len;
167 }
168
169 //! The function converts one string to the character type of another
code_convert_impl(const char16_t * str1,std::size_t len,std::wstring & str2,std::size_t max_size,std::locale const & loc)170 BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc)
171 {
172 std::string temp_str;
173 code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc));
174 const std::size_t temp_size = temp_str.size();
175 return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == temp_size;
176 }
177
178 #endif
179
180 #if !defined(BOOST_NO_CXX11_CHAR32_T)
181
182 //! The function converts one string to the character type of another
code_convert_impl(const char32_t * str1,std::size_t len,std::string & str2,std::size_t max_size,std::locale const & loc)183 BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc)
184 {
185 return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == len;
186 }
187
188 //! The function converts one string to the character type of another
code_convert_impl(const char * str1,std::size_t len,std::u32string & str2,std::size_t max_size,std::locale const & loc)189 BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::u32string& str2, std::size_t max_size, std::locale const& loc)
190 {
191 return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == len;
192 }
193
194 //! The function converts one string to the character type of another
code_convert_impl(const char32_t * str1,std::size_t len,std::wstring & str2,std::size_t max_size,std::locale const & loc)195 BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc)
196 {
197 std::string temp_str;
198 code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc));
199 const std::size_t temp_size = temp_str.size();
200 return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == temp_size;
201 }
202
203 #endif
204
205 #if !defined(BOOST_NO_CXX11_CHAR16_T) && !defined(BOOST_NO_CXX11_CHAR32_T)
206
207 //! The function converts one string to the character type of another
code_convert_impl(const char16_t * str1,std::size_t len,std::u32string & str2,std::size_t max_size,std::locale const & loc)208 BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::u32string& str2, std::size_t max_size, std::locale const& loc)
209 {
210 std::string temp_str;
211 code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc));
212 const std::size_t temp_size = temp_str.size();
213 return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == temp_size;
214 }
215
216 //! The function converts one string to the character type of another
code_convert_impl(const char32_t * str1,std::size_t len,std::u16string & str2,std::size_t max_size,std::locale const & loc)217 BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::u16string& str2, std::size_t max_size, std::locale const& loc)
218 {
219 std::string temp_str;
220 code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc));
221 const std::size_t temp_size = temp_str.size();
222 return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == temp_size;
223 }
224
225 #endif
226
227 #endif // !defined(BOOST_LOG_NO_CXX11_CODECVT_FACETS)
228
229 #if defined(BOOST_WINDOWS)
230
231 //! Converts UTF-8 to UTF-16
utf8_to_utf16(const char * str)232 std::wstring utf8_to_utf16(const char* str)
233 {
234 std::size_t utf8_len = std::strlen(str);
235 if (utf8_len == 0)
236 return std::wstring();
237 else if (BOOST_UNLIKELY(utf8_len > static_cast< std::size_t >((std::numeric_limits< int >::max)())))
238 BOOST_LOG_THROW_DESCR(bad_alloc, "UTF-8 string too long");
239
240 int len = boost::winapi::MultiByteToWideChar(boost::winapi::CP_UTF8_, boost::winapi::MB_ERR_INVALID_CHARS_, str, static_cast< int >(utf8_len), NULL, 0);
241 if (BOOST_LIKELY(len > 0))
242 {
243 std::wstring wstr;
244 wstr.resize(len);
245
246 len = boost::winapi::MultiByteToWideChar(boost::winapi::CP_UTF8_, boost::winapi::MB_ERR_INVALID_CHARS_, str, static_cast< int >(utf8_len), &wstr[0], len);
247 if (BOOST_LIKELY(len > 0))
248 {
249 return wstr;
250 }
251 }
252
253 const boost::winapi::DWORD_ err = boost::winapi::GetLastError();
254 BOOST_LOG_THROW_DESCR_PARAMS(system_error, "Failed to convert UTF-8 to UTF-16", (err));
255 BOOST_LOG_UNREACHABLE_RETURN(std::wstring());
256 }
257
258 //! Converts UTF-16 to UTF-8
utf16_to_utf8(const wchar_t * wstr)259 std::string utf16_to_utf8(const wchar_t* wstr)
260 {
261 std::size_t utf16_len = std::wcslen(wstr);
262 if (utf16_len == 0)
263 return std::string();
264 else if (BOOST_UNLIKELY(utf16_len > static_cast< std::size_t >((std::numeric_limits< int >::max)())))
265 BOOST_LOG_THROW_DESCR(bad_alloc, "UTF-16 string too long");
266
267 const boost::winapi::DWORD_ flags =
268 #if BOOST_USE_WINAPI_VERSION >= BOOST_WINAPI_VERSION_WIN6
269 boost::winapi::WC_ERR_INVALID_CHARS_;
270 #else
271 0u;
272 #endif
273 int len = boost::winapi::WideCharToMultiByte(boost::winapi::CP_UTF8_, flags, wstr, static_cast< int >(utf16_len), NULL, 0, NULL, NULL);
274 if (BOOST_LIKELY(len > 0))
275 {
276 std::string str;
277 str.resize(len);
278
279 len = boost::winapi::WideCharToMultiByte(boost::winapi::CP_UTF8_, flags, wstr, static_cast< int >(utf16_len), &str[0], len, NULL, NULL);
280 if (BOOST_LIKELY(len > 0))
281 {
282 return str;
283 }
284 }
285
286 const boost::winapi::DWORD_ err = boost::winapi::GetLastError();
287 BOOST_LOG_THROW_DESCR_PARAMS(system_error, "Failed to convert UTF-16 to UTF-8", (err));
288 BOOST_LOG_UNREACHABLE_RETURN(std::string());
289 }
290
291 #endif // defined(BOOST_WINDOWS)
292
293 } // namespace aux
294
295 BOOST_LOG_CLOSE_NAMESPACE // namespace log
296
297 } // namespace boost
298
299 #include <boost/log/detail/footer.hpp>
300