1 // Copyright Vladimir Prus 2004. 2 // Distributed under the Boost Software License, Version 1.0. 3 // (See accompanying file LICENSE_1_0.txt 4 // or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #include <fstream> 7 #include <locale.h> 8 #include <locale> 9 #include <iostream> 10 #include <string> 11 #include <locale> 12 #include <stdexcept> 13 14 #include <boost/config.hpp> 15 16 #define BOOST_PROGRAM_OPTIONS_SOURCE 17 #include <boost/program_options/config.hpp> 18 #include <boost/program_options/detail/convert.hpp> 19 #include <boost/program_options/detail/utf8_codecvt_facet.hpp> 20 #include <boost/throw_exception.hpp> 21 22 #include <boost/bind.hpp> 23 24 using namespace std; 25 26 namespace boost { namespace detail { 27 28 /* Internal function to actually perform conversion. 29 The logic in from_8_bit and to_8_bit function is exactly 30 the same, except that one calls 'in' method of codecvt and another 31 calls the 'out' method, and that syntax difference makes straightforward 32 template implementation impossible. 33 34 This functions takes a 'fun' argument, which should have the same 35 parameters and return type and the in/out methods. The actual converting 36 function will pass functional objects created with boost::bind. 37 Experiments show that the performance loss is less than 10%. 38 */ 39 template<class ToChar, class FromChar, class Fun> 40 std::basic_string<ToChar> convert(const std::basic_string<FromChar> & s,Fun fun)41 convert(const std::basic_string<FromChar>& s, Fun fun) 42 43 { 44 std::basic_string<ToChar> result; 45 46 std::mbstate_t state = std::mbstate_t(); 47 48 const FromChar* from = s.data(); 49 const FromChar* from_end = s.data() + s.size(); 50 // The interface of cvt is not really iterator-like, and it's 51 // not possible the tell the required output size without the conversion. 52 // All we can is convert data by pieces. 53 while(from != from_end) { 54 55 // std::basic_string does not provide non-const pointers to the data, 56 // so converting directly into string is not possible. 57 ToChar buffer[32]; 58 59 ToChar* to_next = buffer; 60 // Need variable because boost::bind doesn't work with rvalues. 61 ToChar* to_end = buffer + 32; 62 std::codecvt_base::result r = 63 fun(state, from, from_end, from, buffer, to_end, to_next); 64 65 if (r == std::codecvt_base::error) 66 boost::throw_exception( 67 std::logic_error("character conversion failed")); 68 // 'partial' is not an error, it just means not all source 69 // characters were converted. However, we need to check that at 70 // least one new target character was produced. If not, it means 71 // the source data is incomplete, and since we don't have extra 72 // data to add to source, it's error. 73 if (to_next == buffer) 74 boost::throw_exception( 75 std::logic_error("character conversion failed")); 76 77 // Add converted characters 78 result.append(buffer, to_next); 79 } 80 81 return result; 82 } 83 }} 84 85 namespace boost { 86 87 #ifndef BOOST_NO_STD_WSTRING 88 BOOST_PROGRAM_OPTIONS_DECL std::wstring from_8_bit(const std::string & s,const std::codecvt<wchar_t,char,std::mbstate_t> & cvt)89 from_8_bit(const std::string& s, 90 const std::codecvt<wchar_t, char, std::mbstate_t>& cvt) 91 { 92 return detail::convert<wchar_t>( 93 s, 94 boost::bind(&std::codecvt<wchar_t, char, mbstate_t>::in, 95 &cvt, 96 _1, _2, _3, _4, _5, _6, _7)); 97 } 98 99 BOOST_PROGRAM_OPTIONS_DECL std::string to_8_bit(const std::wstring & s,const std::codecvt<wchar_t,char,std::mbstate_t> & cvt)100 to_8_bit(const std::wstring& s, 101 const std::codecvt<wchar_t, char, std::mbstate_t>& cvt) 102 { 103 return detail::convert<char>( 104 s, 105 boost::bind(&codecvt<wchar_t, char, mbstate_t>::out, 106 &cvt, 107 _1, _2, _3, _4, _5, _6, _7)); 108 } 109 110 111 namespace { 112 boost::program_options::detail::utf8_codecvt_facet 113 utf8_facet; 114 } 115 116 BOOST_PROGRAM_OPTIONS_DECL std::wstring from_utf8(const std::string & s)117 from_utf8(const std::string& s) 118 { 119 return from_8_bit(s, utf8_facet); 120 } 121 122 BOOST_PROGRAM_OPTIONS_DECL std::string to_utf8(const std::wstring & s)123 to_utf8(const std::wstring& s) 124 { 125 return to_8_bit(s, utf8_facet); 126 } 127 128 BOOST_PROGRAM_OPTIONS_DECL std::wstring from_local_8_bit(const std::string & s)129 from_local_8_bit(const std::string& s) 130 { 131 typedef codecvt<wchar_t, char, mbstate_t> facet_type; 132 return from_8_bit(s, 133 BOOST_USE_FACET(facet_type, locale())); 134 } 135 136 BOOST_PROGRAM_OPTIONS_DECL std::string to_local_8_bit(const std::wstring & s)137 to_local_8_bit(const std::wstring& s) 138 { 139 typedef codecvt<wchar_t, char, mbstate_t> facet_type; 140 return to_8_bit(s, 141 BOOST_USE_FACET(facet_type, locale())); 142 } 143 #endif 144 145 namespace program_options 146 { to_internal(const std::string & s)147 BOOST_PROGRAM_OPTIONS_DECL std::string to_internal(const std::string& s) 148 { 149 return s; 150 } 151 152 #ifndef BOOST_NO_STD_WSTRING to_internal(const std::wstring & s)153 BOOST_PROGRAM_OPTIONS_DECL std::string to_internal(const std::wstring& s) 154 { 155 return to_utf8(s); 156 } 157 #endif 158 } 159 160 161 } 162