1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #define BOOST_LOCALE_SOURCE 9 #include <boost/locale/encoding.hpp> 10 #include <boost/shared_ptr.hpp> 11 #include <boost/locale/hold_ptr.hpp> 12 #include "../encoding/conv.hpp" 13 #include <boost/locale/util.hpp> 14 #include "all_generator.hpp" 15 16 #include <errno.h> 17 #include <algorithm> 18 #include <stdexcept> 19 #include <vector> 20 #include "codecvt.hpp" 21 22 #ifdef BOOST_LOCALE_WITH_ICONV 23 #include "../util/iconv.hpp" 24 #endif 25 26 namespace boost { 27 namespace locale { 28 namespace impl_posix { 29 30 #ifdef BOOST_LOCALE_WITH_ICONV 31 class mb2_iconv_converter : public util::base_converter { 32 public: 33 mb2_iconv_converter(std::string const & encoding)34 mb2_iconv_converter(std::string const &encoding) : 35 encoding_(encoding), 36 to_utf_((iconv_t)(-1)), 37 from_utf_((iconv_t)(-1)) 38 { 39 iconv_t d = (iconv_t)(-1); 40 std::vector<uint32_t> first_byte_table; 41 try { 42 d = iconv_open(utf32_encoding(),encoding.c_str()); 43 if(d == (iconv_t)(-1)) { 44 throw std::runtime_error("Unsupported encoding" + encoding); 45 } 46 for(unsigned c=0;c<256;c++) { 47 char ibuf[2] = { char(c) , 0 }; 48 char *in = ibuf; 49 size_t insize =2; 50 uint32_t obuf[2] = {illegal,illegal}; 51 char *out = reinterpret_cast<char *>(obuf); 52 size_t outsize = 8; 53 // Basic sigle codepoint conversion 54 call_iconv(d,&in,&insize,&out,&outsize); 55 if(insize == 0 && outsize == 0 && obuf[1] == 0) { 56 first_byte_table.push_back(obuf[0]); 57 continue; 58 } 59 60 // Test if this is illegal first byte or incomplete 61 in = ibuf; 62 insize = 1; 63 out = reinterpret_cast<char *>(obuf); 64 outsize = 8; 65 call_iconv(d,0,0,0,0); 66 size_t res = call_iconv(d,&in,&insize,&out,&outsize); 67 68 // Now if this single byte starts a sequence we add incomplete 69 // to know to ask that we need two bytes, othewise it may only be 70 // illegal 71 72 uint32_t point; 73 if(res == (size_t)(-1) && errno == EINVAL) 74 point = incomplete; 75 else 76 point = illegal; 77 first_byte_table.push_back(point); 78 79 } 80 } 81 catch(...) { 82 if(d!=(iconv_t)(-1)) 83 iconv_close(d); 84 throw; 85 } 86 iconv_close(d); 87 first_byte_table_.reset(new std::vector<uint32_t>()); 88 first_byte_table_->swap(first_byte_table); 89 } 90 mb2_iconv_converter(mb2_iconv_converter const & other)91 mb2_iconv_converter(mb2_iconv_converter const &other) : 92 first_byte_table_(other.first_byte_table_), 93 encoding_(other.encoding_), 94 to_utf_((iconv_t)(-1)), 95 from_utf_((iconv_t)(-1)) 96 { 97 } 98 ~mb2_iconv_converter()99 virtual ~mb2_iconv_converter() 100 { 101 if(to_utf_ != (iconv_t)(-1)) 102 iconv_close(to_utf_); 103 if(from_utf_ != (iconv_t)(-1)) 104 iconv_close(from_utf_); 105 106 } 107 is_thread_safe() const108 virtual bool is_thread_safe() const 109 { 110 return false; 111 } 112 clone() const113 virtual mb2_iconv_converter *clone() const 114 { 115 return new mb2_iconv_converter(*this); 116 } 117 to_unicode(char const * & begin,char const * end)118 uint32_t to_unicode(char const *&begin,char const *end) 119 { 120 if(begin == end) 121 return incomplete; 122 123 unsigned char seq0 = *begin; 124 uint32_t index = (*first_byte_table_)[seq0]; 125 if(index == illegal) 126 return illegal; 127 if(index != incomplete) { 128 begin++; 129 return index; 130 } 131 else if(begin+1 == end) 132 return incomplete; 133 134 open(to_utf_,utf32_encoding(),encoding_.c_str()); 135 136 // maybe illegal or may be double byte 137 138 char inseq[3] = { static_cast<char>(seq0) , begin[1], 0}; 139 char *inbuf = inseq; 140 size_t insize = 3; 141 uint32_t result[2] = { illegal, illegal }; 142 size_t outsize = 8; 143 char *outbuf = reinterpret_cast<char*>(result); 144 call_iconv(to_utf_,&inbuf,&insize,&outbuf,&outsize); 145 if(outsize == 0 && insize == 0 && result[1]==0 ) { 146 begin+=2; 147 return result[0]; 148 } 149 return illegal; 150 } 151 from_unicode(uint32_t cp,char * begin,char const * end)152 uint32_t from_unicode(uint32_t cp,char *begin,char const *end) 153 { 154 if(cp == 0) { 155 if(begin!=end) { 156 *begin = 0; 157 return 1; 158 } 159 else { 160 return incomplete; 161 } 162 } 163 164 open(from_utf_,encoding_.c_str(),utf32_encoding()); 165 166 uint32_t codepoints[2] = {cp,0}; 167 char *inbuf = reinterpret_cast<char *>(codepoints); 168 size_t insize = sizeof(codepoints); 169 char outseq[3] = {0}; 170 char *outbuf = outseq; 171 size_t outsize = 3; 172 173 call_iconv(from_utf_,&inbuf,&insize,&outbuf,&outsize); 174 175 if(insize != 0 || outsize > 1) 176 return illegal; 177 size_t len = 2 - outsize ; 178 size_t reminder = end - begin; 179 if(reminder < len) 180 return incomplete; 181 for(unsigned i=0;i<len;i++) 182 *begin++ = outseq[i]; 183 return len; 184 } 185 open(iconv_t & d,char const * to,char const * from)186 void open(iconv_t &d,char const *to,char const *from) 187 { 188 if(d!=(iconv_t)(-1)) 189 return; 190 d=iconv_open(to,from); 191 } 192 utf32_encoding()193 static char const *utf32_encoding() 194 { 195 union { char one; uint32_t value; } test; 196 test.value = 1; 197 if(test.one == 1) 198 return "UTF-32LE"; 199 else 200 return "UTF-32BE"; 201 } 202 max_len() const203 virtual int max_len() const 204 { 205 return 2; 206 } 207 208 private: 209 boost::shared_ptr<std::vector<uint32_t> > first_byte_table_; 210 std::string encoding_; 211 iconv_t to_utf_; 212 iconv_t from_utf_; 213 }; 214 create_iconv_converter(std::string const & encoding)215 util::base_converter *create_iconv_converter(std::string const &encoding) 216 { 217 hold_ptr<util::base_converter> cvt; 218 try { 219 cvt.reset(new mb2_iconv_converter(encoding)); 220 } 221 catch(std::exception const &e) { 222 // Nothing to do, just retrun empty cvt 223 } 224 return cvt.release(); 225 } 226 227 #else // no iconv 228 util::base_converter *create_iconv_converter(std::string const &/*encoding*/) 229 { 230 return 0; 231 } 232 #endif 233 create_codecvt(std::locale const & in,std::string const & encoding,character_facet_type type)234 std::locale create_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type) 235 { 236 if(conv::impl::normalize_encoding(encoding.c_str())=="utf8") 237 return util::create_utf8_codecvt(in,type); 238 239 try { 240 return util::create_simple_codecvt(in,encoding,type); 241 } 242 catch(conv::invalid_charset_error const &) { 243 util::base_converter *cvt = create_iconv_converter(encoding); 244 return util::create_codecvt_from_pointer(in,cvt,type); 245 } 246 } 247 248 } // impl_posix 249 } // locale 250 } // boost 251 252 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 253