1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #define BOOST_LOCALE_SOURCE 9 #include <boost/locale/encoding.hpp> 10 #include <boost/locale/encoding_errors.hpp> 11 #include "../encoding/conv.hpp" 12 #include "all_generator.hpp" 13 #include "uconv.hpp" 14 #include <unicode/ucnv.h> 15 #include <unicode/ucnv_err.h> 16 #include <boost/locale/util.hpp> 17 #include <boost/locale/hold_ptr.hpp> 18 #include "codecvt.hpp" 19 20 #ifdef BOOST_MSVC 21 # pragma warning(disable : 4244) // loose data 22 #endif 23 24 #include "icu_util.hpp" 25 #include <vector> 26 namespace boost { 27 namespace locale { 28 namespace impl_icu { 29 class uconv_converter : public util::base_converter { 30 public: 31 uconv_converter(std::string const & encoding)32 uconv_converter(std::string const &encoding) : 33 encoding_(encoding) 34 { 35 UErrorCode err=U_ZERO_ERROR; 36 37 // No need to check err each time, this 38 // is how ICU works. 39 cvt_ = ucnv_open(encoding.c_str(),&err); 40 ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err); 41 ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err); 42 43 if(!cvt_ || U_FAILURE(err)) { 44 if(cvt_) 45 ucnv_close(cvt_); 46 throw conv::invalid_charset_error(encoding); 47 } 48 49 max_len_ = ucnv_getMaxCharSize(cvt_); 50 } 51 ~uconv_converter()52 virtual ~uconv_converter() 53 { 54 ucnv_close(cvt_); 55 } 56 is_thread_safe() const57 virtual bool is_thread_safe() const 58 { 59 return false; 60 } 61 clone() const62 virtual uconv_converter *clone() const 63 { 64 return new uconv_converter(encoding_); 65 } 66 to_unicode(char const * & begin,char const * end)67 uint32_t to_unicode(char const *&begin,char const *end) 68 { 69 UErrorCode err=U_ZERO_ERROR; 70 char const *tmp = begin; 71 UChar32 c=ucnv_getNextUChar(cvt_,&tmp,end,&err); 72 ucnv_reset(cvt_); 73 if(err == U_TRUNCATED_CHAR_FOUND) { 74 return incomplete; 75 } 76 if(U_FAILURE(err)) { 77 return illegal; 78 } 79 80 begin = tmp; 81 return c; 82 } 83 from_unicode(uint32_t u,char * begin,char const * end)84 uint32_t from_unicode(uint32_t u,char *begin,char const *end) 85 { 86 UChar code_point[2]={0}; 87 int len; 88 if(u<=0xFFFF) { 89 if(0xD800 <=u && u<= 0xDFFF) // No surragates 90 return illegal; 91 code_point[0]=u; 92 len=1; 93 } 94 else { 95 u-=0x10000; 96 code_point[0]=0xD800 | (u>>10); 97 code_point[1]=0xDC00 | (u & 0x3FF); 98 len=2; 99 } 100 UErrorCode err=U_ZERO_ERROR; 101 int olen = ucnv_fromUChars(cvt_,begin,end-begin,code_point,len,&err); 102 ucnv_reset(cvt_); 103 if(err == U_BUFFER_OVERFLOW_ERROR) 104 return incomplete; 105 if(U_FAILURE(err)) 106 return illegal; 107 return olen; 108 } 109 max_len() const110 virtual int max_len() const 111 { 112 return max_len_; 113 } 114 115 private: 116 std::string encoding_; 117 UConverter *cvt_; 118 int max_len_; 119 }; 120 create_uconv_converter(std::string const & encoding)121 util::base_converter *create_uconv_converter(std::string const &encoding) 122 { 123 hold_ptr<util::base_converter> cvt; 124 try { 125 cvt.reset(new uconv_converter(encoding)); 126 } 127 catch(std::exception const &/*e*/) 128 { 129 // no encoding so we return empty pointer 130 } 131 return cvt.release(); 132 } 133 create_codecvt(std::locale const & in,std::string const & encoding,character_facet_type type)134 std::locale create_codecvt(std::locale const &in,std::string const &encoding,character_facet_type type) 135 { 136 if(conv::impl::normalize_encoding(encoding.c_str())=="utf8") 137 return util::create_utf8_codecvt(in,type); 138 139 try { 140 return util::create_simple_codecvt(in,encoding,type); 141 } 142 catch(boost::locale::conv::invalid_charset_error const &) { 143 hold_ptr<util::base_converter> cvt; 144 try { 145 cvt.reset(create_uconv_converter(encoding)); 146 } 147 catch(std::exception const &/*e*/) 148 { 149 cvt.reset(new util::base_converter()); 150 } 151 return util::create_codecvt_from_pointer(in,cvt.release(),type); 152 } 153 } 154 155 } // impl_icu 156 } // locale 157 } // boost 158 159 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 160