// // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) // // Distributed under the Boost Software License, Version 1.0. (See // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // #ifndef BOOST_SRC_LOCALE_ICU_UCONV_HPP #define BOOST_SRC_LOCALE_ICU_UCONV_HPP #include #include #include #include #include #include #include #include #include "icu_util.hpp" namespace boost { namespace locale { namespace impl_icu { typedef enum { cvt_skip, cvt_stop } cpcvt_type; template class icu_std_converter { public: typedef CharType char_type; typedef std::basic_string string_type; icu_std_converter(std::string charset,cpcvt_type cv=cvt_skip); icu::UnicodeString icu(char_type const *begin,char_type const *end) const; string_type std(icu::UnicodeString const &str) const; size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,size_t n,size_t from_u=0,size_t from_c=0) const; }; template class icu_std_converter { public: typedef CharType char_type; typedef std::basic_string string_type; icu::UnicodeString icu_checked(char_type const *vb,char_type const *ve) const { return icu(vb,ve); // Already done } icu::UnicodeString icu(char_type const *vb,char_type const *ve) const { char const *begin=reinterpret_cast(vb); char const *end=reinterpret_cast(ve); uconv cvt(charset_,cvt_type_); UErrorCode err=U_ZERO_ERROR; icu::UnicodeString tmp(begin,end-begin,cvt.cvt(),err); check_and_throw_icu_error(err); return tmp; } string_type std(icu::UnicodeString const &str) const { uconv cvt(charset_,cvt_type_); return cvt.go(str.getBuffer(),str.length(),max_len_); } icu_std_converter(std::string charset,cpcvt_type cvt_type = cvt_skip) : charset_(charset), cvt_type_(cvt_type) { uconv cvt(charset_,cvt_type); max_len_=cvt.max_char_size(); } size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end, size_t n,size_t from_u=0,size_t from_char=0) const { size_t code_points = str.countChar32(from_u,n); uconv cvt(charset_,cvt_type_); return cvt.cut(code_points,begin+from_char,end); } struct uconv { uconv(uconv const &other); void operator=(uconv const &other); public: uconv(std::string const &charset,cpcvt_type cvt_type=cvt_skip) { UErrorCode err=U_ZERO_ERROR; cvt_ = ucnv_open(charset.c_str(),&err); if(!cvt_ || U_FAILURE(err)) { if(cvt_) ucnv_close(cvt_); throw conv::invalid_charset_error(charset); } try { if(cvt_type==cvt_skip) { ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_SKIP,0,0,0,&err); check_and_throw_icu_error(err); err=U_ZERO_ERROR; ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_SKIP,0,0,0,&err); check_and_throw_icu_error(err); } else { ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err); check_and_throw_icu_error(err); err=U_ZERO_ERROR; ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err); check_and_throw_icu_error(err); } } catch(...) { ucnv_close(cvt_) ; throw; } } int max_char_size() { return ucnv_getMaxCharSize(cvt_); } string_type go(UChar const *buf,int length,int max_size) { string_type res; res.resize(UCNV_GET_MAX_BYTES_FOR_STRING(length,max_size)); char *ptr=reinterpret_cast(&res[0]); UErrorCode err=U_ZERO_ERROR; int n = ucnv_fromUChars(cvt_,ptr,res.size(),buf,length,&err); check_and_throw_icu_error(err); res.resize(n); return res; } size_t cut(size_t n,char_type const *begin,char_type const *end) { char_type const *saved = begin; while(n > 0 && begin < end) { UErrorCode err=U_ZERO_ERROR; ucnv_getNextUChar(cvt_,&begin,end,&err); if(U_FAILURE(err)) return 0; n--; } return begin - saved; } UConverter *cvt() { return cvt_; } ~uconv() { ucnv_close(cvt_); } private: UConverter *cvt_; }; private: int max_len_; std::string charset_; cpcvt_type cvt_type_; }; template class icu_std_converter { public: typedef CharType char_type; typedef std::basic_string string_type; icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const { icu::UnicodeString tmp(end-begin,0,0); // make inital capacity while(begin!=end) { UChar cl = *begin++; if(U16_IS_SINGLE(cl)) tmp.append(static_cast(cl)); else if(U16_IS_LEAD(cl)) { if(begin==end) { throw_if_needed(); } else { UChar ct=*begin++; if(!U16_IS_TRAIL(ct)) throw_if_needed(); else { UChar32 c=U16_GET_SUPPLEMENTARY(cl,ct); tmp.append(c); } } } else throw_if_needed(); } return tmp; } void throw_if_needed() const { if(mode_ == cvt_stop) throw conv::conversion_error(); } icu::UnicodeString icu(char_type const *vb,char_type const *ve) const { UChar const *begin=reinterpret_cast(vb); UChar const *end=reinterpret_cast(ve); icu::UnicodeString tmp(begin,end-begin); return tmp; } string_type std(icu::UnicodeString const &str) const { char_type const *ptr=reinterpret_cast(str.getBuffer()); return string_type(ptr,str.length()); } size_t cut(icu::UnicodeString const &/*str*/,char_type const * /*begin*/,char_type const * /*end*/,size_t n, size_t /*from_u*/=0,size_t /*from_c*/=0) const { return n; } icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) : mode_(mode) { } private: cpcvt_type mode_; }; template class icu_std_converter { public: typedef CharType char_type; typedef std::basic_string string_type; icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const { icu::UnicodeString tmp(end-begin,0,0); // make inital capacity while(begin!=end) { UChar32 c = static_cast(*begin++); if(U_IS_UNICODE_CHAR(c)) tmp.append(c); else throw_if_needed(); } return tmp; } void throw_if_needed() const { if(mode_ == cvt_stop) throw conv::conversion_error(); } icu::UnicodeString icu(char_type const *begin,char_type const *end) const { icu::UnicodeString tmp(end-begin,0,0); // make inital capacity while(begin!=end) { UChar32 c=static_cast(*begin++); tmp.append(c); } return tmp; } string_type std(icu::UnicodeString const &str) const { string_type tmp; tmp.resize(str.length()); UChar32 *ptr=reinterpret_cast(&tmp[0]); #ifdef __SUNPRO_CC int len=0; #else ::int32_t len=0; #endif UErrorCode code=U_ZERO_ERROR; u_strToUTF32(ptr,tmp.size(),&len,str.getBuffer(),str.length(),&code); check_and_throw_icu_error(code); tmp.resize(len); return tmp; } size_t cut(icu::UnicodeString const &str,char_type const * /*begin*/,char_type const * /*end*/,size_t n, size_t from_u=0,size_t /*from_c*/=0) const { return str.countChar32(from_u,n); } icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) : mode_(mode) { } private: cpcvt_type mode_; }; } /// impl_icu } // locale } // boost #endif // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4