1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #define BOOST_LOCALE_SOURCE 9 #include <boost/locale/collator.hpp> 10 #include <boost/locale/generator.hpp> 11 #include <boost/thread.hpp> 12 #include <vector> 13 #include <limits> 14 15 #include "cdata.hpp" 16 #include "all_generator.hpp" 17 #include "uconv.hpp" 18 #include "../shared/mo_hash.hpp" 19 20 #include <unicode/coll.h> 21 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402 22 # include <unicode/stringpiece.h> 23 #endif 24 25 namespace boost { 26 namespace locale { 27 namespace impl_icu { 28 template<typename CharType> 29 class collate_impl : public collator<CharType> 30 { 31 public: 32 typedef typename collator<CharType>::level_type level_type; limit(level_type level) const33 level_type limit(level_type level) const 34 { 35 if(level < 0) 36 level=collator_base::primary; 37 else if(level >= level_count) 38 level = static_cast<level_type>(level_count - 1); 39 return level; 40 } 41 42 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402 do_utf8_compare(level_type level,char const * b1,char const * e1,char const * b2,char const * e2,UErrorCode & status) const43 int do_utf8_compare( level_type level, 44 char const *b1,char const *e1, 45 char const *b2,char const *e2, 46 UErrorCode &status) const 47 { 48 icu::StringPiece left (b1,e1-b1); 49 icu::StringPiece right(b2,e2-b2); 50 return get_collator(level)->compareUTF8(left,right,status); 51 52 } 53 #endif 54 do_ustring_compare(level_type level,CharType const * b1,CharType const * e1,CharType const * b2,CharType const * e2,UErrorCode & status) const55 int do_ustring_compare( level_type level, 56 CharType const *b1,CharType const *e1, 57 CharType const *b2,CharType const *e2, 58 UErrorCode &status) const 59 { 60 icu::UnicodeString left=cvt_.icu(b1,e1); 61 icu::UnicodeString right=cvt_.icu(b2,e2); 62 return get_collator(level)->compare(left,right,status); 63 } 64 do_real_compare(level_type level,CharType const * b1,CharType const * e1,CharType const * b2,CharType const * e2,UErrorCode & status) const65 int do_real_compare(level_type level, 66 CharType const *b1,CharType const *e1, 67 CharType const *b2,CharType const *e2, 68 UErrorCode &status) const 69 { 70 return do_ustring_compare(level,b1,e1,b2,e2,status); 71 } 72 do_compare(level_type level,CharType const * b1,CharType const * e1,CharType const * b2,CharType const * e2) const73 virtual int do_compare( level_type level, 74 CharType const *b1,CharType const *e1, 75 CharType const *b2,CharType const *e2) const 76 { 77 UErrorCode status=U_ZERO_ERROR; 78 79 int res = do_real_compare(level,b1,e1,b2,e2,status); 80 81 if(U_FAILURE(status)) 82 throw std::runtime_error(std::string("Collation failed:") + u_errorName(status)); 83 if(res < 0) 84 return -1; 85 else if(res > 0) 86 return 1; 87 return 0; 88 } 89 do_basic_transform(level_type level,CharType const * b,CharType const * e) const90 std::vector<uint8_t> do_basic_transform(level_type level,CharType const *b,CharType const *e) const 91 { 92 icu::UnicodeString str=cvt_.icu(b,e); 93 std::vector<uint8_t> tmp; 94 tmp.resize(str.length()); 95 icu::Collator *collate = get_collator(level); 96 int len = collate->getSortKey(str,&tmp[0],tmp.size()); 97 if(len > int(tmp.size())) { 98 tmp.resize(len); 99 collate->getSortKey(str,&tmp[0],tmp.size()); 100 } 101 else 102 tmp.resize(len); 103 return tmp; 104 } do_transform(level_type level,CharType const * b,CharType const * e) const105 std::basic_string<CharType> do_transform(level_type level,CharType const *b,CharType const *e) const 106 { 107 std::vector<uint8_t> tmp = do_basic_transform(level,b,e); 108 return std::basic_string<CharType>(tmp.begin(),tmp.end()); 109 } 110 do_hash(level_type level,CharType const * b,CharType const * e) const111 long do_hash(level_type level,CharType const *b,CharType const *e) const 112 { 113 std::vector<uint8_t> tmp = do_basic_transform(level,b,e); 114 tmp.push_back(0); 115 return gnu_gettext::pj_winberger_hash_function(reinterpret_cast<char *>(&tmp.front())); 116 } 117 collate_impl(cdata const & d)118 collate_impl(cdata const &d) : 119 cvt_(d.encoding), 120 locale_(d.locale), 121 is_utf8_(d.utf8) 122 { 123 124 } get_collator(level_type ilevel) const125 icu::Collator *get_collator(level_type ilevel) const 126 { 127 int l = limit(ilevel); 128 static const icu::Collator::ECollationStrength levels[level_count] = 129 { 130 icu::Collator::PRIMARY, 131 icu::Collator::SECONDARY, 132 icu::Collator::TERTIARY, 133 icu::Collator::QUATERNARY, 134 icu::Collator::IDENTICAL 135 }; 136 137 icu::Collator *col = collates_[l].get(); 138 if(col) 139 return col; 140 141 UErrorCode status=U_ZERO_ERROR; 142 143 collates_[l].reset(icu::Collator::createInstance(locale_,status)); 144 145 if(U_FAILURE(status)) 146 throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status)); 147 148 collates_[l]->setStrength(levels[l]); 149 return collates_[l].get(); 150 } 151 152 private: 153 static const int level_count = 5; 154 icu_std_converter<CharType> cvt_; 155 icu::Locale locale_; 156 mutable boost::thread_specific_ptr<icu::Collator> collates_[level_count]; 157 bool is_utf8_; 158 }; 159 160 161 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402 162 template<> do_real_compare(level_type level,char const * b1,char const * e1,char const * b2,char const * e2,UErrorCode & status) const163 int collate_impl<char>::do_real_compare( 164 level_type level, 165 char const *b1,char const *e1, 166 char const *b2,char const *e2, 167 UErrorCode &status) const 168 { 169 if(is_utf8_) 170 return do_utf8_compare(level,b1,e1,b2,e2,status); 171 else 172 return do_ustring_compare(level,b1,e1,b2,e2,status); 173 } 174 #endif 175 create_collate(std::locale const & in,cdata const & cd,character_facet_type type)176 std::locale create_collate(std::locale const &in,cdata const &cd,character_facet_type type) 177 { 178 switch(type) { 179 case char_facet: 180 return std::locale(in,new collate_impl<char>(cd)); 181 case wchar_t_facet: 182 return std::locale(in,new collate_impl<wchar_t>(cd)); 183 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T 184 case char16_t_facet: 185 return std::locale(in,new collate_impl<char16_t>(cd)); 186 #endif 187 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T 188 case char32_t_facet: 189 return std::locale(in,new collate_impl<char32_t>(cd)); 190 #endif 191 default: 192 return in; 193 } 194 } 195 196 } /// impl_icu 197 198 } // locale 199 } // boost 200 201 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 202