• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #define BOOST_LOCALE_SOURCE
9 #include <boost/locale/collator.hpp>
10 #include <boost/locale/generator.hpp>
11 #include <boost/thread.hpp>
12 #include <vector>
13 #include <limits>
14 
15 #include "cdata.hpp"
16 #include "all_generator.hpp"
17 #include "uconv.hpp"
18 #include "../shared/mo_hash.hpp"
19 
20 #include <unicode/coll.h>
21 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
22 #  include <unicode/stringpiece.h>
23 #endif
24 
25 namespace boost {
26     namespace locale {
27         namespace impl_icu {
28             template<typename CharType>
29             class collate_impl : public collator<CharType>
30             {
31             public:
32                 typedef typename collator<CharType>::level_type level_type;
limit(level_type level) const33                 level_type limit(level_type level) const
34                 {
35                     if(level < 0)
36                         level=collator_base::primary;
37                     else if(level >= level_count)
38                         level = static_cast<level_type>(level_count - 1);
39                     return level;
40                 }
41 
42                 #if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
do_utf8_compare(level_type level,char const * b1,char const * e1,char const * b2,char const * e2,UErrorCode & status) const43                 int do_utf8_compare(    level_type level,
44                                         char const *b1,char const *e1,
45                                         char const *b2,char const *e2,
46                                         UErrorCode &status) const
47                 {
48                     icu::StringPiece left (b1,e1-b1);
49                     icu::StringPiece right(b2,e2-b2);
50                     return get_collator(level)->compareUTF8(left,right,status);
51 
52                 }
53                 #endif
54 
do_ustring_compare(level_type level,CharType const * b1,CharType const * e1,CharType const * b2,CharType const * e2,UErrorCode & status) const55                 int do_ustring_compare( level_type level,
56                                         CharType const *b1,CharType const *e1,
57                                         CharType const *b2,CharType const *e2,
58                                         UErrorCode &status) const
59                 {
60                     icu::UnicodeString left=cvt_.icu(b1,e1);
61                     icu::UnicodeString right=cvt_.icu(b2,e2);
62                     return get_collator(level)->compare(left,right,status);
63                 }
64 
do_real_compare(level_type level,CharType const * b1,CharType const * e1,CharType const * b2,CharType const * e2,UErrorCode & status) const65                 int do_real_compare(level_type level,
66                                     CharType const *b1,CharType const *e1,
67                                     CharType const *b2,CharType const *e2,
68                                     UErrorCode &status) const
69                 {
70                     return do_ustring_compare(level,b1,e1,b2,e2,status);
71                 }
72 
do_compare(level_type level,CharType const * b1,CharType const * e1,CharType const * b2,CharType const * e2) const73                 virtual int do_compare( level_type level,
74                                         CharType const *b1,CharType const *e1,
75                                         CharType const *b2,CharType const *e2) const
76                 {
77                     UErrorCode status=U_ZERO_ERROR;
78 
79                     int res = do_real_compare(level,b1,e1,b2,e2,status);
80 
81                     if(U_FAILURE(status))
82                             throw std::runtime_error(std::string("Collation failed:") + u_errorName(status));
83                     if(res < 0)
84                         return -1;
85                     else if(res > 0)
86                         return 1;
87                     return 0;
88                 }
89 
do_basic_transform(level_type level,CharType const * b,CharType const * e) const90                 std::vector<uint8_t> do_basic_transform(level_type level,CharType const *b,CharType const *e) const
91                 {
92                     icu::UnicodeString str=cvt_.icu(b,e);
93                     std::vector<uint8_t> tmp;
94                     tmp.resize(str.length());
95                     icu::Collator *collate = get_collator(level);
96                     int len = collate->getSortKey(str,&tmp[0],tmp.size());
97                     if(len > int(tmp.size())) {
98                         tmp.resize(len);
99                         collate->getSortKey(str,&tmp[0],tmp.size());
100                     }
101                     else
102                         tmp.resize(len);
103                     return tmp;
104                 }
do_transform(level_type level,CharType const * b,CharType const * e) const105                 std::basic_string<CharType> do_transform(level_type level,CharType const *b,CharType const *e) const
106                 {
107                     std::vector<uint8_t> tmp = do_basic_transform(level,b,e);
108                     return std::basic_string<CharType>(tmp.begin(),tmp.end());
109                 }
110 
do_hash(level_type level,CharType const * b,CharType const * e) const111                 long do_hash(level_type level,CharType const *b,CharType const *e) const
112                 {
113                     std::vector<uint8_t> tmp = do_basic_transform(level,b,e);
114                     tmp.push_back(0);
115                     return gnu_gettext::pj_winberger_hash_function(reinterpret_cast<char *>(&tmp.front()));
116                 }
117 
collate_impl(cdata const & d)118                 collate_impl(cdata const &d) :
119                     cvt_(d.encoding),
120                     locale_(d.locale),
121                     is_utf8_(d.utf8)
122                 {
123 
124                 }
get_collator(level_type ilevel) const125                 icu::Collator *get_collator(level_type ilevel) const
126                 {
127                     int l = limit(ilevel);
128                     static const icu::Collator::ECollationStrength levels[level_count] =
129                     {
130                         icu::Collator::PRIMARY,
131                         icu::Collator::SECONDARY,
132                         icu::Collator::TERTIARY,
133                         icu::Collator::QUATERNARY,
134                         icu::Collator::IDENTICAL
135                     };
136 
137                     icu::Collator *col = collates_[l].get();
138                     if(col)
139                         return col;
140 
141                     UErrorCode status=U_ZERO_ERROR;
142 
143                     collates_[l].reset(icu::Collator::createInstance(locale_,status));
144 
145                     if(U_FAILURE(status))
146                         throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(status));
147 
148                     collates_[l]->setStrength(levels[l]);
149                     return collates_[l].get();
150                 }
151 
152             private:
153                 static const int level_count = 5;
154                 icu_std_converter<CharType>  cvt_;
155                 icu::Locale locale_;
156                 mutable boost::thread_specific_ptr<icu::Collator> collates_[level_count];
157                 bool is_utf8_;
158             };
159 
160 
#if U_ICU_VERSION_MAJOR_NUM*100 + U_ICU_VERSION_MINOR_NUM >= 402
// Specialisation for narrow strings: when the facet was constructed with
// the utf8 flag set, compare the bytes through do_utf8_compare; otherwise
// fall back to the icu::UnicodeString based comparison.
template<>
int collate_impl<char>::do_real_compare(
                        level_type level,
                        char const *b1,char const *e1,
                        char const *b2,char const *e2,
                        UErrorCode &status) const
{
    // Non-UTF-8 encodings need the converter, so handle them first.
    if(!is_utf8_)
        return do_ustring_compare(level,b1,e1,b2,e2,status);

    return do_utf8_compare(level,b1,e1,b2,e2,status);
}
#endif
175 
create_collate(std::locale const & in,cdata const & cd,character_facet_type type)176             std::locale create_collate(std::locale const &in,cdata const &cd,character_facet_type type)
177             {
178                 switch(type) {
179                 case char_facet:
180                     return std::locale(in,new collate_impl<char>(cd));
181                 case wchar_t_facet:
182                     return std::locale(in,new collate_impl<wchar_t>(cd));
183                 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
184                 case char16_t_facet:
185                     return std::locale(in,new collate_impl<char16_t>(cd));
186                 #endif
187                 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
188                 case char32_t_facet:
189                     return std::locale(in,new collate_impl<char32_t>(cd));
190                 #endif
191                 default:
192                     return in;
193                 }
194             }
195 
196         } /// impl_icu
197 
198     } // locale
199 } // boost
200 
201 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
202