• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED
9 #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED
10 
11 #include <boost/locale/config.hpp>
12 #ifdef BOOST_MSVC
13 #  pragma warning(push)
14 #  pragma warning(disable : 4275 4251 4231 4660)
15 #endif
16 #include <locale>
17 
18 
19 namespace boost {
20     namespace locale {
21 
22         ///
23         /// \defgroup convert Text Conversions
24         ///
        ///  This module provides various functions for string manipulation, such as Unicode normalization, case conversion, etc.
26         /// @{
27         ///
28 
29 
30         ///
31         /// \brief This class provides base flags for text manipulation. It is used as base for converter facet.
32         ///
33         class converter_base {
34         public:
35             ///
36             /// The flag used for facet - the type of operation to perform
37             ///
38             typedef enum {
39                 normalization,  ///< Apply Unicode normalization on the text
40                 upper_case,     ///< Convert text to upper case
41                 lower_case,     ///< Convert text to lower case
42                 case_folding,   ///< Fold case in the text
43                 title_case      ///< Convert text to title case
44             } conversion_type;
45         };
46 
47         template<typename CharType>
48         class converter;
49 
50         #ifdef BOOST_LOCALE_DOXYGEN
51         ///
52         /// \brief The facet that implements text manipulation
53         ///
54         /// It is used to performs text conversion operations defined by \ref conversion_type. It is specialized
55         /// for four types of characters \c char, \c wchar_t, \c char16_t, \c char32_t
56         ///
57         template<typename Char>
58         class BOOST_LOCALE_DECL converter: public converter_base, public std::locale::facet {
59         public:
60             /// Locale identification
61             static std::locale::id id;
62 
63             /// Standard constructor
converter(size_t refs=0)64             converter(size_t refs = 0) : std::locale::facet(refs)
65             {
66             }
67             ///
68             /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter
69             /// \a flags is used for specification of normalization method like nfd, nfc etc.
70             ///
71             virtual std::basic_string<Char> convert(conversion_type how,Char const *begin,Char const *end,int flags = 0) const = 0;
72 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
__get_id(void) const73             std::locale::id& __get_id (void) const { return id; }
74 #endif
75         };
76         #else
77 
78         template<>
79         class BOOST_LOCALE_DECL converter<char> : public converter_base, public std::locale::facet {
80         public:
81             static std::locale::id id;
82 
converter(size_t refs=0)83             converter(size_t refs = 0) : std::locale::facet(refs)
84             {
85             }
86             virtual std::string convert(conversion_type how,char const *begin,char const *end,int flags = 0) const = 0;
87 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
__get_id(void) const88             std::locale::id& __get_id (void) const { return id; }
89 #endif
90         };
91 
92         template<>
93         class BOOST_LOCALE_DECL converter<wchar_t> : public converter_base, public std::locale::facet {
94         public:
95             static std::locale::id id;
converter(size_t refs=0)96             converter(size_t refs = 0) : std::locale::facet(refs)
97             {
98             }
99              virtual std::wstring convert(conversion_type how,wchar_t const *begin,wchar_t const *end,int flags = 0) const = 0;
100 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
__get_id(void) const101             std::locale::id& __get_id (void) const { return id; }
102 #endif
103         };
104 
105         #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
106         template<>
107         class BOOST_LOCALE_DECL converter<char16_t> : public converter_base, public std::locale::facet {
108         public:
109             static std::locale::id id;
converter(size_t refs=0)110             converter(size_t refs = 0) : std::locale::facet(refs)
111             {
112             }
113             virtual std::u16string convert(conversion_type how,char16_t const *begin,char16_t const *end,int flags = 0) const = 0;
114 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
__get_id(void) const115             std::locale::id& __get_id (void) const { return id; }
116 #endif
117         };
118         #endif
119 
120         #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
121         template<>
122         class BOOST_LOCALE_DECL converter<char32_t> : public converter_base, public std::locale::facet {
123         public:
124             static std::locale::id id;
converter(size_t refs=0)125             converter(size_t refs = 0) : std::locale::facet(refs)
126             {
127             }
128             virtual std::u32string convert(conversion_type how,char32_t const *begin,char32_t const *end,int flags = 0) const = 0;
129 #if defined (__SUNPRO_CC) && defined (_RWSTD_VER)
__get_id(void) const130             std::locale::id& __get_id (void) const { return id; }
131 #endif
132         };
133         #endif
134 
135         #endif
136 
137         ///
138         /// The type that defined <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a>
139         ///
140 
141         typedef enum {
142             norm_nfd,   ///< Canonical decomposition
143             norm_nfc,   ///< Canonical decomposition followed by canonical composition
144             norm_nfkd,  ///< Compatibility decomposition
145             norm_nfkc,  ///< Compatibility decomposition followed by canonical composition.
146             norm_default = norm_nfc, ///< Default normalization - canonical decomposition followed by canonical composition
147         } norm_type;
148 
149         ///
150         /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n
151         ///
152         /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
153         /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
154         /// of a Unicode character set.
155         ///
156         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
157         ///
158         template<typename CharType>
normalize(std::basic_string<CharType> const & str,norm_type n=norm_default,std::locale const & loc=std::locale ())159         std::basic_string<CharType> normalize(std::basic_string<CharType> const &str,norm_type n=norm_default,std::locale const &loc=std::locale())
160         {
161             return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str.data(),str.data() + str.size(),n);
162         }
163 
164         ///
165         /// Normalize NUL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n
166         ///
167         /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
168         /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
169         /// of a Unicode character set.
170         ///
171         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
172         ///
173         template<typename CharType>
normalize(CharType const * str,norm_type n=norm_default,std::locale const & loc=std::locale ())174         std::basic_string<CharType> normalize(CharType const *str,norm_type n=norm_default,std::locale const &loc=std::locale())
175         {
176             CharType const *end=str;
177             while(*end)
178                 end++;
179             return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,str,end,n);
180         }
181 
182         ///
183         /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n
184         ///
185         /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
186         /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
187         /// of a Unicode character set.
188         ///
189         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
190         ///
191         template<typename CharType>
normalize(CharType const * begin,CharType const * end,norm_type n=norm_default,std::locale const & loc=std::locale ())192         std::basic_string<CharType> normalize(  CharType const *begin,
193                                                 CharType const *end,
194                                                 norm_type n=norm_default,
195                                                 std::locale const &loc=std::locale())
196         {
197             return std::use_facet<converter<CharType> >(loc).convert(converter_base::normalization,begin,end,n);
198         }
199 
200         ///////////////////////////////////////////////////
201 
202         ///
203         /// Convert a string \a str to upper case according to locale \a loc
204         ///
205         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
206         ///
207 
208         template<typename CharType>
to_upper(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())209         std::basic_string<CharType> to_upper(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
210         {
211             return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str.data(),str.data()+str.size());
212         }
213 
214         ///
215         /// Convert a NUL terminated string \a str to upper case according to locale \a loc
216         ///
217         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
218         ///
219         template<typename CharType>
to_upper(CharType const * str,std::locale const & loc=std::locale ())220         std::basic_string<CharType> to_upper(CharType const *str,std::locale const &loc=std::locale())
221         {
222             CharType const *end=str;
223             while(*end)
224                 end++;
225             return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,str,end);
226         }
227 
228         ///
229         /// Convert a string in range [begin,end) to upper case according to locale \a loc
230         ///
231         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
232         ///
233         template<typename CharType>
to_upper(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())234         std::basic_string<CharType> to_upper(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
235         {
236             return std::use_facet<converter<CharType> >(loc).convert(converter_base::upper_case,begin,end);
237         }
238 
239         ///////////////////////////////////////////////////
240 
241         ///
242         /// Convert a string \a str to lower case according to locale \a loc
243         ///
244         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
245         ///
246 
247         template<typename CharType>
to_lower(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())248         std::basic_string<CharType> to_lower(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
249         {
250             return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str.data(),str.data()+str.size());
251         }
252 
253         ///
254         /// Convert a NUL terminated string \a str to lower case according to locale \a loc
255         ///
256         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
257         ///
258         template<typename CharType>
to_lower(CharType const * str,std::locale const & loc=std::locale ())259         std::basic_string<CharType> to_lower(CharType const *str,std::locale const &loc=std::locale())
260         {
261             CharType const *end=str;
262             while(*end)
263                 end++;
264             return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,str,end);
265         }
266 
267         ///
268         /// Convert a string in range [begin,end) to lower case according to locale \a loc
269         ///
270         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
271         ///
272         template<typename CharType>
to_lower(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())273         std::basic_string<CharType> to_lower(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
274         {
275             return std::use_facet<converter<CharType> >(loc).convert(converter_base::lower_case,begin,end);
276         }
277         ///////////////////////////////////////////////////
278 
279         ///
280         /// Convert a string \a str to title case according to locale \a loc
281         ///
282         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
283         ///
284 
285         template<typename CharType>
to_title(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())286         std::basic_string<CharType> to_title(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
287         {
288             return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str.data(),str.data()+str.size());
289         }
290 
291         ///
292         /// Convert a NUL terminated string \a str to title case according to locale \a loc
293         ///
294         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
295         ///
296         template<typename CharType>
to_title(CharType const * str,std::locale const & loc=std::locale ())297         std::basic_string<CharType> to_title(CharType const *str,std::locale const &loc=std::locale())
298         {
299             CharType const *end=str;
300             while(*end)
301                 end++;
302             return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,str,end);
303         }
304 
305         ///
306         /// Convert a string in range [begin,end) to title case according to locale \a loc
307         ///
308         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
309         ///
310         template<typename CharType>
to_title(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())311         std::basic_string<CharType> to_title(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
312         {
313             return std::use_facet<converter<CharType> >(loc).convert(converter_base::title_case,begin,end);
314         }
315 
316         ///////////////////////////////////////////////////
317 
318         ///
319         /// Fold case of a string \a str according to locale \a loc
320         ///
321         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
322         ///
323 
324         template<typename CharType>
fold_case(std::basic_string<CharType> const & str,std::locale const & loc=std::locale ())325         std::basic_string<CharType> fold_case(std::basic_string<CharType> const &str,std::locale const &loc=std::locale())
326         {
327             return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str.data(),str.data()+str.size());
328         }
329 
330         ///
331         /// Fold case of a NUL terminated string \a str according to locale \a loc
332         ///
333         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
334         ///
335         template<typename CharType>
fold_case(CharType const * str,std::locale const & loc=std::locale ())336         std::basic_string<CharType> fold_case(CharType const *str,std::locale const &loc=std::locale())
337         {
338             CharType const *end=str;
339             while(*end)
340                 end++;
341             return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,str,end);
342         }
343 
344         ///
345         /// Fold case of a string in range [begin,end) according to locale \a loc
346         ///
347         /// \note throws std::bad_cast if loc does not have \ref converter facet installed
348         ///
349         template<typename CharType>
fold_case(CharType const * begin,CharType const * end,std::locale const & loc=std::locale ())350         std::basic_string<CharType> fold_case(CharType const *begin,CharType const *end,std::locale const &loc=std::locale())
351         {
352             return std::use_facet<converter<CharType> >(loc).convert(converter_base::case_folding,begin,end);
353         }
354 
355         ///
356         ///@}
357         ///
358     } // locale
359 
360 } // boost
361 
362 #ifdef BOOST_MSVC
363 #pragma warning(pop)
364 #endif
365 
366 
367 #endif
368 
369 ///
370 /// \example conversions.cpp
371 ///
372 /// Example of using various text conversion functions.
373 ///
374 /// \example wconversions.cpp
375 ///
376 /// Example of using various text conversion functions with wide strings.
377 ///
378 
379 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
380 
381