/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
11 
 /*
  *   LOCATION:    see http://www.boost.org for most recent version.
  *   FILE         test_unicode.hpp
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Unicode specific tests (requires ICU).
  */
18 
19 #include <boost/regex/config.hpp>
20 #ifdef BOOST_HAS_ICU
21 #include "test.hpp"
22 
23 #ifdef BOOST_MSVC
24 #pragma warning(disable:4127)
25 #endif
26 
27 #ifndef BOOST_NO_STD_WSTRING
28 
// Registers one wide-character search test case and runs it through test_icu().
//
//   s - wide string literal holding the regular expression
//   f - syntax flags handed to test_info<wchar_t>::set_info (callers pass perl)
//   t - wide string literal holding the text to search
//   m - match flags handed to set_info (e.g. match_default)
//   a - expected-result array handed to set_info (callers pass make_array(...))
//
// Both literals are first stored in local arrays and then copied into
// std::wstring using the array length minus the terminating null, so the
// length does not depend on where the first null character appears.
// __FILE__ and __LINE__ of the invocation are recorded alongside the data.
// The do{}while(0) wrapper makes the expansion behave as a single statement.
#define TEST_REGEX_SEARCH_U(s, f, t, m, a)\
   do{\
      const wchar_t e[] = { s };\
      std::wstring se(e, (sizeof(e) / sizeof(wchar_t)) - 1);\
      const wchar_t st[] = { t };\
      std::wstring sst(st, (sizeof(st) / sizeof(wchar_t)) - 1);\
      test_info<wchar_t>::set_info(__FILE__, __LINE__, se, f, sst, m, a);\
      test_icu(wchar_t(0), test_regex_search_tag());\
   }while(0)
38 
// Searches the one-character text L"\x<character>" with the expression
// L"[[:<classname>:]]" using perl syntax and match_default.
//
//   classname - class name spelled as raw tokens; it is stringized and pasted
//               onto an L prefix, so multi-word names (Uppercase Letter) and
//               names such as L* are accepted
//   character - hexadecimal digits of the character; pasted after \x and then
//               stringized to form the escape sequence
//
// The expected-result array is always make_array(0, 1, -2, -2).
// NOTE(review): presumably this encodes "one match covering [0, 1)" with -2 as
// a terminator -- confirm against the conventions used by test_info.
#define TEST_REGEX_CLASS_U(classname, character)\
   TEST_REGEX_SEARCH_U(\
      L"[[:" BOOST_JOIN(L, BOOST_STRINGIZE(classname)) L":]]",\
      perl, \
      BOOST_JOIN(L, \
         BOOST_STRINGIZE(\
            BOOST_JOIN(\x, character))), \
      match_default, \
      make_array(0, 1, -2, -2))
48 
49 #else
50 
// BOOST_NO_STD_WSTRING is defined: std::wstring is unavailable, so both test
// macros expand to nothing and each invocation is left as an empty statement.
#define TEST_REGEX_SEARCH_U(s, f, t, m, a)
#define TEST_REGEX_CLASS_U(classname, character)
53 
54 #endif
55 
test_unicode()56 void test_unicode()
57 {
58    using namespace boost::regex_constants;
59 
60    TEST_REGEX_CLASS_U(L*, 3108);
61    TEST_REGEX_CLASS_U(Letter, 3108);
62    TEST_REGEX_CLASS_U(Lu, 2145);
63    TEST_REGEX_CLASS_U(Uppercase Letter, 2145);
64    TEST_REGEX_CLASS_U(Ll, 2146);
65    TEST_REGEX_CLASS_U(Lowercase Letter, 2146);
66    TEST_REGEX_CLASS_U(Lt, 1FFC);
67    TEST_REGEX_CLASS_U(Titlecase Letter, 1FFC);
68    TEST_REGEX_CLASS_U(Lm, 1D61);
69    TEST_REGEX_CLASS_U(Modifier Letter, 1D61);
70    TEST_REGEX_CLASS_U(Lo, 1974);
71    TEST_REGEX_CLASS_U(Other Letter, 1974);
72    TEST_REGEX_CLASS_U(M*, 20EA);
73    TEST_REGEX_CLASS_U(Mark, 20EA);
74    TEST_REGEX_CLASS_U(Mn, 20EA);
75    TEST_REGEX_CLASS_U(Non-Spacing Mark, 20EA);
76    TEST_REGEX_CLASS_U(Mc, 1938);
77    TEST_REGEX_CLASS_U(Spacing Combining Mark, 1938);
78    TEST_REGEX_CLASS_U(Me, 0488);
79    TEST_REGEX_CLASS_U(Enclosing Mark, 0488);
80    TEST_REGEX_CLASS_U(N*, 0669);
81    TEST_REGEX_CLASS_U(Number, 0669);
82    TEST_REGEX_CLASS_U(Nd, 0669);
83    TEST_REGEX_CLASS_U(Decimal Digit Number, 0669);
84    TEST_REGEX_CLASS_U(Nl, 303A);
85    TEST_REGEX_CLASS_U(Letter Number, 303A);
86    TEST_REGEX_CLASS_U(No, 2793);
87    TEST_REGEX_CLASS_U(Other Number, 2793);
88 
89    TEST_REGEX_CLASS_U(S*, 2144);
90    TEST_REGEX_CLASS_U(Symbol, 2144);
91    TEST_REGEX_CLASS_U(Sm, 2144);
92    TEST_REGEX_CLASS_U(Math Symbol, 2144);
93    TEST_REGEX_CLASS_U(Sc, 20B1);
94    TEST_REGEX_CLASS_U(Currency Symbol, 20B1);
95    TEST_REGEX_CLASS_U(Sk, 1FFE);
96    TEST_REGEX_CLASS_U(Modifier Symbol, 1FFE);
97    TEST_REGEX_CLASS_U(So, 19FF);
98    TEST_REGEX_CLASS_U(Other Symbol, 19FF);
99 
100    TEST_REGEX_CLASS_U(P*, 005F);
101    TEST_REGEX_CLASS_U(Punctuation, 005F);
102    TEST_REGEX_CLASS_U(Pc, 005F);
103    TEST_REGEX_CLASS_U(Connector Punctuation, 005F);
104    TEST_REGEX_CLASS_U(Pd, 002D);
105    TEST_REGEX_CLASS_U(Dash Punctuation, 002D);
106    TEST_REGEX_CLASS_U(Ps, 0028);
107    TEST_REGEX_CLASS_U(Open Punctuation, 0028);
108    TEST_REGEX_CLASS_U(Pe, FF63);
109    TEST_REGEX_CLASS_U(Close Punctuation, FF63);
110    TEST_REGEX_CLASS_U(Pi, 2039);
111    TEST_REGEX_CLASS_U(Initial Punctuation, 2039);
112    TEST_REGEX_CLASS_U(Pf, 203A);
113    TEST_REGEX_CLASS_U(Final Punctuation, 203A);
114    TEST_REGEX_CLASS_U(Po, 2038);
115    TEST_REGEX_CLASS_U(Other Punctuation, 2038);
116 
117    TEST_REGEX_CLASS_U(Z*, 202F);
118    TEST_REGEX_CLASS_U(Separator, 202F);
119    TEST_REGEX_CLASS_U(Zs, 202F);
120    TEST_REGEX_CLASS_U(Space Separator, 202F);
121    TEST_REGEX_CLASS_U(Zl, 2028);
122    TEST_REGEX_CLASS_U(Line Separator, 2028);
123    TEST_REGEX_CLASS_U(Zp, 2029);
124    TEST_REGEX_CLASS_U(Paragraph Separator, 2029);
125 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
126    // Some tests have to be disabled for VC6 because the compiler
127    // mangles the string literals...
128    TEST_REGEX_CLASS_U(C*, 009F);
129    TEST_REGEX_CLASS_U(Other, 009F);
130    TEST_REGEX_CLASS_U(Cc, 009F);
131    TEST_REGEX_CLASS_U(Control, 009F);
132 #endif
133    TEST_REGEX_CLASS_U(Cf, FFFB);
134    TEST_REGEX_CLASS_U(Format, FFFB);
135    //TEST_REGEX_CLASS_U(Cs, DC00);
136    //TEST_REGEX_CLASS_U(Surrogate, DC00);
137    TEST_REGEX_CLASS_U(Co, F8FF);
138    TEST_REGEX_CLASS_U(Private Use, F8FF);
139    TEST_REGEX_CLASS_U(Cn, FFFF);
140    TEST_REGEX_CLASS_U(Not Assigned, FFFF);
141    TEST_REGEX_CLASS_U(Any, 2038);
142    TEST_REGEX_CLASS_U(Assigned, 2038);
143    TEST_REGEX_CLASS_U(ASCII, 7f);
144    TEST_REGEX_SEARCH_U(L"[[:Assigned:]]", perl, L"\xffff", match_default, make_array(-2, -2));
145    TEST_REGEX_SEARCH_U(L"[[:ASCII:]]", perl, L"\x80", match_default, make_array(-2, -2));
146 
147    TEST_REGEX_SEARCH_U(L"\\N{KHMER DIGIT SIX}", perl, L"\x17E6", match_default, make_array(0, 1, -2, -2));
148    TEST_REGEX_SEARCH_U(L"\\N{MODIFIER LETTER LOW ACUTE ACCENT}", perl, L"\x02CF", match_default, make_array(0, 1, -2, -2));
149    TEST_REGEX_SEARCH_U(L"\\N{SUPERSCRIPT ONE}", perl, L"\x00B9", match_default, make_array(0, 1, -2, -2));
150    TEST_REGEX_SEARCH_U(L"[\\N{KHMER DIGIT SIX}]", perl, L"\x17E6", match_default, make_array(0, 1, -2, -2));
151    TEST_REGEX_SEARCH_U(L"[\\N{MODIFIER LETTER LOW ACUTE ACCENT}]", perl, L"\x02CF", match_default, make_array(0, 1, -2, -2));
152    TEST_REGEX_SEARCH_U(L"[\\N{SUPERSCRIPT ONE}]", perl, L"\x00B9", match_default, make_array(0, 1, -2, -2));
153    TEST_REGEX_SEARCH_U(L"\\N{CJK UNIFIED IDEOGRAPH-7FED}", perl, L"\x7FED", match_default, make_array(0, 1, -2, -2));
154 #if !BOOST_WORKAROUND(BOOST_MSVC, < 1300)
155    // Some tests have to be disabled for VC6 because the compiler
156    // mangles the string literals...
157    TEST_REGEX_SEARCH_U(L"\\w+", perl, L" e\x301" L"coute ", match_default, make_array(1, 8, -2, -2));
158 
159    TEST_REGEX_SEARCH_U(L"^", perl, L" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ",
160       match_default | match_not_bol, make_array(2, 2, -2, 4, 4, -2, 7, 7, -2, 9, 9, -2, 11, 11, -2, 13, 13, -2, 15, 15, -2, -2));
161    TEST_REGEX_SEARCH_U(L"$", perl, L" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ",
162       match_default | match_not_eol, make_array(1, 1, -2, 3, 3, -2, 5, 5, -2, 8, 8, -2, 10, 10, -2, 12, 12, -2, 14, 14, -2, -2));
163    TEST_REGEX_SEARCH_U(L".", perl, L" \x2028\x2029\x000D\x000A\x000A\x000C\x000D\x0085 ",
164       match_default | match_not_dot_newline, make_array(0, 1, -2, 9, 10, -2, -2));
165 #endif
166 }
167 
168 #else
// BOOST_HAS_ICU is not defined: there is nothing to test, but the entry point
// is still provided as an empty function.
void test_unicode(){}
170 #endif
171