• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #include "test_locale.hpp"
9 #include "test_locale_tools.hpp"
10 #include <boost/locale/util.hpp>
11 #ifdef BOOST_LOCALE_WITH_ICU
12 #include "../src/icu/codecvt.hpp"
13 #endif
14 #if defined(BOOST_LOCALE_WITH_ICONV) && !defined(BOOST_LOCALE_NO_POSIX_BACKEND)
15 #include "../src/posix/codecvt.hpp"
16 #endif
17 
18 #include <string.h>
19 
/// Pack \a v into a raw two-byte UTF-8 shaped sequence (110xxxxx 10xxxxxx).
///
/// No range checking is done, so the result may be an overlong or otherwise
/// invalid encoding; the tests rely on that to build malformed input.
///
/// \return pointer to a NUL-terminated function-local static buffer that is
///         overwritten by the next call to make2().
char *make2(unsigned v)
{
    // The third byte is never written and stays 0, terminating the string.
    static unsigned char bytes[3] = {0};
    unsigned char *out = bytes;
    *out++ = static_cast<unsigned char>(0xC0 | (v >> 6));   // lead byte
    *out++ = static_cast<unsigned char>(0x80 | (v & 0x3F)); // trail byte
    return reinterpret_cast<char*>(bytes);
}
27 
/// Pack \a v into a raw three-byte UTF-8 shaped sequence
/// (1110xxxx 10xxxxxx 10xxxxxx).
///
/// No range checking is done, so overlong forms and surrogate values can be
/// produced on purpose.
///
/// \return pointer to a NUL-terminated function-local static buffer that is
///         overwritten by the next call to make3().
char *make3(unsigned v)
{
    // The last byte is never written and stays 0, terminating the string.
    static unsigned char bytes[4] = {0};
    // Fill the trail bytes from the least significant 6-bit group upwards.
    for(int i = 2; i > 0; --i) {
        bytes[i] = static_cast<unsigned char>(0x80 | (v & 0x3F));
        v >>= 6;
    }
    // Whatever remains after dropping 12 bits goes into the lead byte.
    bytes[0] = static_cast<unsigned char>(0xE0 | v);
    return reinterpret_cast<char*>(bytes);
}
36 
/// Pack \a v into a raw four-byte UTF-8 shaped sequence
/// (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx).
///
/// No range checking is done, so overlong forms and values above 0x10FFFF
/// can be produced on purpose.
///
/// \return pointer to a NUL-terminated function-local static buffer that is
///         overwritten by the next call to make4().
char *make4(unsigned v)
{
    // The last byte is never written and stays 0, terminating the string.
    static unsigned char bytes[5] = {0};
    // Fill the trail bytes from the least significant 6-bit group upwards.
    for(int i = 3; i > 0; --i) {
        bytes[i] = static_cast<unsigned char>(0x80 | (v & 0x3F));
        v >>= 6;
    }
    // Whatever remains after dropping 18 bits goes into the lead byte.
    bytes[0] = static_cast<unsigned char>(0xF0 | v);
    return reinterpret_cast<char*>(bytes);
}
46 
// Sentinel results the tests compare converter return values against.
static const unsigned illegal    = 0xFFFFFFFFu; // expected for invalid input
static const unsigned incomplete = 0xFFFFFFFEu; // expected for truncated input / too small output
49 
50 
test_to(boost::locale::util::base_converter & cvt,char const * s,unsigned codepoint)51 bool test_to(boost::locale::util::base_converter &cvt,char const *s,unsigned codepoint)
52 {
53     size_t len = strlen(s);
54     char const *end = s + len;
55     return cvt.to_unicode(s,end) == codepoint;
56 }
57 
test_from(boost::locale::util::base_converter & cvt,unsigned codepoint,char const * str)58 bool test_from(boost::locale::util::base_converter &cvt,unsigned codepoint,char const *str)
59 {
60     char buf[32];
61     unsigned res = cvt.from_unicode(codepoint,buf,buf+sizeof(buf));
62     if(res == boost::locale::util::base_converter::illegal) {
63         return str == 0;
64     }
65     else {
66         return str!=0 && strlen(str) == res && memcmp(str,buf,res) == 0;
67     }
68 }
69 
test_incomplete(boost::locale::util::base_converter & cvt,unsigned codepoint,int len)70 bool test_incomplete(boost::locale::util::base_converter &cvt,unsigned codepoint,int len)
71 {
72     char buf[32];
73     unsigned res = cvt.from_unicode(codepoint,buf,buf+len);
74     return res == incomplete;
75 }
76 
77 
78 
// Shorthands that wrap the helper predicates above in TEST(). They all
// dereference a variable named `cvt`, so one must be in scope at the call site.
// TEST_TO: decoding the byte string `str` must give `codepoint`.
#define TEST_TO(str,codepoint) TEST(test_to(*cvt,str,codepoint))
// TEST_FROM: encoding `codepoint` must give `str` (pass 0 as `str` to expect
// rejection). Note the macro takes (str, codepoint) but forwards them to
// test_from() in (codepoint, str) order.
#define TEST_FROM(str,codepoint) TEST(test_from(*cvt,codepoint,str))
// TEST_INC: encoding `codepoint` into only `len` output bytes must be
// reported as incomplete.
#define TEST_INC(codepoint,len) TEST(test_incomplete(*cvt,codepoint,len))
82 
test_shiftjis(boost::locale::util::base_converter * pcvt)83 void test_shiftjis(boost::locale::util::base_converter* pcvt)
84 {
85     boost::locale::hold_ptr<boost::locale::util::base_converter> cvt(pcvt);
86 
87         std::cout << "- Correct" << std::endl;
88         TEST_TO("a",'a');
89         TEST_TO("X",'X');
90         TEST_TO("\xCB",0xFF8b); // half width katakana Hi ヒ
91         TEST_TO("\x83\x71",0x30d2); // Full width katakana Hi ヒ
92         TEST_TO("\x82\xd0",0x3072); // Full width hiragana Hi ひ
93 
94         TEST_FROM("a",'a');
95         TEST_FROM("X",'X');
96         TEST_FROM("\xCB",0xFF8b); // half width katakana Hi ヒ
97         TEST_FROM("\x83\x71",0x30d2); // Full width katakana Hi ヒ
98         TEST_FROM("\x82\xd0",0x3072); // Full width hiragana Hi ひ
99 
100         std::cout << "- Illegal/incomplete" << std::endl;
101 
102         TEST_TO("\xa0",illegal);
103         TEST_TO("\x82",incomplete);
104         TEST_TO("\x83\xf0",illegal);
105 
106         TEST_INC(0x30d2,1); // Full width katakana Hi ヒ
107         TEST_INC(0x3072,1); // Full width hiragana Hi ひ
108 
109         TEST_FROM(0,0x5e9); // Hebrew ש not in ShiftJIS
110 }
111 
112 
main()113 int main()
114 {
115     try {
116         using namespace boost::locale::util;
117 
118         #ifndef BOOST_NO_CXX11_SMART_PTR
119         std::unique_ptr<base_converter> cvt;
120         #else
121         std::auto_ptr<base_converter> cvt;
122         #endif
123 
124         std::cout << "Test UTF-8" << std::endl;
125         std::cout << "- From UTF-8" << std::endl;
126 
127 
128         #ifndef BOOST_NO_CXX11_SMART_PTR
129         cvt = std::move(create_utf8_converter_unique_ptr());
130         #else
131         cvt = create_utf8_converter();
132         #endif
133 
134         TEST(cvt.get());
135         TEST(cvt->is_thread_safe());
136         TEST(cvt->max_len() == 4);
137 
138         std::cout << "-- Correct" << std::endl;
139 
140         TEST_TO("\x7f",0x7f);
141         TEST_TO("\xC2\x80",0x80);
142         TEST_TO("\xdf\xBF",0x7FF);
143         TEST_TO("\xe0\xa0\x80",0x800);
144         TEST_TO("\xef\xbf\xbf",0xFFFF);
145         TEST_TO("\xf0\x90\x80\x80",0x10000);
146         TEST_TO("\xf4\x8f\xbf\xbf",0x10FFFF);
147 
148         std::cout << "-- Too big" << std::endl;
149         TEST_TO("\xf4\x9f\x80\x80",illegal); //  11 0000
150         TEST_TO("\xfb\xbf\xbf\xbf",illegal); // 3FF FFFF
151         TEST_TO("\xf8\x90\x80\x80\x80",illegal);  // 400 0000
152         TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf",illegal);  // 7fff ffff
153 
154         std::cout << "-- Invalid trail" << std::endl;
155         TEST_TO("\xC2\x7F",illegal);
156         TEST_TO("\xdf\x7F",illegal);
157         TEST_TO("\xe0\x7F\x80",illegal);
158         TEST_TO("\xef\xbf\x7F",illegal);
159         TEST_TO("\xe0\x7F\x80",illegal);
160         TEST_TO("\xef\xbf\x7F",illegal);
161         TEST_TO("\xf0\x7F\x80\x80",illegal);
162         TEST_TO("\xf4\x7f\xbf\xbf",illegal);
163         TEST_TO("\xf0\x90\x7F\x80",illegal);
164         TEST_TO("\xf4\x8f\x7F\xbf",illegal);
165         TEST_TO("\xf0\x90\x80\x7F",illegal);
166         TEST_TO("\xf4\x8f\xbf\x7F",illegal);
167 
168         std::cout << "-- Invalid length" << std::endl;
169 
170         /// Test that this actually works
171         TEST_TO(make2(0x80),0x80);
172         TEST_TO(make2(0x7ff),0x7ff);
173 
174         TEST_TO(make3(0x800),0x800);
175         TEST_TO(make3(0xffff),0xffff);
176 
177         TEST_TO(make4(0x10000),0x10000);
178         TEST_TO(make4(0x10ffff),0x10ffff);
179 
180         TEST_TO(make4(0x110000),illegal);
181         TEST_TO(make4(0x1fffff),illegal);
182 
183         TEST_TO(make2(0),illegal);
184         TEST_TO(make3(0),illegal);
185         TEST_TO(make4(0),illegal);
186         TEST_TO(make2(0x7f),illegal);
187         TEST_TO(make3(0x7f),illegal);
188         TEST_TO(make4(0x7f),illegal);
189 
190         TEST_TO(make3(0x80),illegal);
191         TEST_TO(make4(0x80),illegal);
192         TEST_TO(make3(0x7ff),illegal);
193         TEST_TO(make4(0x7ff),illegal);
194 
195         TEST_TO(make4(0x8000),illegal);
196         TEST_TO(make4(0xffff),illegal);
197 
198         std::cout << "-- Invalid surrogate" << std::endl;
199 
200         TEST_TO(make3(0xD800),illegal);
201         TEST_TO(make3(0xDBFF),illegal);
202         TEST_TO(make3(0xDC00),illegal);
203         TEST_TO(make3(0xDFFF),illegal);
204 
205         TEST_TO(make4(0xD800),illegal);
206         TEST_TO(make4(0xDBFF),illegal);
207         TEST_TO(make4(0xDC00),illegal);
208         TEST_TO(make4(0xDFFF),illegal);
209 
210         std::cout <<"-- Incomplete" << std::endl;
211 
212         TEST_TO("\x80",illegal);
213         TEST_TO("\xC2",incomplete);
214 
215         TEST_TO("\xdf",incomplete);
216 
217         TEST_TO("\xe0",incomplete);
218         TEST_TO("\xe0\xa0",incomplete);
219 
220         TEST_TO("\xef\xbf",incomplete);
221         TEST_TO("\xef",incomplete);
222 
223         TEST_TO("\xf0\x90\x80",incomplete);
224         TEST_TO("\xf0\x90",incomplete);
225         TEST_TO("\xf0",incomplete);
226 
227         TEST_TO("\xf4\x8f\xbf",incomplete);
228         TEST_TO("\xf4\x8f",incomplete);
229         TEST_TO("\xf4",incomplete);
230 
231         std::cout << "- To UTF-8" << std::endl;
232 
233         std::cout << "-- Test correct" << std::endl;
234 
235         TEST_FROM("\x7f",0x7f);
236         TEST_FROM("\xC2\x80",0x80);
237         TEST_FROM("\xdf\xBF",0x7FF);
238         TEST_INC(0x7FF,1);
239         TEST_FROM("\xe0\xa0\x80",0x800);
240         TEST_INC(0x800,2);
241         TEST_INC(0x800,1);
242         TEST_FROM("\xef\xbf\xbf",0xFFFF);
243         TEST_INC(0x10000,3);
244         TEST_INC(0x10000,2);
245         TEST_INC(0x10000,1);
246         TEST_FROM("\xf0\x90\x80\x80",0x10000);
247         TEST_FROM("\xf4\x8f\xbf\xbf",0x10FFFF);
248 
249         std::cout << "-- Test no surrogate " << std::endl;
250 
251         TEST_FROM(0,0xD800);
252         TEST_FROM(0,0xDBFF);
253         TEST_FROM(0,0xDC00);
254         TEST_FROM(0,0xDFFF);
255 
256         std::cout << "-- Test invalid " << std::endl;
257 
258         TEST_FROM(0,0x110000);
259         TEST_FROM(0,0x1FFFFF);
260 
261 
262         std::cout << "Test windows-1255" << std::endl;
263 
264         #ifndef BOOST_NO_CXX11_SMART_PTR
265         cvt = std::move(create_simple_converter_unique_ptr("windows-1255"));
266         #else
267         cvt = create_simple_converter("windows-1255");
268         #endif
269 
270         TEST(cvt.get());
271         TEST(cvt->is_thread_safe());
272         TEST(cvt->max_len() == 1);
273 
274         std::cout << "- From 1255" << std::endl;
275 
276         TEST_TO("\xa4",0x20aa);
277         TEST_TO("\xe0",0x05d0);
278         TEST_TO("\xc4",0x5b4);
279         TEST_TO("\xfb",illegal);
280         TEST_TO("\xdd",illegal);
281         TEST_TO("\xff",illegal);
282         TEST_TO("\xfe",0x200f);
283 
284         std::cout << "- To 1255" << std::endl;
285 
286         TEST_FROM("\xa4",0x20aa);
287         TEST_FROM("\xe0",0x05d0);
288         TEST_FROM("\xc4",0x5b4);
289         TEST_FROM("\xfe",0x200f);
290 
291         TEST_FROM(0,0xe4);
292         TEST_FROM(0,0xd0);
293 
294         #ifdef BOOST_LOCALE_WITH_ICU
295         std::cout << "Testing Shift-JIS using ICU/uconv" << std::endl;
296 
297         cvt.reset(boost::locale::impl_icu::create_uconv_converter("Shift-JIS"));
298         TEST(cvt.get());
299         test_shiftjis(cvt.release());
300         #endif
301 
302         #if defined(BOOST_LOCALE_WITH_ICONV) && !defined(BOOST_LOCALE_NO_POSIX_BACKEND)
303         std::cout << "Testing Shift-JIS using POSIX/iconv" << std::endl;
304 
305         cvt.reset(boost::locale::impl_posix::create_iconv_converter("Shift-JIS"));
306         if(cvt.get()) {
307             test_shiftjis(cvt.release());
308         }
309         else {
310             std::cout<< "- Shift-JIS is not supported!" << std::endl;
311         }
312         #endif
313 
314     }
315     catch(std::exception const &e) {
316         std::cerr << "Failed " << e.what() << std::endl;
317         return EXIT_FAILURE;
318     }
319     FINALIZE();
320 }
321 
322 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
323 // boostinspect:noascii
324