1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8
9 #include <boost/locale/encoding.hpp>
10 #include <boost/locale/generator.hpp>
11 #include <boost/locale/localization_backend.hpp>
12 #include <boost/locale/info.hpp>
13 #include <boost/locale/config.hpp>
14 #include <fstream>
15 #include "test_locale.hpp"
16 #include "test_locale_tools.hpp"
17
18
19 #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
20 # ifdef __APPLE__
21 # include <xlocale.h>
22 # endif
23 # include <locale.h>
24 #endif
25
26 #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__))
27 #ifndef NOMINMAX
28 # define NOMINMAX
29 #endif
30 #include <windows.h>
31 #endif
32
33
// Switches consulted by the test functions below. main() assigns test_iso,
// test_utf and test_sjis again for every backend before running the tests.
bool test_iso = false;
bool test_iso_8859_8 = true;
bool test_utf = false;
bool test_sjis = false;

// Locale names for the 8-bit and Shift-JIS tests; main() fills them in
// for every backend.
std::string he_il_8bit;
std::string en_us_8bit;
std::string ja_jp_shiftjis;
42
43
// Extracts characters from `in` one by one with get() until an extraction
// fails and returns everything that was read up to that point.
template<typename Char>
std::basic_string<Char> read_file(std::basic_istream<Char> &in)
{
    std::basic_string<Char> content;
    for(Char ch; in.get(ch); )
        content.push_back(ch);
    return content;
}
53
54
55 template<typename Char>
test_ok(std::string file,std::locale const & l,std::basic_string<Char> cmp=std::basic_string<Char> ())56 void test_ok(std::string file,std::locale const &l,std::basic_string<Char> cmp=std::basic_string<Char>())
57 {
58 if(cmp.empty())
59 cmp=to<Char>(file);
60 std::ofstream test("testi.txt");
61 test << file;
62 test.close();
63 typedef std::basic_fstream<Char> stream_type;
64
65 stream_type f1("testi.txt",stream_type::in);
66 f1.imbue(l);
67 TEST(read_file<Char>(f1) == cmp);
68 f1.close();
69
70 stream_type f2("testo.txt",stream_type::out);
71 f2.imbue(l);
72 f2 << cmp;
73 f2.close();
74
75 std::ifstream testo("testo.txt");
76 TEST(read_file<char>(testo) == file);
77 }
78
// Stores `file` in testi.txt and reads it back through a Char stream
// imbued with `l`. At most `pos` characters may be extracted successfully;
// the extraction that follows them has to fail.
template<typename Char>
void test_rfail(std::string file,std::locale const &l,int pos)
{
    typedef std::basic_fstream<Char> stream_type;

    {
        std::ofstream test("testi.txt");
        test << file;
    }

    stream_type f1("testi.txt",stream_type::in);
    f1.imbue(l);

    Char c;
    for(int consumed=0;consumed<pos;++consumed) {
        if(f1.get(c).fail()) {
            // the failure showed up early because errors
            // are detected when reading ahead
            return;
        }
        TEST(f1);
    }
    // all `pos` extractions above succeeded, so this one MUST fail
    TEST(f1.get(c).fail());
}
101
102 template<typename Char>
test_wfail(std::string file,std::locale const & l,int pos)103 void test_wfail(std::string file,std::locale const &l,int pos)
104 {
105 typedef std::basic_fstream<Char> stream_type;
106 stream_type f1("testo.txt",stream_type::out);
107 f1.imbue(l);
108 std::basic_string<Char> out=to<Char>(file);
109 int i;
110 for(i=0;i<pos;i++) {
111 f1 << out.at(i);
112 f1<<std::flush;
113 TEST(f1.good());
114 }
115 f1 << out.at(i);
116 TEST(f1.fail() || (f1<<std::flush).fail());
117 }
118
119
120 template<typename Char>
test_for_char()121 void test_for_char()
122 {
123 boost::locale::generator g;
124 if(test_utf) {
125 std::cout << " UTF-8" << std::endl;
126 test_ok<Char>("grüße\nn i",g("en_US.UTF-8"));
127 test_rfail<Char>("abc\xFF\xFF",g("en_US.UTF-8"),3);
128 std::cout << " Testing codepoints above 0xFFFF" << std::endl;
129 std::cout << " Single U+2008A" << std::endl;
130 test_ok<Char>("\xf0\xa0\x82\x8a",g("en_US.UTF-8")); // U+2008A
131 std::cout << " Single U+2008A withing text" << std::endl;
132 test_ok<Char>("abc\"\xf0\xa0\x82\x8a\"",g("en_US.UTF-8")); // U+2008A
133 std::string one = "\xf0\xa0\x82\x8a";
134 std::string res;
135 for(unsigned i=0;i<1000;i++)
136 res+=one;
137 std::cout << " U+2008A x 1000" << std::endl;
138 test_ok<Char>(res.c_str(),g("en_US.UTF-8")); // U+2008A
139 }
140 else {
141 std::cout << " UTF-8 Not supported " << std::endl;
142 }
143
144 if(test_iso) {
145 if(test_iso_8859_8) {
146 std::cout << " ISO8859-8" << std::endl;
147 test_ok<Char>("hello \xf9\xec\xe5\xed",g(he_il_8bit),to<Char>("hello שלום"));
148 }
149 std::cout << " ISO8859-1" << std::endl;
150 test_ok<Char>(to<char>("grüße\nn i"),g(en_us_8bit),to<Char>("grüße\nn i"));
151 test_wfail<Char>("grüßen שלום",g(en_us_8bit),7);
152 }
153
154 if(test_sjis) {
155 std::cout << " Shift-JIS" << std::endl;
156 test_ok<Char>("\x93\xfa\x96\x7b",g(ja_jp_shiftjis),
157 boost::locale::conv::to_utf<Char>("\xe6\x97\xa5\xe6\x9c\xac","UTF-8")); // Japan
158 }
159 }
test_wide_io()160 void test_wide_io()
161 {
162 std::cout << " wchar_t" << std::endl;
163 test_for_char<wchar_t>();
164
165 #if defined BOOST_LOCALE_ENABLE_CHAR16_T && !defined(BOOST_NO_CHAR16_T_CODECVT)
166 std::cout << " char16_t" << std::endl;
167 test_for_char<char16_t>();
168 #endif
169 #if defined BOOST_LOCALE_ENABLE_CHAR32_T && !defined(BOOST_NO_CHAR32_T_CODECVT)
170 std::cout << " char32_t" << std::endl;
171 test_for_char<char32_t>();
172 #endif
173 }
174
175 template<typename Char>
test_pos(std::string source,std::basic_string<Char> target,std::string encoding)176 void test_pos(std::string source,std::basic_string<Char> target,std::string encoding)
177 {
178 using namespace boost::locale::conv;
179 boost::locale::generator g;
180 std::locale l= encoding == "ISO8859-8" ? g("he_IL."+encoding) : g("en_US."+encoding);
181 TEST(to_utf<Char>(source,encoding)==target);
182 TEST(to_utf<Char>(source.c_str(),encoding)==target);
183 TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
184
185 TEST(to_utf<Char>(source,l)==target);
186 TEST(to_utf<Char>(source.c_str(),l)==target);
187 TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
188
189 TEST(from_utf<Char>(target,encoding)==source);
190 TEST(from_utf<Char>(target.c_str(),encoding)==source);
191 TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),encoding)==source);
192
193 TEST(from_utf<Char>(target,l)==source);
194 TEST(from_utf<Char>(target.c_str(),l)==source);
195 TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),l)==source);
196 }
197
// Shorthand for TEST_THROWS with boost::locale::conv::conversion_error as
// the exception type. TEST_THROWS is not defined in this file; presumably
// it comes from test_locale.hpp.
#define TESTF(X) TEST_THROWS(X,boost::locale::conv::conversion_error)
199
200 template<typename Char>
test_to_neg(std::string source,std::basic_string<Char> target,std::string encoding)201 void test_to_neg(std::string source,std::basic_string<Char> target,std::string encoding)
202 {
203 using namespace boost::locale::conv;
204 boost::locale::generator g;
205 std::locale l=g("en_US."+encoding);
206
207 TEST(to_utf<Char>(source,encoding)==target);
208 TEST(to_utf<Char>(source.c_str(),encoding)==target);
209 TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
210 TEST(to_utf<Char>(source,l)==target);
211 TEST(to_utf<Char>(source.c_str(),l)==target);
212 TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
213
214 TESTF(to_utf<Char>(source,encoding,stop));
215 TESTF(to_utf<Char>(source.c_str(),encoding,stop));
216 TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
217 TESTF(to_utf<Char>(source,l,stop));
218 TESTF(to_utf<Char>(source.c_str(),l,stop));
219 TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
220 }
221
222 template<typename Char>
test_from_neg(std::basic_string<Char> source,std::string target,std::string encoding)223 void test_from_neg(std::basic_string<Char> source,std::string target,std::string encoding)
224 {
225 using namespace boost::locale::conv;
226 boost::locale::generator g;
227 std::locale l=g("en_US."+encoding);
228
229 TEST(from_utf<Char>(source,encoding)==target);
230 TEST(from_utf<Char>(source.c_str(),encoding)==target);
231 TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
232 TEST(from_utf<Char>(source,l)==target);
233 TEST(from_utf<Char>(source.c_str(),l)==target);
234 TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
235
236 TESTF(from_utf<Char>(source,encoding,stop));
237 TESTF(from_utf<Char>(source.c_str(),encoding,stop));
238 TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
239 TESTF(from_utf<Char>(source,l,stop));
240 TESTF(from_utf<Char>(source.c_str(),l,stop));
241 TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
242 }
243
244 template<typename Char>
utf(char const * s)245 std::basic_string<Char> utf(char const *s)
246 {
247 return to<Char>(s);
248 }
249
250 template<>
utf(char const * s)251 std::basic_string<char> utf(char const *s)
252 {
253 return s;
254 }
255
256 template<typename Char>
test_with_0()257 void test_with_0()
258 {
259 std::string a("abc\0\0 yz\0",3+2+3+1);
260 TEST(boost::locale::conv::from_utf<Char>(boost::locale::conv::to_utf<Char>(a,"UTF-8"),"UTF-8") == a);
261 TEST(boost::locale::conv::from_utf<Char>(boost::locale::conv::to_utf<Char>(a,"ISO8859-1"),"ISO8859-1") == a);
262 }
263
264 template<typename Char,int n=sizeof(Char)>
265 struct utfutf;
266
267 template<>
268 struct utfutf<char,1> {
okutfutf269 static char const *ok() {return "grüßen";}
badutfutf270 static char const *bad() { return "gr\xFF" "üßen"; }
271 // split into 2 to make SunCC happy
272 };
273
274 template<>
275 struct utfutf<wchar_t,2> {
okutfutf276 static wchar_t const *ok(){ return L"\x67\x72\xfc\xdf\x65\x6e"; }
badutfutf277 static wchar_t const *bad() {
278 static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xFE\xFD\xdf\x65\x6e";
279 buf[2]=0xDC01; // second surrogate must not be
280 buf[4]=0xD801; // First
281 buf[5]=0xD801; // Must be surrogate trail
282 return buf;
283 }
284 };
285 template<>
286 struct utfutf<wchar_t,4> {
okutfutf287 static wchar_t const *ok(){ return L"\x67\x72\xfc\xdf\x65\x6e"; }
badutfutf288 static wchar_t const *bad() {
289 static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xdf\x65\x6e";
290 buf[2]=static_cast<wchar_t>(0x1000000); // > 10FFFF
291 return buf;
292 }
293 };
294
295
296 template<typename CharOut,typename CharIn>
test_combinations()297 void test_combinations()
298 {
299 using boost::locale::conv::utf_to_utf;
300 typedef utfutf<CharOut> out;
301 typedef utfutf<CharIn> in;
302 TEST( (utf_to_utf<CharOut,CharIn>(in::ok())==out::ok()) );
303 TESTF( (utf_to_utf<CharOut,CharIn>(in::bad(),boost::locale::conv::stop)) );
304 TEST( (utf_to_utf<CharOut,CharIn>(in::bad())==out::ok()) );
305 }
306
test_all_combinations()307 void test_all_combinations()
308 {
309 std::cout << "Testing utf_to_utf" << std::endl;
310 std::cout <<" char<-char"<<std::endl;
311 test_combinations<char,char>();
312 std::cout <<" char<-wchar"<<std::endl;
313 test_combinations<char,wchar_t>();
314 std::cout <<" wchar<-char"<<std::endl;
315 test_combinations<wchar_t,char>();
316 std::cout <<" wchar<-wchar"<<std::endl;
317 test_combinations<wchar_t,wchar_t>();
318 }
319
320 template<typename Char>
test_to()321 void test_to()
322 {
323 test_pos<Char>(to<char>("grüßen"),utf<Char>("grüßen"),"ISO8859-1");
324 if(test_iso_8859_8)
325 test_pos<Char>("\xf9\xec\xe5\xed",utf<Char>("שלום"),"ISO8859-8");
326 test_pos<Char>("grüßen",utf<Char>("grüßen"),"UTF-8");
327 test_pos<Char>("abc\"\xf0\xa0\x82\x8a\"",utf<Char>("abc\"\xf0\xa0\x82\x8a\""),"UTF-8");
328
329 test_to_neg<Char>("g\xFFrüßen",utf<Char>("grüßen"),"UTF-8");
330 test_from_neg<Char>(utf<Char>("hello שלום"),"hello ","ISO8859-1");
331
332 test_with_0<Char>();
333 }
334
335
test_skip(char const * enc,char const * utf,char const * name,char const * opt=0)336 void test_skip(char const *enc,char const *utf,char const *name,char const *opt=0)
337 {
338 if(opt!=0) {
339 if(boost::locale::conv::to_utf<char>(enc,name) == opt) {
340 test_skip(enc,opt,name);
341 return;
342 }
343 }
344 TEST(boost::locale::conv::to_utf<char>(enc,name) == utf);
345 TEST(boost::locale::conv::to_utf<wchar_t>(enc,name) == boost::locale::conv::utf_to_utf<wchar_t>(utf));
346 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
347 TEST(boost::locale::conv::to_utf<char16_t>(enc,name) == boost::locale::conv::utf_to_utf<char16_t>(utf));
348 #endif
349 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
350 TEST(boost::locale::conv::to_utf<char32_t>(enc,name) == boost::locale::conv::utf_to_utf<char32_t>(utf));
351 #endif
352 }
353
test_simple_conversions()354 void test_simple_conversions()
355 {
356 namespace blc=boost::locale::conv;
357 std::cout << "- Testing correct invalid bytes skipping" << std::endl;
358 try {
359 std::cout << "-- ISO-8859-8" << std::endl;
360 test_skip("test \xE0\xE1\xFB-","test \xd7\x90\xd7\x91-","ISO-8859-8");
361 test_skip("\xFB","","ISO-8859-8");
362 test_skip("test \xE0\xE1\xFB","test \xd7\x90\xd7\x91","ISO-8859-8");
363 test_skip("\xFB-","-","ISO-8859-8");
364 }
365 catch(blc::invalid_charset_error const &) {
366 std::cout <<"--- not supported" << std::endl;
367 }
368 try {
369 std::cout << "-- cp932" << std::endl;
370 test_skip("test\xE0\xA0 \x83\xF8-","test\xe7\x87\xbf -","cp932","test\xe7\x87\xbf ");
371 test_skip("\x83\xF8","","cp932");
372 test_skip("test\xE0\xA0 \x83\xF8","test\xe7\x87\xbf ","cp932");
373 test_skip("\x83\xF8-","-","cp932","");
374 }
375 catch(blc::invalid_charset_error const &) {
376 std::cout <<"--- not supported" << std::endl;
377 }
378 }
379
380
main()381 int main()
382 {
383 try {
384 std::vector<std::string> def;
385 #ifdef BOOST_LOCALE_WITH_ICU
386 def.push_back("icu");
387 #endif
388 #ifndef BOOST_LOCALE_NO_STD_BACKEND
389 def.push_back("std");
390 #endif
391 #ifndef BOOST_LOCALE_NO_WINAPI_BACKEND
392 def.push_back("winapi");
393 #endif
394 #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
395 def.push_back("posix");
396 #endif
397
398 #if !defined(BOOST_LOCALE_WITH_ICU) && !defined(BOOST_LOCALE_WITH_ICONV) && (defined(BOOST_WINDOWS) || defined(__CYGWIN__))
399 test_iso_8859_8 = IsValidCodePage(28598)!=0;
400 #endif
401
402 test_simple_conversions();
403
404
405 for(int type = 0; type < int(def.size()); type ++ ) {
406 boost::locale::localization_backend_manager tmp_backend = boost::locale::localization_backend_manager::global();
407 tmp_backend.select(def[type]);
408 boost::locale::localization_backend_manager::global(tmp_backend);
409
410 std::string bname = def[type];
411
412 if(bname=="std") {
413 en_us_8bit = get_std_name("en_US.ISO8859-1");
414 he_il_8bit = get_std_name("he_IL.ISO8859-8");
415 ja_jp_shiftjis = get_std_name("ja_JP.SJIS");
416 if(!ja_jp_shiftjis.empty() && !test_std_supports_SJIS_codecvt(ja_jp_shiftjis))
417 {
418 std::cout << "Warning: detected unproper support of " << ja_jp_shiftjis << " locale, disableling it" << std::endl;
419 ja_jp_shiftjis = "";
420 }
421 }
422 else {
423 en_us_8bit = "en_US.ISO8859-1";
424 he_il_8bit = "he_IL.ISO8859-8";
425 ja_jp_shiftjis = "ja_JP.SJIS";
426 }
427
428 std::cout << "Testing for backend " << def[type] << std::endl;
429
430 test_iso = true;
431 if(bname=="std" && (he_il_8bit.empty() || en_us_8bit.empty())) {
432 std::cout << "no iso locales availible, passing" << std::endl;
433 test_iso = false;
434 }
435 test_sjis = true;
436 if(bname=="std" && ja_jp_shiftjis.empty()) {
437 test_sjis = false;
438 }
439 if(bname=="winapi") {
440 test_iso = false;
441 test_sjis = false;
442 }
443 test_utf = true;
444 #ifndef BOOST_LOCALE_NO_POSIX_BACKEND
445 if(bname=="posix") {
446 {
447 locale_t l = newlocale(LC_ALL_MASK,he_il_8bit.c_str(),0);
448 if(!l)
449 test_iso = false;
450 else
451 freelocale(l);
452 }
453 {
454 locale_t l = newlocale(LC_ALL_MASK,en_us_8bit.c_str(),0);
455 if(!l)
456 test_iso = false;
457 else
458 freelocale(l);
459 }
460 {
461 locale_t l = newlocale(LC_ALL_MASK,"en_US.UTF-8",0);
462 if(!l)
463 test_utf = false;
464 else
465 freelocale(l);
466 }
467 #ifdef BOOST_LOCALE_WITH_ICONV
468 {
469 locale_t l = newlocale(LC_ALL_MASK,ja_jp_shiftjis.c_str(),0);
470 if(!l)
471 test_sjis = false;
472 else
473 freelocale(l);
474 }
475 #else
476 test_sjis = false;
477 #endif
478 }
479 #endif
480
481 if(def[type]=="std" && (get_std_name("en_US.UTF-8").empty() || get_std_name("he_IL.UTF-8").empty()))
482 {
483 test_utf = false;
484 }
485
486 std::cout << "Testing wide I/O" << std::endl;
487 test_wide_io();
488 std::cout << "Testing charset to/from UTF conversion functions" << std::endl;
489 std::cout << " char" << std::endl;
490 test_to<char>();
491 std::cout << " wchar_t" << std::endl;
492 test_to<wchar_t>();
493 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
494 if(bname == "icu" || bname == "std") {
495 std::cout << " char16_t" << std::endl;
496 test_to<char16_t>();
497 }
498 #endif
499 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
500 if(bname == "icu" || bname == "std") {
501 std::cout << " char32_t" << std::endl;
502 test_to<char32_t>();
503 }
504 #endif
505
506 test_all_combinations();
507 }
508 }
509 catch(std::exception const &e) {
510 std::cerr << "Failed " << e.what() << std::endl;
511 return EXIT_FAILURE;
512 }
513 FINALIZE();
514 }
515
516 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
517 // boostinspect:noascii
518