1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8
9 #ifndef BOOST_LOCALE_WITH_ICU
10 #include <iostream>
// Entry point used when BOOST_LOCALE_WITH_ICU is not defined: there is
// nothing to test, so only a notice is printed and the program succeeds.
int main()
{
    std::cout << "ICU is not build... Skipping" << '\n' << std::flush;
    return 0;
}
15 #else
16
17 #define BOOST_LOCALE_ERROR_LIMIT 100000
18
19 #include <boost/locale/boundary.hpp>
20 #include <boost/locale/generator.hpp>
21 #include "test_locale.hpp"
22 #include "test_locale_tools.hpp"
23 #include <list>
24 #include <unicode/uversion.h>
25
26 // Debugging code
27
// Debug helper, generic case: deliberately prints nothing. Only the char
// specialization below produces output.
template<typename Char>
void print_str(std::basic_string<Char> const &)
{
}
32
33 template<>
print_str(std::basic_string<char> const & s)34 void print_str<char>(std::basic_string<char> const &s)
35 {
36 std::cout << "[" << s <<"]" << std::endl;
37 }
38
39
40 namespace lb = boost::locale::boundary;
41
42 template<typename Char,typename Iterator>
test_word_container(Iterator begin,Iterator end,std::vector<int> const & ipos,std::vector<int> const & imasks,std::vector<std::basic_string<Char>> const & ichunks,std::locale l,lb::boundary_type bt=lb::word)43 void test_word_container(Iterator begin,Iterator end,
44 std::vector<int> const &ipos,
45 std::vector<int> const &imasks,
46 std::vector<std::basic_string<Char> > const &ichunks,
47 std::locale l,
48 lb::boundary_type bt=lb::word
49 )
50 {
51 for(int sm=(bt == lb::word ? 31 : 3 ) ;sm>=0;sm--) {
52 unsigned mask =
53 ((sm & 1 ) != 0) * 0xF
54 + ((sm & 2 ) != 0) * 0xF0
55 + ((sm & 4 ) != 0) * 0xF00
56 + ((sm & 8 ) != 0) * 0xF000
57 + ((sm & 16) != 0) * 0xF0000;
58
59 std::vector<int> masks,pos;
60 std::vector<unsigned> bmasks;
61 std::basic_string<Char> empty_chunk;
62
63 std::vector<std::basic_string<Char> > chunks;
64 std::vector<std::basic_string<Char> > fchunks;
65 std::vector<Iterator> iters;
66 iters.push_back(begin);
67 bmasks.push_back(0);
68
69 for(unsigned i=0;i<imasks.size();i++) {
70 if(imasks[i] & mask) {
71 masks.push_back(imasks[i]);
72 chunks.push_back(ichunks[i]);
73 fchunks.push_back(empty_chunk + ichunks[i]);
74 empty_chunk.clear();
75 pos.push_back(ipos[i]);
76 }
77 else {
78 empty_chunk+=ichunks[i];
79 }
80
81 if((imasks[i] & mask) || i==imasks.size()-1){
82 Iterator ptr=begin;
83 std::advance(ptr,ipos[i]);
84 iters.push_back(ptr);
85 bmasks.push_back(imasks[i]);
86 }
87 }
88
89 //
90 // segment iterator tests
91 //
92 {
93 lb::segment_index<Iterator> map(bt,begin,end,l);
94 typedef typename lb::segment_index<Iterator>::iterator iter_type;
95
96 map.rule(mask);
97
98 {
99 unsigned i=0;
100 iter_type p;
101 map.full_select(false);
102 for(p=map.begin();p!=map.end();++p,i++) {
103 TEST(p->str()==chunks[i]);
104 TEST(p->rule() == unsigned(masks[i]));
105 }
106
107 TEST(chunks.size() == i);
108 for(;;) {
109 if(p==map.begin()) {
110 TEST(i==0);
111 break;
112 }
113 else {
114 --p;
115 TEST(p->str()==chunks[--i]);
116 TEST(p->rule() == unsigned(masks[i]));
117 }
118 }
119 for(i=0,p=map.end();i<chunks.size();i++){
120 --p;
121 unsigned index = chunks.size() - i - 1;
122 TEST(p->str()==chunks[index]);
123 TEST(p->rule() == unsigned(masks[index]));
124 }
125 TEST(p==map.begin());
126 }
127
128 {
129 unsigned i=0;
130 iter_type p;
131 map.full_select(true);
132 for(p=map.begin();p!=map.end();++p,i++) {
133 TEST(p->str()==fchunks[i]);
134 TEST(p->rule() == unsigned(masks[i]));
135 }
136
137 TEST(chunks.size() == i);
138
139 for(;;) {
140 if(p==map.begin()) {
141 TEST(i==0);
142 break;
143 }
144 else {
145 --p;
146 if(p->str()!=fchunks[i-1]) {
147 print_str(p->str());
148 print_str(fchunks[i-1]);
149 }
150 TEST(p->str()==fchunks[--i]);
151 TEST(p->rule() == unsigned(masks[i]));
152 }
153 }
154
155 for(i=0,p=map.end();i<chunks.size();i++){
156 --p;
157 unsigned index = chunks.size() - i - 1;
158 TEST(p->str()==fchunks[index]);
159 TEST(p->rule() == unsigned(masks[index]));
160 }
161 TEST(p==map.begin());
162 }
163
164 {
165 iter_type p;
166 unsigned chunk_ptr=0;
167 unsigned i=0;
168 map.full_select(false);
169 for(Iterator optr=begin;optr!=end;optr++,i++) {
170 p=map.find(optr);
171 if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){
172 chunk_ptr++;
173 }
174 if(chunk_ptr>=pos.size()) {
175 TEST(p==map.end());
176 }
177 else {
178 TEST(p->str()==chunks[chunk_ptr]);
179 TEST(p->rule()==unsigned(masks[chunk_ptr]));
180 }
181 }
182 }
183 {
184 iter_type p;
185 unsigned chunk_ptr=0;
186 unsigned i=0;
187 map.full_select(true);
188 for(Iterator optr=begin;optr!=end;optr++,i++) {
189 p=map.find(optr);
190 if(chunk_ptr < pos.size() && i>=unsigned(pos[chunk_ptr])){
191 chunk_ptr++;
192 }
193 if(chunk_ptr>=pos.size()) {
194 TEST(p==map.end());
195 }
196 else {
197 TEST(p->str()==fchunks[chunk_ptr]);
198 TEST(p->rule()==unsigned(masks[chunk_ptr]));
199 }
200 }
201 }
202
203 } // segment iterator tests
204
205 { // break iterator tests
206 lb::boundary_point_index<Iterator> map(bt,begin,end,l);
207 typedef typename lb::boundary_point_index<Iterator>::iterator iter_type;
208
209 map.rule(mask);
210
211 unsigned i=0;
212 iter_type p;
213 for(p=map.begin();p!=map.end();++p,i++) {
214 TEST(p->iterator()==iters[i]);
215 TEST(p->rule()==bmasks[i]);
216 }
217
218 TEST(iters.size() == i);
219
220 do {
221 --p;
222 --i;
223 TEST(p->iterator()==iters.at(i));
224 } while(p!=map.begin());
225 TEST(i==0);
226
227 unsigned iters_ptr=0;
228 for(Iterator optr=begin;optr!=end;optr++) {
229 p=map.find(optr);
230 TEST(p->iterator()==iters[iters_ptr]);
231 if(iters.at(iters_ptr)==optr)
232 iters_ptr++;
233 }
234
235 } // break iterator tests
236
237 { // copy test
238 typedef lb::segment_index<Iterator> ti_type;
239 typedef lb::boundary_point_index<Iterator> bi_type;
240 { // segment to bound
241 ti_type ti(bt,begin,end,l);
242 ti.rule(mask);
243 {
244 bi_type bi(ti);
245 bi.rule(mask);
246 unsigned i=0;
247 typename bi_type::iterator p;
248 for(p=bi.begin();p!=bi.end();++p,i++) {
249 TEST(p->iterator()==iters[i]);
250 TEST(p->rule()==bmasks[i]);
251 }
252 }
253 {
254 bi_type bi;
255 bi.rule(mask);
256 bi = ti;
257 unsigned i=0;
258 typename bi_type::iterator p;
259 for(p=bi.begin();p!=bi.end();++p,i++) {
260 TEST(p->iterator()==iters[i]);
261 TEST(p->rule()==bmasks[i]);
262 }
263 }
264 // boundary_point to bound
265 bi_type bi_2(bt,begin,end,l);
266 bi_2.rule(mask);
267 {
268 bi_type bi(bi_2);
269 unsigned i=0;
270 typename bi_type::iterator p;
271 for(p=bi.begin();p!=bi.end();++p,i++) {
272 TEST(p->iterator()==iters[i]);
273 TEST(p->rule()==bmasks[i]);
274 }
275 }
276 {
277 bi_type bi;
278 bi = bi_2;
279 unsigned i=0;
280 typename bi_type::iterator p;
281 for(p=bi.begin();p!=bi.end();++p,i++) {
282 TEST(p->iterator()==iters[i]);
283 TEST(p->rule()==bmasks[i]);
284 }
285 }
286 }
287 { // boundary_point to segment
288 bi_type bi(bt,begin,end,l);
289 {
290 ti_type ti(bi);
291 ti.rule(mask);
292 unsigned i=0;
293 typename ti_type::iterator p;
294 for(p=ti.begin();p!=ti.end();++p,i++) {
295 TEST(p->str()==chunks[i]);
296 TEST(p->rule()==unsigned(masks[i]));
297 }
298 }
299 {
300 ti_type ti;
301 ti.rule(mask);
302 ti = (bi);
303 unsigned i=0;
304 typename ti_type::iterator p;
305 for(p=ti.begin();p!=ti.end();++p,i++) {
306 TEST(p->str()==chunks[i]);
307 TEST(p->rule()==unsigned(masks[i]));
308 }
309 }
310 ti_type ti_2(bt,begin,end,l);
311 ti_2.rule(mask);
312 {
313 ti_type ti(ti_2);
314 unsigned i=0;
315 typename ti_type::iterator p;
316 for(p=ti.begin();p!=ti.end();++p,i++) {
317 TEST(p->str()==chunks[i]);
318 TEST(p->rule()==unsigned(masks[i]));
319 }
320 }
321 {
322 ti_type ti;
323 ti = (ti_2);
324 unsigned i=0;
325 typename ti_type::iterator p;
326 for(p=ti.begin();p!=ti.end();++p,i++) {
327 TEST(p->str()==chunks[i]);
328 TEST(p->rule()==unsigned(masks[i]));
329 }
330 }
331 }
332 }
333 } // for mask
334
335 }
336
337 template<typename Char>
run_word(std::string * original,int * none,int * num,int * word,int * kana,int * ideo,std::locale l,lb::boundary_type b=lb::word)338 void run_word(std::string *original,int *none,int *num,int *word,int *kana,int *ideo,std::locale l,lb::boundary_type b=lb::word)
339 {
340 std::vector<int> pos;
341 std::vector<std::basic_string<Char> > chunks;
342 std::vector<int> masks;
343 std::basic_string<Char> test_string;
344 for(int i=0;!original[i].empty();i++) {
345 chunks.push_back(to_correct_string<Char>(original[i],l));
346 test_string+=chunks.back();
347 pos.push_back(test_string.size());
348 masks.push_back(
349 ( none ? none[i]*15 : 0)
350 | ( num ? ((num[i]*15) << 4) : 0)
351 | ( word ? ((word[i]*15) << 8) : 0)
352 | ( kana ? ((kana[i]*15) << 12) : 0)
353 | ( ideo ? ((ideo[i]*15) << 16) : 0)
354 );
355 }
356
357 std::list<Char> lst(test_string.begin(),test_string.end());
358 test_word_container<Char>(lst.begin(),lst.end(),pos,masks,chunks,l,b);
359 test_word_container<Char>(test_string.begin(),test_string.end(),pos,masks,chunks,l,b);
360 }
361
// Test data for test_boundaries(). Every chunk array ends with an empty
// string; the int arrays hold one flag per chunk and are passed as the
// `first` (rule bits 0-3) and `second` (rule bits 4-7) arguments.

// Character boundaries: every chunk is flagged in the first group.
std::string character[]={
    "שָ",
    "ל",
    "וֹ",
    "ם",
    "!",
    ""
};
int nones[]={1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};

// Sentence boundaries.
// NOTE(review): presumably a = terminator rule, b = separator rule -- confirm
// against the boundary documentation.
std::string sentence1[]={
    "To be\n",
    "or not\n",
    "to be?\n",
    " That is the question. ",
    "Or maybe not",
    ""
};
int sentence1a[]={0,0,1,1,0,0};
int sentence1b[]={1,1,0,0,1,0};

// Line boundaries.
// NOTE(review): presumably a = soft break, b = hard break -- confirm against
// the boundary documentation.
std::string line1[]={
    "To ",
    "be\n",
    "or ",
    "not\n",
    "to ",
    "be",
    ""
};
int line1a[]={1,0,1,0,1,1,0};
int line1b[]={0,1,0,1,0,0,0};
372
373
test_boundaries(std::string * all,int * first,int * second,lb::boundary_type t)374 void test_boundaries(std::string *all,int *first,int *second,lb::boundary_type t)
375 {
376 boost::locale::generator g;
377 std::cout << " char UTF-8" << std::endl;
378 run_word<char>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
379 std::cout << " char CP1255" << std::endl;
380 run_word<char>(all,first,second,0,0,0,g("he_IL.cp1255"),t);
381 std::cout << " wchar_t"<<std::endl;
382 run_word<wchar_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
383 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
384 std::cout << " char16_t"<<std::endl;
385 run_word<char16_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
386 #endif
387 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
388 std::cout << " char32_t"<<std::endl;
389 run_word<char32_t>(all,first,second,0,0,0,g("he_IL.UTF-8"),t);
390 #endif
391
392 }
393
word_boundary()394 void word_boundary()
395 {
396 boost::locale::generator g;
397 //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひらがな","ヒラガナ",""};
398 //std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","ひん","アヒル",""};
399 std::string all1[]={"10"," ","Hello"," ","Windows7"," ","平仮名","アヒル",""};
400 int none1[]={ 0, 1, 0, 1, 0, 1, 0, 0, 0};
401 int num1[]={ 1, 0, 0, 0, 1, 0, 0 , 0 , 0};
402 int word1[]={ 0, 0, 1, 0, 1, 0, 0 , 0 , 0};
403 #if U_ICU_VERSION_MAJOR_NUM >= 50
404 int kana1[]={ 0, 0, 0, 0, 0, 0, 0, 0 , 0};
405 int ideo1[]={ 0, 0, 0, 0, 0, 0, 1, 1 , 1};
406 #else
407 int kana1[]={ 0, 0, 0, 0, 0, 0, 0, 1 , 1};
408 int ideo1[]={ 0, 0, 0, 0, 0, 0, 1, 0 , 0};
409 #endif
410
411
412 int zero[25]={0};
413 std::string all2[]={""};
414
415 std::string all3[]={" "," ","Hello",",","World","!"," ",""};
416 int none3[]={ 1, 1, 0, 1, 0, 1, 1, 0};
417 int word3[]={ 0, 0, 1, 0, 1, 0, 0, 0};
418
419 std::cout << " char UTF-8" << std::endl;
420 run_word<char>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
421 run_word<char>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
422 run_word<char>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
423
424 std::cout << " char Shift-JIS" << std::endl;
425 run_word<char>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.Shift-JIS"));
426 run_word<char>(all2,zero,zero,zero,zero,zero,g("ja_JP.Shift-JIS"));
427 run_word<char>(all3,none3,zero,word3,zero,zero,g("ja_JP.Shift-JIS"));
428
429 std::cout << " wchar_t"<<std::endl;
430 run_word<wchar_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
431 run_word<wchar_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
432 run_word<wchar_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
433
434 #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
435 std::cout << " char16_t"<<std::endl;
436 run_word<char16_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
437 run_word<char16_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
438 run_word<char16_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
439 #endif
440
441 #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
442 std::cout << " char32_t"<<std::endl;
443 run_word<char32_t>(all1,none1,num1,word1,kana1,ideo1,g("ja_JP.UTF-8"));
444 run_word<char32_t>(all2,zero,zero,zero,zero,zero,g("en_US.UTF-8"));
445 run_word<char32_t>(all3,none3,zero,word3,zero,zero,g("en_US.UTF-8"));
446 #endif
447 }
test_op_one_side(std::string const & sl,std::string const & sr,int val)448 void test_op_one_side(std::string const &sl,std::string const &sr,int val)
449 {
450 boost::locale::boundary::ssegment l(sl.begin(),sl.end(),0),r(sr.begin(),sr.end(),0);
451
452 // segment
453 TEST( (l==r) == (val==0));
454 TEST( (l!=r) == (val!=0));
455 TEST( (l<=r) == (val<=0));
456 TEST( (l< r) == (val<0));
457 TEST( (l>=r) == (val>=0));
458 TEST( (l> r) == (val>0));
459
460 // C string
461 TEST( (l==sr.c_str()) == (val==0));
462 TEST( (l!=sr.c_str()) == (val!=0));
463 TEST( (l<=sr.c_str()) == (val<=0));
464 TEST( (l< sr.c_str()) == (val<0));
465 TEST( (l>=sr.c_str()) == (val>=0));
466 TEST( (l> sr.c_str()) == (val>0));
467
468 TEST( (sl.c_str()==r) == (val==0));
469 TEST( (sl.c_str()!=r) == (val!=0));
470 TEST( (sl.c_str()<=r) == (val<=0));
471 TEST( (sl.c_str()< r) == (val<0));
472 TEST( (sl.c_str()>=r) == (val>=0));
473 TEST( (sl.c_str()> r) == (val>0));
474
475
476 // C++ string
477 TEST( (l==sr) == (val==0));
478 TEST( (l!=sr) == (val!=0));
479 TEST( (l<=sr) == (val<=0));
480 TEST( (l< sr) == (val<0));
481 TEST( (l>=sr) == (val>=0));
482 TEST( (l> sr) == (val>0));
483
484 TEST( (sl==r) == (val==0));
485 TEST( (sl!=r) == (val!=0));
486 TEST( (sl<=r) == (val<=0));
487 TEST( (sl< r) == (val<0));
488 TEST( (sl>=r) == (val>=0));
489 TEST( (sl> r) == (val>0));
490 // self check
491 TEST( (sl==sr) == (val==0));
492 TEST( (sl!=sr) == (val!=0));
493 TEST( (sl<=sr) == (val<=0));
494 TEST( (sl< sr) == (val<0));
495 TEST( (sl>=sr) == (val>=0));
496 TEST( (sl> sr) == (val>0));
497
498 }
499
test_op(std::string const & sl,std::string const & sr,int val)500 void test_op(std::string const &sl,std::string const &sr,int val)
501 {
502 test_op_one_side(sl,sr,val);
503 test_op_one_side(sr,sl,-val);
504 }
segment_operator()505 void segment_operator()
506 {
507 test_op("","a",-1);
508 test_op("","",0);
509 test_op("aa","aaa",-1);
510 test_op("aa","ab",-1);
511 }
512
main()513 int main()
514 {
515 try {
516 std::cout << "Testing segment operators" << std::endl;
517 segment_operator();
518 std::cout << "Testing word boundary" << std::endl;
519 word_boundary();
520 std::cout << "Testing character boundary" << std::endl;
521 test_boundaries(character,nones,0,lb::character);
522 std::cout << "Testing sentence boundary" << std::endl;
523 test_boundaries(sentence1,sentence1a,sentence1b,lb::sentence);
524 std::cout << "Testing line boundary" << std::endl;
525 test_boundaries(line1,line1a,line1b,lb::line);
526 }
527 catch(std::exception const &e) {
528 std::cerr << "Failed " << e.what() << std::endl;
529 return EXIT_FAILURE;
530 }
531 FINALIZE();
532 }
533
#endif // BOOST_LOCALE_WITH_ICU
535 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
536
537 // boostinspect:noascii
538