1 //
2 // Copyright (c) 2015 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #include <boost/locale/utf8_codecvt.hpp>
9 #include <locale>
10 #include <iostream>
11 #include <iomanip>
12 #include <string.h>
13 #include <wchar.h>
14 #include <memory.h>
15 #define BOOST_LOCALE_ERROR_LIMIT -1
16 #include "test_locale.hpp"
17
// Sample file name as raw UTF-8 bytes: one 4-byte sequence (U+1D49E), '-',
// six 2-byte Cyrillic letters, '-', two 3-byte Hiragana letters and the
// ASCII suffix ".txt".
static char const *utf8_name = "\xf0\x9d\x92\x9e-\xD0\xBF\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82-\xE3\x82\x84\xE3\x81\x82.txt";
// The same text as a wide string, spelled with universal character names.
// The conversion tests below expect the facet to map each string onto the other.
static wchar_t const *wide_name = L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042.txt";
20
// Returns a printable name for a codecvt result code.
// Any value other than ok / partial / noconv is reported as "error".
char const *res(std::codecvt_base::result r)
{
    if(r == std::codecvt_base::ok)
        return "ok";
    if(r == std::codecvt_base::partial)
        return "partial";
    if(r == std::codecvt_base::noconv)
        return "noconv";
    // std::codecvt_base::error and every unexpected value end up here.
    return "error";
}
32
// Standard wchar_t <-> char conversion facet interface; the tests obtain the
// facet from a locale via std::use_facet<cvt_type> and call it through this type.
typedef std::codecvt<wchar_t,char,std::mbstate_t> cvt_type;
34
test_codecvt_in_n_m(cvt_type const & cvt,int n,int m)35 void test_codecvt_in_n_m(cvt_type const &cvt,int n,int m)
36 {
37 wchar_t const *wptr = wide_name;
38 int wlen = wcslen(wide_name);
39 int u8len = strlen(utf8_name);
40 char const *from = utf8_name;
41 char const *end = from;
42 char const *real_end = utf8_name + u8len;
43 char const *from_next = from;
44 std::mbstate_t mb=std::mbstate_t();
45 while(from_next < real_end) {
46 if(from == end) {
47 end = from + n;
48 if(end > real_end)
49 end = real_end;
50 }
51
52 wchar_t buf[128];
53 wchar_t *to = buf;
54 wchar_t *to_end = to + m;
55 wchar_t *to_next = to;
56
57
58 std::mbstate_t mb2 = mb;
59 std::codecvt_base::result r = cvt.in(mb,from,end,from_next,to,to_end,to_next);
60 //std::cout << "In from_size=" << (end-from) << " from move=" << (from_next - from) << " to move= " << to_next - to << " state = " << res(r) << std::endl;
61
62 int count = cvt.length(mb2,from,end,to_end - to);
63 #ifndef BOOST_LOCALE_DO_LENGTH_MBSTATE_CONST
64 TEST(memcmp(&mb,&mb2,sizeof(mb))==0);
65 if(count != from_next - from) {
66 std::cout << count << " " << from_next - from << std::endl;
67 }
68 TEST(count == from_next - from);
69 #else
70 TEST(count == to_next - to);
71 #endif
72
73
74 if(r == cvt_type::partial) {
75 end+=n;
76 if(end > real_end)
77 end = real_end;
78 }
79 else
80 TEST(r == cvt_type::ok);
81 while(to!=to_next) {
82 TEST(*wptr == *to);
83 wptr++;
84 to++;
85 }
86 to=to_next;
87 from = from_next;
88 }
89 TEST(wptr == wide_name + wlen);
90 TEST(from == real_end);
91
92 }
93
test_codecvt_out_n_m(cvt_type const & cvt,int n,int m)94 void test_codecvt_out_n_m(cvt_type const &cvt,int n,int m)
95 {
96 char const *nptr = utf8_name;
97 int wlen = wcslen(wide_name);
98 int u8len = strlen(utf8_name);
99
100 std::mbstate_t mb=std::mbstate_t();
101
102 wchar_t const *from_next = wide_name;
103 wchar_t const *real_from_end = wide_name + wlen;
104
105 char buf[256];
106 char *to = buf;
107 char *to_next = to;
108 char *to_end = to + n;
109 char *real_to_end = buf + sizeof(buf);
110
111 while(from_next < real_from_end) {
112 wchar_t const *from = from_next;
113 wchar_t const *from_end = from + m;
114 if(from_end > real_from_end)
115 from_end = real_from_end;
116 if(to_end == to) {
117 to_end = to+n;
118 }
119
120 std::codecvt_base::result r = cvt.out(mb,from,from_end,from_next,to,to_end,to_next);
121 //std::cout << "In from_size=" << (end-from) << " from move=" << (from_next - from) << " to move= " << to_next - to << " state = " << res(r) << std::endl;
122 if(r == cvt_type::partial) {
123 TEST(to_end - to_next < cvt.max_length());
124 to_end += n;
125 if(to_end > real_to_end)
126 to_end = real_to_end;
127 }
128 else {
129 TEST(r == cvt_type::ok);
130 }
131
132 while(to!=to_next) {
133 TEST(*nptr == *to);
134 nptr++;
135 to++;
136 }
137 from = from_next;
138 }
139 TEST(nptr == utf8_name + u8len);
140 TEST(from_next == real_from_end);
141 TEST(cvt.unshift(mb,to,to+n,to_next)==cvt_type::ok);
142 TEST(to_next == to);
143
144 }
145
146
test_codecvt_conv()147 void test_codecvt_conv()
148 {
149 std::cout << "Conversions " << std::endl;
150 std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<wchar_t>());
151
152 cvt_type const &cvt = std::use_facet<cvt_type>(l);
153
154 TEST(cvt.max_length()==4);
155
156 for(int i=1;i<=(int)strlen(utf8_name)+1;i++) {
157 for(int j=1;j<=(int)wcslen(wide_name)+1;j++) {
158 try {
159 test_codecvt_in_n_m(cvt,i,j);
160 test_codecvt_out_n_m(cvt,i,j);
161 }
162 catch(...) {
163 std::cerr << "Wlen=" <<j << " Nlen=" << i << std::endl;
164 throw;
165 }
166 }
167 }
168 }
169
test_codecvt_err()170 void test_codecvt_err()
171 {
172 std::cout << "Errors " << std::endl;
173 std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<wchar_t>());
174
175 cvt_type const &cvt = std::use_facet<cvt_type>(l);
176
177 std::cout << "- UTF-8" << std::endl;
178 {
179
180 wchar_t buf[2];
181 wchar_t *to=buf;
182 wchar_t *to_end = buf+2;
183 wchar_t *to_next = to;
184 char const *err_utf="1\xFF\xFF";
185 {
186 std::mbstate_t mb=std::mbstate_t();
187 char const *from=err_utf;
188 char const *from_end = from + strlen(from);
189 char const *from_next = from;
190 to_next = to;
191 TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
192 TEST(from_next == from+1);
193 TEST(to_next == to + 1);
194 TEST(*to == '1');
195 }
196 err_utf++;
197 {
198 std::mbstate_t mb=std::mbstate_t();
199 char const *from=err_utf;
200 char const *from_end = from + strlen(from);
201 char const *from_next = from;
202 TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
203 TEST(from_next == from);
204 TEST(to_next == to);
205 }
206 }
207
208 std::cout << "- UTF-16/32" << std::endl;
209 {
210
211 char buf[32];
212 char *to=buf;
213 char *to_end = buf+32;
214 char *to_next = to;
215 wchar_t err_buf[3] = { '1' , 0xDC9E }; // second surrogate not works both for UTF-16 and 32
216 wchar_t const *err_utf = err_buf;
217 {
218 std::mbstate_t mb=std::mbstate_t();
219 wchar_t const *from=err_utf;
220 wchar_t const *from_end = from + wcslen(from);
221 wchar_t const *from_next = from;
222 TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
223 TEST(from_next == from+1);
224 TEST(to_next == to + 1);
225 TEST(*to == '1');
226 }
227 err_utf++;
228 {
229 std::mbstate_t mb=std::mbstate_t();
230 wchar_t const *from=err_utf;
231 wchar_t const *from_end = from + wcslen(from);
232 wchar_t const *from_next = from;
233 to_next = to;
234 TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::error);
235 TEST(from_next == from);
236 TEST(to_next == to);
237 }
238 }
239
240 }
241
242
test_char_char()243 void test_char_char()
244 {
245 std::cout << "Char-char specialization"<<std::endl;
246 std::locale l(std::locale::classic(),new boost::locale::utf8_codecvt<char>());
247 std::codecvt<char,char,std::mbstate_t> const &cvt=std::use_facet<std::codecvt<char,char,std::mbstate_t> >(l);
248 std::mbstate_t mb=std::mbstate_t();
249 char const *from = "a";
250 char const *from_end = from+1;
251 char const *from_next = from;
252 char buf[2];
253 char *to = buf;
254 char *to_end = buf+1;
255 char *to_next = to;
256 TEST(cvt.always_noconv()==true);
257 TEST(cvt.in(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::noconv);
258 TEST(from_next == from);
259 TEST(to_next == to);
260 TEST(cvt.out(mb,from,from_end,from_next,to,to_end,to_next)==cvt_type::noconv);
261 TEST(from_next == from);
262 TEST(to_next == to);
263 TEST(cvt.encoding()==1);
264 TEST(cvt.max_length()==1);
265 }
266
main()267 int main()
268 {
269 try {
270 test_codecvt_conv();
271 test_codecvt_err();
272 test_char_char();
273
274 }
275 catch(std::exception const &e) {
276 std::cerr << "Failed : " << e.what() << std::endl;
277 return 1;
278 }
279 std::cout << "Ok" << std::endl;
280 return 0;
281 }
282 ///
283 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
284