1 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2 // test_utf8_codecvt.cpp
3
4 // (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
5 // Use, modification and distribution is subject to the Boost Software
6 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8
9 #include <algorithm>
10 #include <fstream>
11 #include <iostream>
12 #include <iterator>
13 #include <locale>
14 #include <vector>
15 #include <string>
16
17 #include <cstddef> // size_t
18 #include <boost/config.hpp>
19 #if defined(BOOST_NO_STDC_NAMESPACE)
20 namespace std{
21 using ::size_t;
22 } // namespace std
23 #endif
24
25 #include <cwchar>
26 #ifdef BOOST_NO_STDC_NAMESPACE
27 namespace std{ using ::wcslen; }
28 #endif
29
30 #include "../test/test_tools.hpp"
31 #include <boost/archive/iterators/istream_iterator.hpp>
32 #include <boost/archive/iterators/ostream_iterator.hpp>
33
34 #include <boost/archive/add_facet.hpp>
35 #include <boost/archive/detail/utf8_codecvt_facet.hpp>
36
// Holder for the paired test vectors. The template argument is the size of
// wchar_t in bytes; test_main instantiates it as test_data<sizeof(wchar_t)>.
// Only the explicit specializations of the two static arrays provide data.
template<std::size_t WideCharBytes>
struct test_data
{
    // raw UTF-8 byte stream
    static unsigned char utf8_encoding[];
    // the wide characters that byte stream is expected to decode to
    static wchar_t wchar_encoding[];
};
43
44 template<>
45 unsigned char test_data<2>::utf8_encoding[] = {
46 0x01,
47 0x7f,
48 0xc2, 0x80,
49 0xdf, 0xbf,
50 0xe0, 0xa0, 0x80,
51 0xe7, 0xbf, 0xbf
52 };
53
54 template<>
55 wchar_t test_data<2>::wchar_encoding[] = {
56 0x0001,
57 0x007f,
58 0x0080,
59 0x07ff,
60 0x0800,
61 0x7fff
62 };
63
64 template<>
65 unsigned char test_data<4>::utf8_encoding[] = {
66 0x01,
67 0x7f,
68 0xc2, 0x80,
69 0xdf, 0xbf,
70 0xe0, 0xa0, 0x80,
71 0xef, 0xbf, 0xbf,
72 0xf0, 0x90, 0x80, 0x80,
73 0xf4, 0x8f, 0xbf, 0xbf,
74 0xf7, 0xbf, 0xbf, 0xbf,
75 0xf8, 0x88, 0x80, 0x80, 0x80,
76 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
77 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
78 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
79 };
80
81 template<>
82 wchar_t test_data<4>::wchar_encoding[] = {
83 0x00000001,
84 0x0000007f,
85 0x00000080,
86 0x000007ff,
87 0x00000800,
88 0x0000ffff,
89 0x00010000,
90 0x0010ffff,
91 0x001fffff,
92 0x00200000,
93 0x03ffffff,
94 0x04000000,
95 0x7fffffff
96 };
97
98 int
test_main(int,char * [])99 test_main(int /* argc */, char * /* argv */[]) {
100 std::locale old_loc;
101 std::locale * utf8_locale
102 = boost::archive::add_facet(
103 old_loc,
104 new boost::archive::detail::utf8_codecvt_facet
105 );
106
107 typedef char utf8_t;
108 typedef test_data<sizeof(wchar_t)> td;
109
110 // Send our test UTF-8 data to file
111 {
112 std::ofstream ofs;
113 ofs.open("test.dat", std::ios::binary);
114 std::copy(
115 td::utf8_encoding,
116 #if ! defined(BOOST_BORLANDC)
117 // borland 5.60 complains about this
118 td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
119 #else
120 // so use this instead
121 td::utf8_encoding + 12,
122 #endif
123 boost::archive::iterators::ostream_iterator<utf8_t>(ofs)
124 );
125 }
126
127 // Read the test data back in, converting to UCS-4 on the way in
128 std::vector<wchar_t> from_file;
129 {
130 std::wifstream ifs;
131 ifs.imbue(*utf8_locale);
132 ifs.open("test.dat");
133
134 wchar_t item = 0;
135 // note can't use normal vector from iterator constructor because
136 // dinkumware doesn't have it.
137 for(;;){
138 item = ifs.get();
139 if(item == WEOF)
140 break;
141 //ifs >> item;
142 //if(ifs.eof())
143 // break;
144 from_file.push_back(item);
145 }
146 }
147
148 // compare the data read back in with the orginal
149 #if ! defined(BOOST_BORLANDC)
150 // borland 5.60 complains about this
151 BOOST_CHECK(from_file.size() == sizeof(td::wchar_encoding)/sizeof(wchar_t));
152 #else
153 // so use this instead
154 BOOST_CHECK(from_file.size() == 6);
155 #endif
156
157 BOOST_CHECK(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
158
159 // Send the UCS4_data back out, converting to UTF-8
160 {
161 std::wofstream ofs;
162 ofs.imbue(*utf8_locale);
163 ofs.open("test2.dat");
164 std::copy(
165 from_file.begin(),
166 from_file.end(),
167 boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
168 );
169 }
170
171 // Make sure that both files are the same
172 {
173 typedef boost::archive::iterators::istream_iterator<utf8_t> is_iter;
174 is_iter end_iter;
175
176 std::ifstream ifs1("test.dat");
177 is_iter it1(ifs1);
178 std::vector<utf8_t> data1;
179 std::copy(it1, end_iter, std::back_inserter(data1));
180
181 std::ifstream ifs2("test2.dat");
182 is_iter it2(ifs2);
183 std::vector<utf8_t> data2;
184 std::copy(it2, end_iter, std::back_inserter(data2));
185
186 BOOST_CHECK(data1 == data2);
187 }
188
189 // some libraries have trouble that only shows up with longer strings
190
191 wchar_t * test3_data = L"\
192 <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
193 <!DOCTYPE boost_serialization>\
194 <boost_serialization signature=\"serialization::archive\" version=\"3\">\
195 <a class_id=\"0\" tracking_level=\"0\">\
196 <b>1</b>\
197 <f>96953204</f>\
198 <g>177129195</g>\
199 <l>1</l>\
200 <m>5627</m>\
201 <n>23010</n>\
202 <o>7419</o>\
203 <p>16212</p>\
204 <q>4086</q>\
205 <r>2749</r>\
206 <c>-33</c>\
207 <s>124</s>\
208 <t>28</t>\
209 <u>32225</u>\
210 <v>17543</v>\
211 <w>0.84431422</w>\
212 <x>1.0170664757130923</x>\
213 <y>tjbx</y>\
214 <z>cuwjentqpkejp</z>\
215 </a>\
216 </boost_serialization>\
217 ";
218
219 // Send the UCS4_data back out, converting to UTF-8
220 std::size_t l = std::wcslen(test3_data);
221 {
222 std::wofstream ofs;
223 ofs.imbue(*utf8_locale);
224 ofs.open("test3.dat");
225 std::copy(
226 test3_data,
227 test3_data + l,
228 boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
229 );
230 }
231
232 // Make sure that both files are the same
233 {
234 std::wifstream ifs;
235 ifs.imbue(*utf8_locale);
236 ifs.open("test3.dat");
237 BOOST_CHECK(
238 std::equal(
239 test3_data,
240 test3_data + l,
241 boost::archive::iterators::istream_iterator<wchar_t>(ifs)
242 )
243 );
244 }
245
246 delete utf8_locale;
247 return EXIT_SUCCESS;
248 }
249