/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
// test_utf8_codecvt.cpp

// (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
8 
9 #include <algorithm>
10 #include <fstream>
11 #include <iostream>
12 #include <iterator>
13 #include <locale>
14 #include <vector>
15 #include <string>
16 
17 #include <cstddef> // size_t
18 #include <boost/config.hpp>
19 #if defined(BOOST_NO_STDC_NAMESPACE)
20 namespace std{
21     using ::size_t;
22 } // namespace std
23 #endif
24 
25 #include <cwchar>
26 #ifdef BOOST_NO_STDC_NAMESPACE
27 namespace std{ using ::wcslen; }
28 #endif
29 
30 #include "../test/test_tools.hpp"
31 #include <boost/archive/iterators/istream_iterator.hpp>
32 #include <boost/archive/iterators/ostream_iterator.hpp>
33 
34 #include <boost/archive/add_facet.hpp>
35 #include <boost/archive/detail/utf8_codecvt_facet.hpp>
36 
37 template<std::size_t s>
38 struct test_data
39 {
40     static unsigned char utf8_encoding[];
41     static wchar_t wchar_encoding[];
42 };
43 
44 template<>
45 unsigned char test_data<2>::utf8_encoding[] = {
46     0x01,
47     0x7f,
48     0xc2, 0x80,
49     0xdf, 0xbf,
50     0xe0, 0xa0, 0x80,
51     0xe7, 0xbf, 0xbf
52 };
53 
54 template<>
55 wchar_t test_data<2>::wchar_encoding[] = {
56     0x0001,
57     0x007f,
58     0x0080,
59     0x07ff,
60     0x0800,
61     0x7fff
62 };
63 
64 template<>
65 unsigned char test_data<4>::utf8_encoding[] = {
66     0x01,
67     0x7f,
68     0xc2, 0x80,
69     0xdf, 0xbf,
70     0xe0, 0xa0, 0x80,
71     0xef, 0xbf, 0xbf,
72     0xf0, 0x90, 0x80, 0x80,
73     0xf4, 0x8f, 0xbf, 0xbf,
74     0xf7, 0xbf, 0xbf, 0xbf,
75     0xf8, 0x88, 0x80, 0x80, 0x80,
76     0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
77     0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
78     0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
79 };
80 
81 template<>
82 wchar_t test_data<4>::wchar_encoding[] = {
83     0x00000001,
84     0x0000007f,
85     0x00000080,
86     0x000007ff,
87     0x00000800,
88     0x0000ffff,
89     0x00010000,
90     0x0010ffff,
91     0x001fffff,
92     0x00200000,
93     0x03ffffff,
94     0x04000000,
95     0x7fffffff
96 };
97 
98 int
test_main(int,char * [])99 test_main(int /* argc */, char * /* argv */[]) {
100     std::locale old_loc;
101     std::locale * utf8_locale
102         = boost::archive::add_facet(
103             old_loc,
104             new boost::archive::detail::utf8_codecvt_facet
105         );
106 
107     typedef char utf8_t;
108     typedef test_data<sizeof(wchar_t)> td;
109 
110     // Send our test UTF-8 data to file
111     {
112         std::ofstream ofs;
113         ofs.open("test.dat", std::ios::binary);
114         std::copy(
115             td::utf8_encoding,
116             #if ! defined(BOOST_BORLANDC)
117                 // borland 5.60 complains about this
118                 td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
119             #else
120                 // so use this instead
121                 td::utf8_encoding + 12,
122             #endif
123             boost::archive::iterators::ostream_iterator<utf8_t>(ofs)
124         );
125     }
126 
127     // Read the test data back in, converting to UCS-4 on the way in
128     std::vector<wchar_t> from_file;
129     {
130         std::wifstream ifs;
131         ifs.imbue(*utf8_locale);
132         ifs.open("test.dat");
133 
134         wchar_t item = 0;
135         // note can't use normal vector from iterator constructor because
136         // dinkumware doesn't have it.
137         for(;;){
138             item = ifs.get();
139             if(item == WEOF)
140                 break;
141             //ifs >> item;
142             //if(ifs.eof())
143             //    break;
144             from_file.push_back(item);
145         }
146     }
147 
148     // compare the data read back in with the orginal
149     #if ! defined(BOOST_BORLANDC)
150         // borland 5.60 complains about this
151         BOOST_CHECK(from_file.size() == sizeof(td::wchar_encoding)/sizeof(wchar_t));
152     #else
153         // so use this instead
154         BOOST_CHECK(from_file.size() == 6);
155     #endif
156 
157     BOOST_CHECK(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
158 
159     // Send the UCS4_data back out, converting to UTF-8
160     {
161         std::wofstream ofs;
162         ofs.imbue(*utf8_locale);
163         ofs.open("test2.dat");
164         std::copy(
165             from_file.begin(),
166             from_file.end(),
167             boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
168         );
169     }
170 
171     // Make sure that both files are the same
172     {
173         typedef boost::archive::iterators::istream_iterator<utf8_t> is_iter;
174         is_iter end_iter;
175 
176         std::ifstream ifs1("test.dat");
177         is_iter it1(ifs1);
178         std::vector<utf8_t> data1;
179         std::copy(it1, end_iter, std::back_inserter(data1));
180 
181         std::ifstream ifs2("test2.dat");
182         is_iter it2(ifs2);
183         std::vector<utf8_t> data2;
184         std::copy(it2, end_iter, std::back_inserter(data2));
185 
186         BOOST_CHECK(data1 == data2);
187     }
188 
189     // some libraries have trouble that only shows up with longer strings
190 
191     wchar_t * test3_data = L"\
192     <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
193     <!DOCTYPE boost_serialization>\
194     <boost_serialization signature=\"serialization::archive\" version=\"3\">\
195     <a class_id=\"0\" tracking_level=\"0\">\
196         <b>1</b>\
197         <f>96953204</f>\
198         <g>177129195</g>\
199         <l>1</l>\
200         <m>5627</m>\
201         <n>23010</n>\
202         <o>7419</o>\
203         <p>16212</p>\
204         <q>4086</q>\
205         <r>2749</r>\
206         <c>-33</c>\
207         <s>124</s>\
208         <t>28</t>\
209         <u>32225</u>\
210         <v>17543</v>\
211         <w>0.84431422</w>\
212         <x>1.0170664757130923</x>\
213         <y>tjbx</y>\
214         <z>cuwjentqpkejp</z>\
215     </a>\
216     </boost_serialization>\
217     ";
218 
219     // Send the UCS4_data back out, converting to UTF-8
220     std::size_t l = std::wcslen(test3_data);
221     {
222         std::wofstream ofs;
223         ofs.imbue(*utf8_locale);
224         ofs.open("test3.dat");
225         std::copy(
226             test3_data,
227             test3_data + l,
228             boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
229         );
230     }
231 
232     // Make sure that both files are the same
233     {
234         std::wifstream ifs;
235         ifs.imbue(*utf8_locale);
236         ifs.open("test3.dat");
237         BOOST_CHECK(
238             std::equal(
239                 test3_data,
240                 test3_data + l,
241                 boost::archive::iterators::istream_iterator<wchar_t>(ifs)
242             )
243         );
244     }
245 
246     delete utf8_locale;
247     return EXIT_SUCCESS;
248 }
249