1 // Copyright Vladimir Prus 2002-2004.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE_1_0.txt
4 // or copy at http://www.boost.org/LICENSE_1_0.txt)
5
#include <cstring>
#include <cassert>
#include <string>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <iostream>
#include <boost/progress.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
15
16 #include <boost/program_options/detail/convert.hpp>
17 #include <boost/program_options/detail/utf8_codecvt_facet.hpp>
18
19 #include "minitest.hpp"
20
21 using namespace std;
22
// Returns the complete contents of the file named 'filename' as a string.
// The file is opened with the default open mode of std::ifstream.
//
// Throws std::runtime_error when the file cannot be opened.
std::string file_content(const std::string& filename)
{
    std::ifstream ifs(filename.c_str());
    // An assert() would be compiled out under NDEBUG, letting a missing
    // file silently read as empty content; fail explicitly instead.
    if (!ifs)
        throw std::runtime_error("cannot open file: " + filename);

    // Copy the whole stream buffer in one go.
    std::stringstream ss;
    ss << ifs.rdbuf();

    return ss.str();
}
33
// A variant of from_8_bit written as a plain loop, without a function
// object, so that the two can be compared for performance.
//
// Converts the bytes of 's' to wide characters using 'cvt' and returns the
// result. Throws std::logic_error("character conversion failed") when 'cvt'
// reports an error, or when a conversion pass over the remaining input
// produces no output at all.
std::wstring from_8_bit_2(const std::string& s,
                          const std::codecvt<wchar_t, char, std::mbstate_t>& cvt)
{
    std::wstring converted;
    std::mbstate_t shift_state = std::mbstate_t();

    const char* cursor = s.data();
    const char* const input_end = s.data() + s.size();

    // The codecvt interface is not iterator-like and cannot tell the
    // required output size without doing the conversion, so the input is
    // converted piece by piece.
    while (cursor != input_end) {
        // The conversion writes through raw wchar_t pointers, so it goes
        // into a scratch array rather than directly into the string.
        wchar_t chunk[32];
        wchar_t* const chunk_end = chunk + sizeof(chunk) / sizeof(chunk[0]);
        wchar_t* produced_end = chunk;
        const char* consumed_end = cursor;

        // Try to convert whatever input remains.
        const std::codecvt_base::result status =
            cvt.in(shift_state, cursor, input_end, consumed_end,
                   chunk, chunk_end, produced_end);
        cursor = consumed_end;

        // 'partial' by itself is not an error: it only means that not all
        // source characters were converted. But if no target character was
        // produced, the source data is incomplete, and since there is no
        // more data to supply, that is an error as well.
        const bool failed = (status == std::codecvt_base::error);
        const bool stalled = (produced_end == chunk);
        if (failed || stalled)
            throw std::logic_error("character conversion failed");

        // Append the characters converted in this pass.
        converted.append(chunk, produced_end);
    }

    return converted;
}
76
77
test_convert(const std::string & input,const std::string & expected_output)78 void test_convert(const std::string& input,
79 const std::string& expected_output)
80 {
81 boost::program_options::detail::utf8_codecvt_facet facet;
82
83 std::wstring output;
84 {
85 boost::progress_timer t;
86 for (int i = 0; i < 10000; ++i)
87 output = boost::from_8_bit(
88 input,
89 facet);
90 }
91
92 {
93 boost::progress_timer t;
94 for (int i = 0; i < 10000; ++i)
95 output = from_8_bit_2(
96 input,
97 facet);
98 }
99
100 BOOST_CHECK(output.size()*2 == expected_output.size());
101
102 for(unsigned i = 0; i < output.size(); ++i) {
103
104 {
105 unsigned low = output[i];
106 low &= 0xFF;
107 unsigned low2 = expected_output[2*i];
108 low2 &= 0xFF;
109 BOOST_CHECK(low == low2);
110 }
111 {
112 unsigned high = output[i];
113 high >>= 8;
114 high &= 0xFF;
115 unsigned high2 = expected_output[2*i+1];
116 BOOST_CHECK(high == high2);
117 }
118 }
119
120 string ref = boost::to_8_bit(output, facet);
121
122 BOOST_CHECK(ref == input);
123 }
124
main(int ac,char * av[])125 int main(int ac, char* av[])
126 {
127 std::string input = file_content("utf8.txt");
128 std::string expected = file_content("ucs2.txt");
129
130 test_convert(input, expected);
131
132 if (ac > 1) {
133 cout << "Trying to convert the command line argument\n";
134
135 locale::global(locale(""));
136 std::wstring w = boost::from_local_8_bit(av[1]);
137
138 cout << "Got something, printing decimal code point values\n";
139 for (unsigned i = 0; i < w.size(); ++i) {
140 cout << (unsigned)w[i] << "\n";
141 }
142
143 }
144
145 return 0;
146 }
147