// Copyright Vladimir Prus 2002-2004.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <cassert>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/bind.hpp>
#include <boost/progress.hpp>
#include <boost/ref.hpp>

#include <boost/program_options/detail/convert.hpp>
#include <boost/program_options/detail/utf8_codecvt_facet.hpp>

#include "minitest.hpp"

using namespace std;

// Reads the whole content of the file named 'filename' and returns it as a
// single string. The file is opened in the stream's default (text) mode.
//
// Throws std::runtime_error if the file cannot be opened. A plain assert is
// not sufficient here: it is compiled out under NDEBUG, after which a missing
// data file would yield an empty string and the checks performed on that
// data would pass vacuously.
std::string file_content(const std::string& filename)
{
    std::ifstream ifs(filename.c_str());
    if (!ifs)
        throw std::runtime_error("cannot open file: " + filename);

    // Copy the complete stream buffer in one go.
    std::stringstream ss;
    ss << ifs.rdbuf();

    return ss.str();
}

// A version of from_8_bit which does not use a function object, for
// performance comparison.
//
// Converts the narrow string 's' to a wide string by repeatedly calling
// 'cvt.in', starting from a default-initialized conversion state.
//
// Throws std::logic_error if the facet reports an error, or if a call to
// 'cvt.in' produces no output characters at all.
std::wstring from_8_bit_2(const std::string& s,
                          const std::codecvt<wchar_t, char, std::mbstate_t>& cvt)
{
    std::wstring result;

    std::mbstate_t state = std::mbstate_t();

    const char* from = s.data();
    const char* const from_end = s.data() + s.size();

    // Capacity, in wide characters, of the intermediate output buffer.
    const std::size_t buffer_size = 32;

    // The interface of cvt is not really iterator-like, and it's not
    // possible to tell the required output size without doing the
    // conversion. All we can do is convert the data piece by piece.
    while (from != from_end) {

        // std::basic_string does not provide non-const pointers to the data,
        // so converting directly into the string is not possible.
        wchar_t buffer[buffer_size];

        wchar_t* to_next = buffer;
        // Try to convert the remaining input. 'from' is passed both as the
        // start of the input and as the "next input" out-parameter, so it
        // is advanced past whatever this call consumed.
        const std::codecvt_base::result r =
            cvt.in(state, from, from_end, from,
                   buffer, buffer + buffer_size, to_next);

        if (r == std::codecvt_base::error)
            throw std::logic_error("character conversion failed");
        // 'partial' is not an error, it just means not all source characters
        // were converted. However, we need to check that at least one new
        // target character was produced. If not, it means the source data is
        // incomplete, and since we don't have extra data to add to the
        // source, it's an error.
        if (to_next == buffer)
            throw std::logic_error("character conversion failed");

        // Add the converted characters.
        result.append(buffer, to_next);
    }

    return result;
}


test_convert(const std::string & input,const std::string & expected_output)78 void test_convert(const std::string& input,
79                   const std::string& expected_output)
80 {
81     boost::program_options::detail::utf8_codecvt_facet facet;
82 
83     std::wstring output;
84     {
85         boost::progress_timer t;
86         for (int i = 0; i < 10000; ++i)
87             output = boost::from_8_bit(
88                 input,
89                 facet);
90     }
91 
92     {
93         boost::progress_timer t;
94         for (int i = 0; i < 10000; ++i)
95             output = from_8_bit_2(
96                 input,
97                 facet);
98     }
99 
100     BOOST_CHECK(output.size()*2 == expected_output.size());
101 
102     for(unsigned i = 0; i < output.size(); ++i) {
103 
104         {
105             unsigned low = output[i];
106             low &= 0xFF;
107             unsigned low2 = expected_output[2*i];
108             low2 &= 0xFF;
109             BOOST_CHECK(low == low2);
110         }
111         {
112             unsigned high = output[i];
113             high >>= 8;
114             high &= 0xFF;
115             unsigned high2 = expected_output[2*i+1];
116             BOOST_CHECK(high == high2);
117         }
118     }
119 
120     string ref = boost::to_8_bit(output, facet);
121 
122     BOOST_CHECK(ref == input);
123 }
124 
main(int ac,char * av[])125 int main(int ac, char* av[])
126 {
127     std::string input = file_content("utf8.txt");
128     std::string expected = file_content("ucs2.txt");
129 
130     test_convert(input, expected);
131 
132     if (ac > 1) {
133         cout << "Trying to convert the command line argument\n";
134 
135         locale::global(locale(""));
136         std::wstring w = boost::from_local_8_bit(av[1]);
137 
138         cout << "Got something, printing decimal code point values\n";
139         for (unsigned i = 0; i < w.size(); ++i) {
140             cout << (unsigned)w[i] << "\n";
141         }
142 
143     }
144 
145     return 0;
146 }
147