• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*==============================================================================
2     Copyright (c) 2001-2011 Joel de Guzman
3     Copyright (c) 2010-2011 Bryce Lelbach
4 
5     Distributed under the Boost Software License, Version 1.0. (See accompanying
6     file BOOST_LICENSE_1_0.rst or copy at http://www.boost.org/LICENSE_1_0.txt)
7 ==============================================================================*/
8 
9 #if !defined(BOOST_SPIRIT_UTREE_EXAMPLE_UTF8_PARSER_HPP)
10 #define BOOST_SPIRIT_UTREE_EXAMPLE_UTF8_PARSER_HPP
11 
12 #include <string>
13 
14 #include <boost/cstdint.hpp>
15 
16 #include <boost/regex/pending/unicode_iterator.hpp>
17 
18 #include <boost/spirit/include/qi.hpp>
19 #include <boost/spirit/include/phoenix_core.hpp>
20 #include <boost/spirit/include/phoenix_container.hpp>
21 #include <boost/spirit/include/phoenix_statement.hpp>
22 #include <boost/spirit/include/phoenix_operator.hpp>
23 #include <boost/spirit/include/support_utree.hpp>
24 
25 namespace utf8
26 {
27 
28 namespace qi = boost::spirit::qi;
29 namespace px = boost::phoenix;
30 namespace standard = boost::spirit::standard;
31 
32 using boost::spirit::utree;
33 using boost::spirit::utf8_symbol_type;
34 using boost::spirit::utf8_string_type;
35 using boost::spirit::binary_string_type;
36 
37 typedef boost::uint32_t uchar;
38 
39 struct push_string
40 {
41     template <typename, typename>
42     struct result
43     {
44         typedef void type;
45     };
46 
operator ()utf8::push_string47     void operator()(std::string& s, uchar code_point) const
48     {
49         typedef std::back_insert_iterator<std::string> insert_iter;
50         insert_iter out_iter(s);
51         boost::utf8_output_iterator<insert_iter> s_iter(out_iter);
52         *s_iter++ = code_point;
53     }
54 };
55 
56 struct push_escaped_string
57 {
58     template <typename, typename>
59     struct result
60     {
61         typedef void type;
62     };
63 
operator ()utf8::push_escaped_string64     void operator()(std::string& s, uchar c) const
65     {
66         switch (c) {
67             case 'b':
68                 s += '\b';
69                 break;
70             case 't':
71                 s += '\t';
72                 break;
73             case 'n':
74                 s += '\n';
75                 break;
76             case 'f':
77                 s += '\f';
78                 break;
79             case 'r':
80                 s += '\r';
81                 break;
82             case '"':
83                 s += '"';
84                 break;
85             case '\\':
86                 s += '\\';
87                 break;
88         }
89     }
90 };
91 
92 template <typename Iterator>
93 struct parser : qi::grammar<Iterator, std::string()>
94 {
95     qi::rule<Iterator, void(std::string&)>
96         escaped;
97 
98     qi::rule<Iterator, std::string()>
99         start;
100 
101     px::function<push_string>
102         push_str;
103 
104     px::function<push_escaped_string>
105         push_esc;
106 
parserutf8::parser107     parser() : parser::base_type (start)
108     {
109         using standard::char_;
110         using qi::uint_parser;
111         using qi::_val;
112         using qi::_r1;
113         using qi::_1;
114 
115         uint_parser<uchar, 16, 4, 4> hex4;
116         uint_parser<uchar, 16, 8, 8> hex8;
117 
118         escaped
119           = '\\'
120           > (   ('u' > hex4)                 [push_str(_r1, _1)]
121             |   ('U' > hex8)                 [push_str(_r1, _1)]
122             |   char_("btnfr\\\"'")          [push_esc(_r1, _1)]
123             );
124 
125         start
126           = '"'
127           > *(escaped(_val) | (~char_('"'))  [_val += _1])
128           > '"';
129 
130         escaped.name("escaped_string");
131         start.name("string");
132     }
133 };
134 
135 } // utf8
136 
137 #endif // BOOST_SPIRIT_UTREE_EXAMPLE_UTF8_PARSER_HPP
138 
139