• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Copyright (c) 2002 2004 2006 Joel de Guzman
3     Copyright (c) 2004 Eric Niebler
4     http://spirit.sourceforge.net/
5 
6     Use, modification and distribution is subject to the Boost Software
7     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8     http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include "utils.hpp"
11 
12 #include <cctype>
13 #include <cstring>
14 #include <map>
15 #include <boost/spirit/include/classic_chset.hpp>
16 #include <boost/spirit/include/classic_core.hpp>
17 #include <boost/spirit/include/classic_numerics.hpp>
18 #include <boost/spirit/include/phoenix1_binders.hpp>
19 #include <boost/spirit/include/phoenix1_primitives.hpp>
20 
21 namespace quickbook
22 {
23     namespace detail
24     {
25         namespace cl = boost::spirit::classic;
26         namespace ph = phoenix;
27 
28         struct xml_decode_grammar : cl::grammar<xml_decode_grammar>
29         {
30             std::string& result;
xml_decode_grammarquickbook::detail::xml_decode_grammar31             xml_decode_grammar(std::string& result_) : result(result_) {}
32 
append_charquickbook::detail::xml_decode_grammar33             void append_char(char c) const { result += c; }
34 
append_escaped_charquickbook::detail::xml_decode_grammar35             void append_escaped_char(unsigned int c) const
36             {
37                 if (c < 0x80) {
38                     result += static_cast<char>(c);
39                 }
40                 else if (c < 0x800) {
41                     char e[] = {static_cast<char>(0xc0 + (c >> 6)),
42                                 static_cast<char>(0x80 + (c & 0x3f)), '\0'};
43                     result += e;
44                 }
45                 else if (c < 0x10000) {
46                     char e[] = {static_cast<char>(0xe0 + (c >> 12)),
47                                 static_cast<char>(0x80 + ((c >> 6) & 0x3f)),
48                                 static_cast<char>(0x80 + (c & 0x3f)), '\0'};
49                     result += e;
50                 }
51                 else if (c < 0x110000) {
52                     char e[] = {static_cast<char>(0xf0 + (c >> 18)),
53                                 static_cast<char>(0x80 + ((c >> 12) & 0x3f)),
54                                 static_cast<char>(0x80 + ((c >> 6) & 0x3f)),
55                                 static_cast<char>(0x80 + (c & 0x3f)), '\0'};
56                     result += e;
57                 }
58                 else {
59                     result += "\xEF\xBF\xBD";
60                 }
61             }
62 
63             template <typename Scanner> struct definition
64             {
definitionquickbook::detail::xml_decode_grammar::definition65                 definition(xml_decode_grammar const& self)
66                 {
67                     // clang-format off
68 
69                     auto append_escaped_char = ph::bind(&xml_decode_grammar::append_escaped_char);
70                     auto append_char = ph::bind(&xml_decode_grammar::append_char);
71                     auto encoded =
72                             cl::ch_p('&')
73                         >>  (   "#x"
74                             >>  cl::hex_p           [append_escaped_char(self, ph::arg1)]
75                             >>  !cl::ch_p(';')
76                             |   '#'
77                             >>  cl::uint_p          [append_escaped_char(self, ph::arg1)]
78                             >>  !cl::ch_p(';')
79                             |   cl::str_p("amp;")   [append_char(self, '&')]
80                             |   cl::str_p("apos;")  [append_char(self, '\'')]
81                             |   cl::str_p("gt;")    [append_char(self, '>')]
82                             |   cl::str_p("lt;")    [append_char(self, '<')]
83                             |   cl::str_p("quot;")  [append_char(self, '"')]
84                             );
85                     auto character = cl::anychar_p [append_char(self, ph::arg1)];
86                     xml_encoded = *(encoded | character);
87 
88                     // clang-format on
89                 }
90 
startquickbook::detail::xml_decode_grammar::definition91                 cl::rule<Scanner> const& start() { return xml_encoded; }
92                 cl::rule<Scanner> xml_encoded;
93             };
94 
95           private:
96             xml_decode_grammar& operator=(xml_decode_grammar const&);
97         };
98 
decode_string(quickbook::string_view str)99         std::string decode_string(quickbook::string_view str)
100         {
101             std::string result;
102             xml_decode_grammar xml_decode(result);
103             boost::spirit::classic::parse(str.begin(), str.end(), xml_decode);
104             return result;
105         }
106 
encode_string(quickbook::string_view str)107         std::string encode_string(quickbook::string_view str)
108         {
109             std::string result;
110             result.reserve(str.size());
111 
112             for (string_iterator it = str.begin(); it != str.end(); ++it) {
113                 switch (*it) {
114                 case '<':
115                     result += "&lt;";
116                     break;
117                 case '>':
118                     result += "&gt;";
119                     break;
120                 case '&':
121                     result += "&amp;";
122                     break;
123                 case '"':
124                     result += "&quot;";
125                     break;
126                 default:
127                     result += *it;
128                     break;
129                 }
130             }
131 
132             return result;
133         }
134 
// Writes `ch` to `out`, substituting the XML entity for `<`, `>`, `&`
// and `"`. Any other character is written as-is.
//
// note &apos; is not included. see the curse of apos:
// http://fishbowl.pastiche.org/2003/07/01/the_curse_of_apos
void print_char(char ch, std::ostream& out)
{
    // Entity to emit in place of `ch`, or null when `ch` needs no escaping.
    char const* entity = 0;

    switch (ch) {
    case '<':
        entity = "&lt;";
        break;
    case '>':
        entity = "&gt;";
        break;
    case '&':
        entity = "&amp;";
        break;
    case '"':
        entity = "&quot;";
        break;
    default:
        break;
    }

    if (entity) {
        out << entity;
    }
    else {
        out << ch;
    }
}
157 
print_string(quickbook::string_view str,std::ostream & out)158         void print_string(quickbook::string_view str, std::ostream& out)
159         {
160             for (string_iterator cur = str.begin(); cur != str.end(); ++cur) {
161                 print_char(*cur, out);
162             }
163         }
164 
make_identifier(quickbook::string_view text)165         std::string make_identifier(quickbook::string_view text)
166         {
167             std::string id(text.begin(), text.end());
168             for (std::string::iterator i = id.begin(); i != id.end(); ++i) {
169                 if (!std::isalnum(static_cast<unsigned char>(*i))) {
170                     *i = '_';
171                 }
172                 else {
173                     *i = static_cast<char>(
174                         std::tolower(static_cast<unsigned char>(*i)));
175                 }
176             }
177 
178             return id;
179         }
180 
escape_uri_impl(quickbook::string_view uri_param,char const * mark)181         static std::string escape_uri_impl(
182             quickbook::string_view uri_param, char const* mark)
183         {
184             // Extra capital characters for validating percent escapes.
185             static char const hex[] = "0123456789abcdefABCDEF";
186 
187             std::string uri;
188             uri.reserve(uri_param.size());
189 
190             for (std::string::size_type n = 0; n < uri_param.size(); ++n) {
191                 if (static_cast<unsigned char>(uri_param[n]) > 127 ||
192                     (!std::isalnum(static_cast<unsigned char>(uri_param[n])) &&
193                      !std::strchr(mark, uri_param[n])) ||
194                     (uri_param[n] == '%' &&
195                      !(n + 2 < uri_param.size() &&
196                        std::strchr(hex, uri_param[n + 1]) &&
197                        std::strchr(hex, uri_param[n + 2])))) {
198                     char escape[] = {'%', hex[uri_param[n] / 16],
199                                      hex[uri_param[n] % 16], '\0'};
200                     uri += escape;
201                 }
202                 else {
203                     uri += uri_param[n];
204                 }
205             }
206 
207             return uri;
208         }
209 
escape_uri(quickbook::string_view uri_param)210         std::string escape_uri(quickbook::string_view uri_param)
211         {
212             std::string uri(uri_param.begin(), uri_param.end());
213             return escape_uri_impl(uri_param, "-_.!~*'()?\\/");
214         }
215 
partially_escape_uri(quickbook::string_view uri_param)216         std::string partially_escape_uri(quickbook::string_view uri_param)
217         {
218             return escape_uri_impl(uri_param, "-_.!~*'()?\\/:&=#%+");
219         }
220     }
221 }
222