1 /*============================================================================= 2 Copyright (c) 2002 2004 2006 Joel de Guzman 3 Copyright (c) 2004 Eric Niebler 4 http://spirit.sourceforge.net/ 5 6 Use, modification and distribution is subject to the Boost Software 7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 8 http://www.boost.org/LICENSE_1_0.txt) 9 =============================================================================*/ 10 #include "utils.hpp" 11 12 #include <cctype> 13 #include <cstring> 14 #include <map> 15 #include <boost/spirit/include/classic_chset.hpp> 16 #include <boost/spirit/include/classic_core.hpp> 17 #include <boost/spirit/include/classic_numerics.hpp> 18 #include <boost/spirit/include/phoenix1_binders.hpp> 19 #include <boost/spirit/include/phoenix1_primitives.hpp> 20 21 namespace quickbook 22 { 23 namespace detail 24 { 25 namespace cl = boost::spirit::classic; 26 namespace ph = phoenix; 27 28 struct xml_decode_grammar : cl::grammar<xml_decode_grammar> 29 { 30 std::string& result; xml_decode_grammarquickbook::detail::xml_decode_grammar31 xml_decode_grammar(std::string& result_) : result(result_) {} 32 append_charquickbook::detail::xml_decode_grammar33 void append_char(char c) const { result += c; } 34 append_escaped_charquickbook::detail::xml_decode_grammar35 void append_escaped_char(unsigned int c) const 36 { 37 if (c < 0x80) { 38 result += static_cast<char>(c); 39 } 40 else if (c < 0x800) { 41 char e[] = {static_cast<char>(0xc0 + (c >> 6)), 42 static_cast<char>(0x80 + (c & 0x3f)), '\0'}; 43 result += e; 44 } 45 else if (c < 0x10000) { 46 char e[] = {static_cast<char>(0xe0 + (c >> 12)), 47 static_cast<char>(0x80 + ((c >> 6) & 0x3f)), 48 static_cast<char>(0x80 + (c & 0x3f)), '\0'}; 49 result += e; 50 } 51 else if (c < 0x110000) { 52 char e[] = {static_cast<char>(0xf0 + (c >> 18)), 53 static_cast<char>(0x80 + ((c >> 12) & 0x3f)), 54 static_cast<char>(0x80 + ((c >> 6) & 0x3f)), 55 static_cast<char>(0x80 + (c & 0x3f)), '\0'}; 56 result += e; 57 } 58 else { 59 result += "\xEF\xBF\xBD"; 60 } 61 } 62 63 template <typename Scanner> struct definition 64 { definitionquickbook::detail::xml_decode_grammar::definition65 definition(xml_decode_grammar const& self) 66 { 67 // clang-format off 68 69 auto append_escaped_char = ph::bind(&xml_decode_grammar::append_escaped_char); 70 auto append_char = ph::bind(&xml_decode_grammar::append_char); 71 auto encoded = 72 cl::ch_p('&') 73 >> ( "#x" 74 >> cl::hex_p [append_escaped_char(self, ph::arg1)] 75 >> !cl::ch_p(';') 76 | '#' 77 >> cl::uint_p [append_escaped_char(self, ph::arg1)] 78 >> !cl::ch_p(';') 79 | cl::str_p("amp;") [append_char(self, '&')] 80 | cl::str_p("apos;") [append_char(self, '\'')] 81 | cl::str_p("gt;") [append_char(self, '>')] 82 | cl::str_p("lt;") [append_char(self, '<')] 83 | cl::str_p("quot;") [append_char(self, '"')] 84 ); 85 auto character = cl::anychar_p [append_char(self, ph::arg1)]; 86 xml_encoded = *(encoded | character); 87 88 // clang-format on 89 } 90 startquickbook::detail::xml_decode_grammar::definition91 cl::rule<Scanner> const& start() { return xml_encoded; } 92 cl::rule<Scanner> xml_encoded; 93 }; 94 95 private: 96 xml_decode_grammar& operator=(xml_decode_grammar const&); 97 }; 98 decode_string(quickbook::string_view str)99 std::string decode_string(quickbook::string_view str) 100 { 101 std::string result; 102 xml_decode_grammar xml_decode(result); 103 boost::spirit::classic::parse(str.begin(), str.end(), xml_decode); 104 return result; 105 } 106 encode_string(quickbook::string_view str)107 std::string encode_string(quickbook::string_view str) 108 { 109 std::string result; 110 result.reserve(str.size()); 111 112 for (string_iterator it = str.begin(); it != str.end(); ++it) { 113 switch (*it) { 114 case '<': 115 result += "<"; 116 break; 117 case '>': 118 result += ">"; 119 break; 120 case '&': 121 result += "&"; 122 break; 123 case '"': 124 result += """; 125 break; 126 default: 127 result += *it; 128 break; 129 } 130 } 131 132 return result; 133 } 134 print_char(char ch,std::ostream & out)135 void print_char(char ch, std::ostream& out) 136 { 137 switch (ch) { 138 case '<': 139 out << "<"; 140 break; 141 case '>': 142 out << ">"; 143 break; 144 case '&': 145 out << "&"; 146 break; 147 case '"': 148 out << """; 149 break; 150 default: 151 out << ch; 152 break; 153 // note ' is not included. see the curse of apos: 154 // http://fishbowl.pastiche.org/2003/07/01/the_curse_of_apos 155 } 156 } 157 print_string(quickbook::string_view str,std::ostream & out)158 void print_string(quickbook::string_view str, std::ostream& out) 159 { 160 for (string_iterator cur = str.begin(); cur != str.end(); ++cur) { 161 print_char(*cur, out); 162 } 163 } 164 make_identifier(quickbook::string_view text)165 std::string make_identifier(quickbook::string_view text) 166 { 167 std::string id(text.begin(), text.end()); 168 for (std::string::iterator i = id.begin(); i != id.end(); ++i) { 169 if (!std::isalnum(static_cast<unsigned char>(*i))) { 170 *i = '_'; 171 } 172 else { 173 *i = static_cast<char>( 174 std::tolower(static_cast<unsigned char>(*i))); 175 } 176 } 177 178 return id; 179 } 180 escape_uri_impl(quickbook::string_view uri_param,char const * mark)181 static std::string escape_uri_impl( 182 quickbook::string_view uri_param, char const* mark) 183 { 184 // Extra capital characters for validating percent escapes. 185 static char const hex[] = "0123456789abcdefABCDEF"; 186 187 std::string uri; 188 uri.reserve(uri_param.size()); 189 190 for (std::string::size_type n = 0; n < uri_param.size(); ++n) { 191 if (static_cast<unsigned char>(uri_param[n]) > 127 || 192 (!std::isalnum(static_cast<unsigned char>(uri_param[n])) && 193 !std::strchr(mark, uri_param[n])) || 194 (uri_param[n] == '%' && 195 !(n + 2 < uri_param.size() && 196 std::strchr(hex, uri_param[n + 1]) && 197 std::strchr(hex, uri_param[n + 2])))) { 198 char escape[] = {'%', hex[uri_param[n] / 16], 199 hex[uri_param[n] % 16], '\0'}; 200 uri += escape; 201 } 202 else { 203 uri += uri_param[n]; 204 } 205 } 206 207 return uri; 208 } 209 escape_uri(quickbook::string_view uri_param)210 std::string escape_uri(quickbook::string_view uri_param) 211 { 212 std::string uri(uri_param.begin(), uri_param.end()); 213 return escape_uri_impl(uri_param, "-_.!~*'()?\\/"); 214 } 215 partially_escape_uri(quickbook::string_view uri_param)216 std::string partially_escape_uri(quickbook::string_view uri_param) 217 { 218 return escape_uri_impl(uri_param, "-_.!~*'()?\\/:&=#%+"); 219 } 220 } 221 } 222