/*=============================================================================
    Copyright (c) 2002 2004 2006 Joel de Guzman
    Copyright (c) 2004 Eric Niebler
    http://spirit.sourceforge.net/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#include "syntax_highlight.hpp"
// NOTE(review): the five angle-bracket include targets were not legible in
// the reviewed copy of this file. The Spirit Classic headers below cover the
// facilities used here (chset_p, confix_p, symbols<>, the core parsers) --
// confirm the exact list against the build.
#include <boost/spirit/include/classic_chset.hpp>
#include <boost/spirit/include/classic_confix.hpp>
#include <boost/spirit/include/classic_core.hpp>
#include <boost/spirit/include/classic_loops.hpp>
#include <boost/spirit/include/classic_symbols.hpp>
#include "actions.hpp"
#include "files.hpp"
#include "grammar.hpp"
#include "phrase_tags.hpp"
#include "state.hpp"
#include "stream.hpp"
#include "utils.hpp"

namespace quickbook
{
    namespace cl = boost::spirit::classic;

    // Syntax Highlight Actions
    //
    // Semantic actions shared by the three highlighting grammars below.
    // Every action writes its output to `state.phrase`.

    struct syntax_highlight_actions
    {
        quickbook::state& state;
        do_macro_action do_macro_impl;

        // State

        // Gates the callout alternatives of the C++ and Python grammars.
        bool support_callouts;
        // Text captured by `mark_text`, consumed (and cleared) by `callout`.
        quickbook::string_view marked_text;

        // Callouts are enabled only for code blocks (`is_block_`), and then
        // only when the quickbook version is at least 107 or the current
        // file is flagged `is_code_snippets`.
        // `state` is declared before `support_callouts`, so it is already
        // initialised when the expression below reads `state.current_file`.
        syntax_highlight_actions(quickbook::state& state_, bool is_block_)
            : state(state_)
            , do_macro_impl(state_)
            , support_callouts(
                  is_block_ && (qbk_version_n >= 107u ||
                                state.current_file->is_code_snippets))
            , marked_text()
        {
        }

        void span(parse_iterator, parse_iterator, char const*);
        void span_start(parse_iterator, parse_iterator, char const*);
        void span_end(parse_iterator, parse_iterator);
        void unexpected_char(parse_iterator, parse_iterator);
        void plain_char(parse_iterator, parse_iterator);
        void pre_escape_back(parse_iterator, parse_iterator);
        void post_escape_back(parse_iterator, parse_iterator);
        void do_macro(std::string const&);

        void mark_text(parse_iterator, parse_iterator);
        void callout(parse_iterator, parse_iterator);
    };

    // Emit [first, last) wrapped in a complete phrase element whose role is
    // `name`. Each character goes through detail::print_char.
    void syntax_highlight_actions::span(
        parse_iterator first, parse_iterator last, char const* name)
    {
        state.phrase << "<phrase role=\"" << name << "\">";
        while (first != last) detail::print_char(*first++, state.phrase.get());
        state.phrase << "</phrase>";
    }

    // Open a phrase element with role `name` and emit [first, last) inside
    // it. The element is left open; `span_end` closes it.
    void syntax_highlight_actions::span_start(
        parse_iterator first, parse_iterator last, char const* name)
    {
        state.phrase << "<phrase role=\"" << name << "\">";
        while (first != last) detail::print_char(*first++, state.phrase.get());
    }

    // Emit [first, last) and then close the phrase element opened by
    // `span_start`.
    void syntax_highlight_actions::span_end(
        parse_iterator first, parse_iterator last)
    {
        while (first != last) detail::print_char(*first++, state.phrase.get());
        state.phrase << "</phrase>";
    }

    // Report text that no other alternative of the grammar accepted: write a
    // warning (file path, line, column, offending text) through
    // detail::outwarn, then still emit the text, marked with role "error".
    void syntax_highlight_actions::unexpected_char(
        parse_iterator first, parse_iterator last)
    {
        file_position const pos = state.current_file->position_of(first.base());

        detail::outwarn(state.current_file->path, pos.line)
            << "in column:" << pos.column << ", unexpected character: "
            << std::string(first.base(), last.base()) << "\n";

        // print out an unexpected character
        state.phrase << "<phrase role=\"error\">";
        while (first != last) detail::print_char(*first++, state.phrase.get());
        state.phrase << "</phrase>";
    }

    // Emit [first, last) with no surrounding markup.
    void syntax_highlight_actions::plain_char(
        parse_iterator first, parse_iterator last)
    {
        while (first != last) detail::print_char(*first++, state.phrase.get());
    }

    // Called when an opening `` escape is matched, before its content is
    // parsed.
    void syntax_highlight_actions::pre_escape_back(
        parse_iterator, parse_iterator)
    {
        state.push_output(); // save the stream
    }

    // Called once the escape has been parsed: take whatever was written to
    // the phrase stream since `pre_escape_back`, restore the saved stream,
    // and append the taken text to it.
    void syntax_highlight_actions::post_escape_back(
        parse_iterator, parse_iterator)
    {
        std::string tmp;
        state.phrase.swap(tmp);
        state.pop_output(); // restore the stream
        state.phrase << tmp;
    }

    // Forward a matched macro value to the shared macro action.
    void syntax_highlight_actions::do_macro(std::string const& v)
    {
        do_macro_impl(v);
    }

    // Remember [first, last) as a view into the underlying source so that
    // `callout` can use it. No copy of the text is made.
    void syntax_highlight_actions::mark_text(
        parse_iterator first, parse_iterator last)
    {
        marked_text =
            quickbook::string_view(first.base(), last.base() - first.base());
    }

    // Register the text captured by `mark_text` with state.add_callout and
    // write the value it returns to the phrase stream, then reset the
    // capture.
    void syntax_highlight_actions::callout(parse_iterator, parse_iterator)
    {
        state.phrase << state.add_callout(qbk_value(
            state.current_file, marked_text.begin(), marked_text.end()));
        marked_text.clear();
    }

    // Syntax

    // Keyword symbol tables used by the `keyword` rules of the C++ and
    // Python grammars.
    struct keywords_holder
    {
        cl::symbols<> cpp, python;

        keywords_holder()
        {
            // clang-format off

            cpp
                    =   "alignas", "alignof", "and_eq", "and", "asm", "auto",
                        "bitand", "bitor", "bool", "break", "case", "catch",
                        "char", "char16_t", "char32_t", "class", "compl",
                        "const", "const_cast", "constexpr", "continue",
                        "decltype", "default", "delete", "do", "double",
                        "dynamic_cast",  "else", "enum", "explicit", "export",
                        "extern", "false", "float", "for", "friend", "goto",
                        "if", "inline", "int", "long", "mutable", "namespace",
                        "new", "noexcept", "not_eq", "not", "nullptr",
                        "operator", "or_eq", "or", "private", "protected",
                        "public", "register", "reinterpret_cast", "return",
                        "short", "signed", "sizeof", "static", "static_assert",
                        "static_cast", "struct", "switch", "template", "this",
                        "thread_local", "throw", "true", "try", "typedef",
                        "typeid", "typename", "union", "unsigned", "using",
                        "virtual", "void", "volatile", "wchar_t", "while",
                        "xor_eq", "xor"
                    ;

            python
                    =
                    "and",       "del",       "for",       "is",        "raise",
                    "assert",    "elif",      "from",      "lambda",    "return",
                    "break",     "else",      "global",    "not",       "try",
                    "class",     "except",    "if",        "or",        "while",
                    "continue",  "exec",      "import",    "pass",      "yield",
                    "def",       "finally",   "in",        "print",

                    // Technically "as" and "None" are not yet keywords (at Python
                    // 2.4). They are destined to become keywords, and we treat them
                    // as such for syntax highlighting purposes.

                    "as", "None"
                    ;

            // clang-format on
        }
    };

    namespace
    {
        keywords_holder keywords;
    }

    // Grammar for C++ highlighting

    struct cpp_highlight : public cl::grammar<cpp_highlight>
    {
        explicit cpp_highlight(syntax_highlight_actions& actions_)
            : actions(actions_)
        {
        }

        template <typename Scanner> struct definition
        {
            definition(cpp_highlight const& self)
                : g(self.actions.state.grammar())
            {
                // clang-format off

                // Bind the members of syntax_highlight_actions as semantic
                // actions. `span` and `span_start` additionally take the
                // role name as a bound argument.
                member_action1<syntax_highlight_actions, char const*>
                    span(self.actions, &syntax_highlight_actions::span),
                    span_start(self.actions, &syntax_highlight_actions::span_start);
                member_action<syntax_highlight_actions>
                    span_end(self.actions, &syntax_highlight_actions::span_end),
                    unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
                    plain_char(self.actions, &syntax_highlight_actions::plain_char),
                    pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
                    post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
                    mark_text(self.actions, &syntax_highlight_actions::mark_text),
                    callout(self.actions, &syntax_highlight_actions::callout);
                member_action_value<syntax_highlight_actions, std::string const&>
                    do_macro(self.actions, &syntax_highlight_actions::do_macro);
                error_action error(self.actions.state);

                // A preprocessor directive is only tried as the first
                // non-space item of each iteration.
                program =
                    *(  (*cl::space_p)                  [plain_char]
                    >>  (line_start | rest_of_line)
                    >>  *rest_of_line
                    )
                    ;

                line_start =
                        preprocessor                    [span("preprocessor")]
                    ;

                // Alternatives are tried in order; the final one reports any
                // non-end-of-line code point that nothing else accepted.
                rest_of_line =
                        (+cl::blank_p)                  [plain_char]
                    |   macro
                    |   escape
                    |   cl::eps_p(ph::var(self.actions.support_callouts))
                    >>  (   line_callout                [callout]
                        |   inline_callout              [callout]
                        )
                    |   comment
                    |   keyword                         [span("keyword")]
                    |   identifier                      [span("identifier")]
                    |   special                         [span("special")]
                    |   string_                         [span("string")]
                    |   char_                           [span("char")]
                    |   number                          [span("number")]
                    |   ~cl::eps_p(cl::eol_p)
                    >>  u8_codepoint_p                  [unexpected_char]
                    ;

                macro =
                    // must not be followed by alpha or underscore
                    cl::eps_p(self.actions.state.macro
                        >> (cl::eps_p - (cl::alpha_p | '_')))
                    >> self.actions.state.macro
                                                        [do_macro]
                    ;

                // Text between doubled backticks must match the quickbook
                // phrase grammar. If it does not, the error action runs and
                // the rest of the input is consumed.
                escape =
                    cl::str_p("``")                     [pre_escape_back]
                    >>
                    (
                        (
                            (
                                (+(cl::anychar_p - "``") >> cl::eps_p("``"))
                                & g.phrase_start
                            )
                            >>  cl::str_p("``")
                        )
                        |
                        (
                            cl::eps_p                   [error]
                            >> *cl::anychar_p
                        )
                    )                                   [post_escape_back]
                    ;

                preprocessor
                    =   '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_'))
                    ;

                inline_callout
                    =   cl::confix_p(
                            "/*<" >> *cl::space_p,
                            (*cl::anychar_p)            [mark_text],
                            ">*/"
                        )
                        ;

                line_callout
                    =   cl::confix_p(
                            "/*<<" >> *cl::space_p,
                            (*cl::anychar_p)            [mark_text],
                            ">>*/"
                        )
                    >>  *cl::space_p
                    ;

                // Line and block comments; `` escapes are honoured inside
                // both. The closing "*/" of a block comment is optional.
                comment
                    =   cl::str_p("//")                 [span_start("comment")]
                    >>  *(  escape
                        |   (+(cl::anychar_p - (cl::eol_p | "``")))
                                                        [plain_char]
                        )
                    >>  cl::eps_p                       [span_end]
                    |   cl::str_p("/*")                 [span_start("comment")]
                    >>  *(  escape
                        |   (+(cl::anychar_p - (cl::str_p("*/") | "``")))
                                                        [plain_char]
                        )
                    >>  (!cl::str_p("*/"))              [span_end]
                    ;

                keyword
                    =   keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_'))
                    ;   // make sure we recognize whole words only

                special
                    =   +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\#-")
                    ;

                string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');

                string_
                    =   !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"')
                    ;

                char_
                    =   !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'')
                    ;

                number
                    =   (
                            cl::as_lower_d["0x"] >> cl::hex_p
                        |   '0' >> cl::oct_p
                        |   cl::real_p
                        )
                        >>  *cl::as_lower_d[cl::chset_p("ldfu")]
                    ;

                identifier
                    =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
                    ;

                // clang-format on
            }

            cl::rule<Scanner> program, line_start, rest_of_line, macro,
                preprocessor, inline_callout, line_callout, comment, special,
                string_, char_, number, identifier, keyword, escape,
                string_char;

            quickbook_grammar& g;

            cl::rule<Scanner> const& start() const { return program; }
        };

        syntax_highlight_actions& actions;
    };

    // Grammar for Python highlighting
    // See also: The Python Reference Manual
    // http://docs.python.org/ref/ref.html

    struct python_highlight : public cl::grammar<python_highlight>
    {
        explicit python_highlight(syntax_highlight_actions& actions_)
            : actions(actions_)
        {
        }

        template <typename Scanner> struct definition
        {
            definition(python_highlight const& self)
                : g(self.actions.state.grammar())
            {
                // clang-format off

                // Bind the members of syntax_highlight_actions as semantic
                // actions. `span` and `span_start` additionally take the
                // role name as a bound argument.
                member_action1<syntax_highlight_actions, char const*>
                    span(self.actions, &syntax_highlight_actions::span),
                    span_start(self.actions, &syntax_highlight_actions::span_start);
                member_action<syntax_highlight_actions>
                    span_end(self.actions, &syntax_highlight_actions::span_end),
                    unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
                    plain_char(self.actions, &syntax_highlight_actions::plain_char),
                    pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
                    post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
                    mark_text(self.actions, &syntax_highlight_actions::mark_text),
                    callout(self.actions, &syntax_highlight_actions::callout);
                member_action_value<syntax_highlight_actions, std::string const&>
                    do_macro(self.actions, &syntax_highlight_actions::do_macro);
                error_action error(self.actions.state);

                // Alternatives are tried in order; the final one reports any
                // code point that nothing else accepted.
                program
                    =
                    *(  (+cl::space_p)                  [plain_char]
                    |   macro
                    |   escape
                    |   cl::eps_p(ph::var(self.actions.support_callouts))
                    >>  (   line_callout                [callout]
                        |   inline_callout              [callout]
                        )
                    |   comment
                    |   keyword                         [span("keyword")]
                    |   identifier                      [span("identifier")]
                    |   special                         [span("special")]
                    |   string_                         [span("string")]
                    |   number                          [span("number")]
                    |   u8_codepoint_p                  [unexpected_char]
                    )
                    ;

                macro =
                    // must not be followed by alpha or underscore
                    cl::eps_p(self.actions.state.macro
                        >> (cl::eps_p - (cl::alpha_p | '_')))
                    >> self.actions.state.macro
                                                        [do_macro]
                    ;

                // Text between doubled backticks must match the quickbook
                // phrase grammar. If it does not, the error action runs and
                // the rest of the input is consumed.
                escape =
                    cl::str_p("``")                     [pre_escape_back]
                    >>
                    (
                        (
                            (
                                (+(cl::anychar_p - "``") >> cl::eps_p("``"))
                                & g.phrase_start
                            )
                            >>  cl::str_p("``")
                        )
                        |
                        (
                            cl::eps_p                   [error]
                            >> *cl::anychar_p
                        )
                    )                                   [post_escape_back]
                    ;

                inline_callout
                    =   "#<" >> *cl::space_p
                    >>  (*(cl::anychar_p - cl::eol_p))  [mark_text]
                    ;

                line_callout
                    =   cl::confix_p(
                            "#<<" >> *cl::space_p,
                            (*cl::anychar_p)            [mark_text],
                            (cl::eol_p | cl::end_p)
                        )
                    ;

                comment
                    =   cl::str_p("#")                  [span_start("comment")]
                    >>  *(  escape
                        |   (+(cl::anychar_p - (cl::eol_p | "``")))
                                                        [plain_char]
                        )
                    >>  cl::eps_p                       [span_end]
                    ;

                keyword
                    =   keywords.python >> (cl::eps_p - (cl::alnum_p | '_'))
                    ;   // make sure we recognize whole words only

                special
                    =   +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-")
                    ;

                string_prefix
                    =    cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")]
                    ;

                string_
                    =   ! string_prefix >> (long_string | short_string)
                    ;

                string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');

                short_string
                    =   cl::confix_p('\'', * string_char, '\'')
                    |   cl::confix_p('"', * string_char, '"')
                    ;

                long_string
                    // Note: the "cl::str_p" on the next two lines work around
                    // an INTERNAL COMPILER ERROR when using VC7.1
                    =   cl::confix_p(cl::str_p("'''"), * string_char, "'''")
                    |   cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"")
                    ;

                number
                    =   (
                            cl::as_lower_d["0x"] >> cl::hex_p
                        |   '0' >> cl::oct_p
                        |   cl::real_p
                        )
                        >>  *cl::as_lower_d[cl::chset_p("lj")]
                    ;

                identifier
                    =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
                    ;

                // clang-format on
            }

            cl::rule<Scanner> program, macro, inline_callout, line_callout,
                comment, special, string_, string_prefix, short_string,
                long_string, number, identifier, keyword, escape, string_char;

            quickbook_grammar& g;

            cl::rule<Scanner> const& start() const { return program; }
        };

        syntax_highlight_actions& actions;
    };

    // Grammar for plain text (no actual highlighting)

    struct teletype_highlight : public cl::grammar<teletype_highlight>
    {
        // `explicit` for consistency with the other two grammars.
        explicit teletype_highlight(syntax_highlight_actions& actions_)
            : actions(actions_)
        {
        }

        template <typename Scanner> struct definition
        {
            definition(teletype_highlight const& self)
                : g(self.actions.state.grammar())
            {
                member_action<syntax_highlight_actions> plain_char(
                    self.actions, &syntax_highlight_actions::plain_char),
                    pre_escape_back(
                        self.actions,
                        &syntax_highlight_actions::pre_escape_back),
                    post_escape_back(
                        self.actions,
                        &syntax_highlight_actions::post_escape_back);
                member_action_value<
                    syntax_highlight_actions, std::string const&>
                    do_macro(self.actions, &syntax_highlight_actions::do_macro);
                error_action error(self.actions.state);

                // clang-format off

                // Only macros and `` escapes are interpreted; every other
                // code point is emitted unchanged.
                program =
                    *(  macro
                    |   escape
                    |   u8_codepoint_p                  [plain_char]
                    )
                    ;

                macro =
                    // must not be followed by alpha or underscore
                    cl::eps_p(self.actions.state.macro
                        >> (cl::eps_p - (cl::alpha_p | '_')))
                    >> self.actions.state.macro
                                                        [do_macro]
                    ;

                escape =
                    cl::str_p("``")                     [pre_escape_back]
                    >>
                    (
                        (
                            (
                                (+(cl::anychar_p - "``") >> cl::eps_p("``"))
                                & g.phrase_start
                            )
                            >>  cl::str_p("``")
                        )
                        |
                        (
                            cl::eps_p                   [error]
                            >> *cl::anychar_p
                        )
                    )                                   [post_escape_back]
                    ;

                // clang-format on
            }

            cl::rule<Scanner> program, macro, escape;

            quickbook_grammar& g;

            cl::rule<Scanner> const& start() const { return program; }
        };

        syntax_highlight_actions& actions;
    };

    // Highlight the source text [first, last) and append the result to
    // `state.phrase`.
    //
    // source_mode selects the grammar (C++, Python or teletype); any other
    // value trips BOOST_ASSERT. is_block is forwarded to the actions object,
    // where it takes part in deciding whether callouts are recognised.
    void syntax_highlight(
        parse_iterator first,
        parse_iterator last,
        quickbook::state& state,
        source_mode_type source_mode,
        bool is_block)
    {
        syntax_highlight_actions syn_actions(state, is_block);

        // print the code with syntax coloring
        switch (source_mode) {
        case source_mode_tags::cpp: {
            cpp_highlight cpp_p(syn_actions);
            boost::spirit::classic::parse(first, last, cpp_p);
            break;
        }
        case source_mode_tags::python: {
            python_highlight python_p(syn_actions);
            boost::spirit::classic::parse(first, last, python_p);
            break;
        }
        case source_mode_tags::teletype: {
            teletype_highlight teletype_p(syn_actions);
            boost::spirit::classic::parse(first, last, teletype_p);
            break;
        }
        default:
            BOOST_ASSERT(0);
        }
    }
}