1 /*============================================================================= 2 Copyright (c) 2002 2004 2006 Joel de Guzman 3 Copyright (c) 2004 Eric Niebler 4 http://spirit.sourceforge.net/ 5 6 Use, modification and distribution is subject to the Boost Software 7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 8 http://www.boost.org/LICENSE_1_0.txt) 9 =============================================================================*/ 10 #include "syntax_highlight.hpp" 11 #include <boost/spirit/include/classic_chset.hpp> 12 #include <boost/spirit/include/classic_confix.hpp> 13 #include <boost/spirit/include/classic_core.hpp> 14 #include <boost/spirit/include/classic_loops.hpp> 15 #include <boost/spirit/include/classic_symbols.hpp> 16 #include "actions.hpp" 17 #include "files.hpp" 18 #include "grammar.hpp" 19 #include "phrase_tags.hpp" 20 #include "state.hpp" 21 #include "stream.hpp" 22 #include "utils.hpp" 23 24 namespace quickbook 25 { 26 namespace cl = boost::spirit::classic; 27 28 // Syntax Highlight Actions 29 30 struct syntax_highlight_actions 31 { 32 quickbook::state& state; 33 do_macro_action do_macro_impl; 34 35 // State 36 bool support_callouts; 37 quickbook::string_view marked_text; 38 syntax_highlight_actionsquickbook::syntax_highlight_actions39 syntax_highlight_actions(quickbook::state& state_, bool is_block_) 40 : state(state_) 41 , do_macro_impl(state_) 42 , support_callouts( 43 is_block_ && (qbk_version_n >= 107u || 44 state.current_file->is_code_snippets)) 45 , marked_text() 46 { 47 } 48 49 void span(parse_iterator, parse_iterator, char const*); 50 void span_start(parse_iterator, parse_iterator, char const*); 51 void span_end(parse_iterator, parse_iterator); 52 void unexpected_char(parse_iterator, parse_iterator); 53 void plain_char(parse_iterator, parse_iterator); 54 void pre_escape_back(parse_iterator, parse_iterator); 55 void post_escape_back(parse_iterator, parse_iterator); 56 void do_macro(std::string const&); 57 58 void mark_text(parse_iterator, parse_iterator); 59 void callout(parse_iterator, parse_iterator); 60 }; 61 span(parse_iterator first,parse_iterator last,char const * name)62 void syntax_highlight_actions::span( 63 parse_iterator first, parse_iterator last, char const* name) 64 { 65 state.phrase << "<phrase role=\"" << name << "\">"; 66 while (first != last) 67 detail::print_char(*first++, state.phrase.get()); 68 state.phrase << "</phrase>"; 69 } 70 span_start(parse_iterator first,parse_iterator last,char const * name)71 void syntax_highlight_actions::span_start( 72 parse_iterator first, parse_iterator last, char const* name) 73 { 74 state.phrase << "<phrase role=\"" << name << "\">"; 75 while (first != last) 76 detail::print_char(*first++, state.phrase.get()); 77 } 78 span_end(parse_iterator first,parse_iterator last)79 void syntax_highlight_actions::span_end( 80 parse_iterator first, parse_iterator last) 81 { 82 while (first != last) 83 detail::print_char(*first++, state.phrase.get()); 84 state.phrase << "</phrase>"; 85 } 86 unexpected_char(parse_iterator first,parse_iterator last)87 void syntax_highlight_actions::unexpected_char( 88 parse_iterator first, parse_iterator last) 89 { 90 file_position const pos = state.current_file->position_of(first.base()); 91 92 detail::outwarn(state.current_file->path, pos.line) 93 << "in column:" << pos.column << ", unexpected character: " 94 << std::string(first.base(), last.base()) << "\n"; 95 96 // print out an unexpected character 97 state.phrase << "<phrase role=\"error\">"; 98 while (first != last) 99 detail::print_char(*first++, state.phrase.get()); 100 state.phrase << "</phrase>"; 101 } 102 plain_char(parse_iterator first,parse_iterator last)103 void syntax_highlight_actions::plain_char( 104 parse_iterator first, parse_iterator last) 105 { 106 while (first != last) 107 detail::print_char(*first++, state.phrase.get()); 108 } 109 pre_escape_back(parse_iterator,parse_iterator)110 void syntax_highlight_actions::pre_escape_back( 111 parse_iterator, parse_iterator) 112 { 113 state.push_output(); // save the stream 114 } 115 post_escape_back(parse_iterator,parse_iterator)116 void syntax_highlight_actions::post_escape_back( 117 parse_iterator, parse_iterator) 118 { 119 std::string tmp; 120 state.phrase.swap(tmp); 121 state.pop_output(); // restore the stream 122 state.phrase << tmp; 123 } 124 do_macro(std::string const & v)125 void syntax_highlight_actions::do_macro(std::string const& v) 126 { 127 do_macro_impl(v); 128 } 129 mark_text(parse_iterator first,parse_iterator last)130 void syntax_highlight_actions::mark_text( 131 parse_iterator first, parse_iterator last) 132 { 133 marked_text = 134 quickbook::string_view(first.base(), last.base() - first.base()); 135 } 136 callout(parse_iterator,parse_iterator)137 void syntax_highlight_actions::callout(parse_iterator, parse_iterator) 138 { 139 state.phrase << state.add_callout(qbk_value( 140 state.current_file, marked_text.begin(), marked_text.end())); 141 marked_text.clear(); 142 } 143 144 // Syntax 145 146 struct keywords_holder 147 { 148 cl::symbols<> cpp, python; 149 keywords_holderquickbook::keywords_holder150 keywords_holder() 151 { 152 // clang-format off 153 154 cpp 155 = "alignas", "alignof", "and_eq", "and", "asm", "auto", 156 "bitand", "bitor", "bool", "break", "case", "catch", 157 "char", "char16_t", "char32_t", "class", "compl", 158 "const", "const_cast", "constexpr", "continue", 159 "decltype", "default", "delete", "do", "double", 160 "dynamic_cast", "else", "enum", "explicit", "export", 161 "extern", "false", "float", "for", "friend", "goto", 162 "if", "inline", "int", "long", "mutable", "namespace", 163 "new", "noexcept", "not_eq", "not", "nullptr", 164 "operator", "or_eq", "or", "private", "protected", 165 "public", "register", "reinterpret_cast", "return", 166 "short", "signed", "sizeof", "static", "static_assert", 167 "static_cast", "struct", "switch", "template", "this", 168 "thread_local", "throw", "true", "try", "typedef", 169 "typeid", "typename", "union", "unsigned", "using", 170 "virtual", "void", "volatile", "wchar_t", "while", 171 "xor_eq", "xor" 172 ; 173 174 python 175 = 176 "and", "del", "for", "is", "raise", 177 "assert", "elif", "from", "lambda", "return", 178 "break", "else", "global", "not", "try", 179 "class", "except", "if", "or", "while", 180 "continue", "exec", "import", "pass", "yield", 181 "def", "finally", "in", "print", 182 183 // Technically "as" and "None" are not yet keywords (at Python 184 // 2.4). They are destined to become keywords, and we treat them 185 // as such for syntax highlighting purposes. 186 187 "as", "None" 188 ; 189 190 // clang-format on 191 } 192 }; 193 194 namespace 195 { 196 keywords_holder keywords; 197 } 198 199 // Grammar for C++ highlighting 200 struct cpp_highlight : public cl::grammar<cpp_highlight> 201 { cpp_highlightquickbook::cpp_highlight202 explicit cpp_highlight(syntax_highlight_actions& actions_) 203 : actions(actions_) 204 { 205 } 206 207 template <typename Scanner> struct definition 208 { definitionquickbook::cpp_highlight::definition209 definition(cpp_highlight const& self) 210 : g(self.actions.state.grammar()) 211 { 212 // clang-format off 213 214 member_action1<syntax_highlight_actions, char const*> 215 span(self.actions, &syntax_highlight_actions::span), 216 span_start(self.actions, &syntax_highlight_actions::span_start); 217 member_action<syntax_highlight_actions> 218 span_end(self.actions, &syntax_highlight_actions::span_end), 219 unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char), 220 plain_char(self.actions, &syntax_highlight_actions::plain_char), 221 pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back), 222 post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back), 223 mark_text(self.actions, &syntax_highlight_actions::mark_text), 224 callout(self.actions, &syntax_highlight_actions::callout); 225 member_action_value<syntax_highlight_actions, std::string const&> 226 do_macro(self.actions, &syntax_highlight_actions::do_macro); 227 error_action error(self.actions.state); 228 229 program = 230 *( (*cl::space_p) [plain_char] 231 >> (line_start | rest_of_line) 232 >> *rest_of_line 233 ) 234 ; 235 236 line_start = 237 preprocessor [span("preprocessor")] 238 ; 239 240 rest_of_line = 241 (+cl::blank_p) [plain_char] 242 | macro 243 | escape 244 | cl::eps_p(ph::var(self.actions.support_callouts)) 245 >> ( line_callout [callout] 246 | inline_callout [callout] 247 ) 248 | comment 249 | keyword [span("keyword")] 250 | identifier [span("identifier")] 251 | special [span("special")] 252 | string_ [span("string")] 253 | char_ [span("char")] 254 | number [span("number")] 255 | ~cl::eps_p(cl::eol_p) 256 >> u8_codepoint_p [unexpected_char] 257 ; 258 259 macro = 260 // must not be followed by alpha or underscore 261 cl::eps_p(self.actions.state.macro 262 >> (cl::eps_p - (cl::alpha_p | '_'))) 263 >> self.actions.state.macro 264 [do_macro] 265 ; 266 267 escape = 268 cl::str_p("``") [pre_escape_back] 269 >> 270 ( 271 ( 272 ( 273 (+(cl::anychar_p - "``") >> cl::eps_p("``")) 274 & g.phrase_start 275 ) 276 >> cl::str_p("``") 277 ) 278 | 279 ( 280 cl::eps_p [error] 281 >> *cl::anychar_p 282 ) 283 ) [post_escape_back] 284 ; 285 286 preprocessor 287 = '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_')) 288 ; 289 290 inline_callout 291 = cl::confix_p( 292 "/*<" >> *cl::space_p, 293 (*cl::anychar_p) [mark_text], 294 ">*/" 295 ) 296 ; 297 298 line_callout 299 = cl::confix_p( 300 "/*<<" >> *cl::space_p, 301 (*cl::anychar_p) [mark_text], 302 ">>*/" 303 ) 304 >> *cl::space_p 305 ; 306 307 comment 308 = cl::str_p("//") [span_start("comment")] 309 >> *( escape 310 | (+(cl::anychar_p - (cl::eol_p | "``"))) 311 [plain_char] 312 ) 313 >> cl::eps_p [span_end] 314 | cl::str_p("/*") [span_start("comment")] 315 >> *( escape 316 | (+(cl::anychar_p - (cl::str_p("*/") | "``"))) 317 [plain_char] 318 ) 319 >> (!cl::str_p("*/")) [span_end] 320 ; 321 322 keyword 323 = keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_')) 324 ; // make sure we recognize whole words only 325 326 special 327 = +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\#-") 328 ; 329 330 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\'); 331 332 string_ 333 = !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"') 334 ; 335 336 char_ 337 = !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'') 338 ; 339 340 number 341 = ( 342 cl::as_lower_d["0x"] >> cl::hex_p 343 | '0' >> cl::oct_p 344 | cl::real_p 345 ) 346 >> *cl::as_lower_d[cl::chset_p("ldfu")] 347 ; 348 349 identifier 350 = (cl::alpha_p | '_') >> *(cl::alnum_p | '_') 351 ; 352 353 // clang-format on 354 } 355 356 cl::rule<Scanner> program, line_start, rest_of_line, macro, 357 preprocessor, inline_callout, line_callout, comment, special, 358 string_, char_, number, identifier, keyword, escape, 359 string_char; 360 361 quickbook_grammar& g; 362 startquickbook::cpp_highlight::definition363 cl::rule<Scanner> const& start() const { return program; } 364 }; 365 366 syntax_highlight_actions& actions; 367 }; 368 369 // Grammar for Python highlighting 370 // See also: The Python Reference Manual 371 // http://docs.python.org/ref/ref.html 372 struct python_highlight : public cl::grammar<python_highlight> 373 { python_highlightquickbook::python_highlight374 explicit python_highlight(syntax_highlight_actions& actions_) 375 : actions(actions_) 376 { 377 } 378 379 template <typename Scanner> struct definition 380 { definitionquickbook::python_highlight::definition381 definition(python_highlight const& self) 382 : g(self.actions.state.grammar()) 383 { 384 // clang-format off 385 386 member_action1<syntax_highlight_actions, char const*> 387 span(self.actions, &syntax_highlight_actions::span), 388 span_start(self.actions, &syntax_highlight_actions::span_start); 389 member_action<syntax_highlight_actions> 390 span_end(self.actions, &syntax_highlight_actions::span_end), 391 unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char), 392 plain_char(self.actions, &syntax_highlight_actions::plain_char), 393 pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back), 394 post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back), 395 mark_text(self.actions, &syntax_highlight_actions::mark_text), 396 callout(self.actions, &syntax_highlight_actions::callout); 397 member_action_value<syntax_highlight_actions, std::string const&> 398 do_macro(self.actions, &syntax_highlight_actions::do_macro); 399 error_action error(self.actions.state); 400 401 program 402 = 403 *( (+cl::space_p) [plain_char] 404 | macro 405 | escape 406 | cl::eps_p(ph::var(self.actions.support_callouts)) 407 >> ( line_callout [callout] 408 | inline_callout [callout] 409 ) 410 | comment 411 | keyword [span("keyword")] 412 | identifier [span("identifier")] 413 | special [span("special")] 414 | string_ [span("string")] 415 | number [span("number")] 416 | u8_codepoint_p [unexpected_char] 417 ) 418 ; 419 420 macro = 421 // must not be followed by alpha or underscore 422 cl::eps_p(self.actions.state.macro 423 >> (cl::eps_p - (cl::alpha_p | '_'))) 424 >> self.actions.state.macro 425 [do_macro] 426 ; 427 428 escape = 429 cl::str_p("``") [pre_escape_back] 430 >> 431 ( 432 ( 433 ( 434 (+(cl::anychar_p - "``") >> cl::eps_p("``")) 435 & g.phrase_start 436 ) 437 >> cl::str_p("``") 438 ) 439 | 440 ( 441 cl::eps_p [error] 442 >> *cl::anychar_p 443 ) 444 ) [post_escape_back] 445 ; 446 447 inline_callout 448 = "#<" >> *cl::space_p >> 449 (*(cl::anychar_p - cl::eol_p)) [mark_text] 450 ; 451 452 line_callout 453 = cl::confix_p( 454 "#<<" >> *cl::space_p, 455 (*cl::anychar_p) [mark_text], 456 (cl::eol_p | cl::end_p) 457 ) 458 ; 459 460 comment 461 = cl::str_p("#") [span_start("comment")] 462 >> *( escape 463 | (+(cl::anychar_p - (cl::eol_p | "``"))) 464 [plain_char] 465 ) 466 >> cl::eps_p [span_end] 467 ; 468 469 keyword 470 = keywords.python >> (cl::eps_p - (cl::alnum_p | '_')) 471 ; // make sure we recognize whole words only 472 473 special 474 = +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-") 475 ; 476 477 string_prefix 478 = cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")] 479 ; 480 481 string_ 482 = ! string_prefix >> (long_string | short_string) 483 ; 484 485 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\'); 486 487 short_string 488 = cl::confix_p('\'', * string_char, '\'') | 489 cl::confix_p('"', * string_char, '"') 490 ; 491 492 long_string 493 // Note: the "cl::str_p" on the next two lines work around 494 // an INTERNAL COMPILER ERROR when using VC7.1 495 = cl::confix_p(cl::str_p("'''"), * string_char, "'''") | 496 cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"") 497 ; 498 499 number 500 = ( 501 cl::as_lower_d["0x"] >> cl::hex_p 502 | '0' >> cl::oct_p 503 | cl::real_p 504 ) 505 >> *cl::as_lower_d[cl::chset_p("lj")] 506 ; 507 508 identifier 509 = (cl::alpha_p | '_') >> *(cl::alnum_p | '_') 510 ; 511 512 // clang-format on 513 } 514 515 cl::rule<Scanner> program, macro, inline_callout, line_callout, 516 comment, special, string_, string_prefix, short_string, 517 long_string, number, identifier, keyword, escape, string_char; 518 519 quickbook_grammar& g; 520 startquickbook::python_highlight::definition521 cl::rule<Scanner> const& start() const { return program; } 522 }; 523 524 syntax_highlight_actions& actions; 525 }; 526 527 // Grammar for plain text (no actual highlighting) 528 struct teletype_highlight : public cl::grammar<teletype_highlight> 529 { teletype_highlightquickbook::teletype_highlight530 teletype_highlight(syntax_highlight_actions& actions_) 531 : actions(actions_) 532 { 533 } 534 535 template <typename Scanner> struct definition 536 { definitionquickbook::teletype_highlight::definition537 definition(teletype_highlight const& self) 538 : g(self.actions.state.grammar()) 539 { 540 member_action<syntax_highlight_actions> plain_char( 541 self.actions, &syntax_highlight_actions::plain_char), 542 pre_escape_back( 543 self.actions, 544 &syntax_highlight_actions::pre_escape_back), 545 post_escape_back( 546 self.actions, 547 &syntax_highlight_actions::post_escape_back); 548 member_action_value< 549 syntax_highlight_actions, std::string const&> 550 do_macro(self.actions, &syntax_highlight_actions::do_macro); 551 error_action error(self.actions.state); 552 553 // clang-format off 554 555 program 556 = 557 *( macro 558 | escape 559 | u8_codepoint_p [plain_char] 560 ) 561 ; 562 563 macro = 564 // must not be followed by alpha or underscore 565 cl::eps_p(self.actions.state.macro 566 >> (cl::eps_p - (cl::alpha_p | '_'))) 567 >> self.actions.state.macro 568 [do_macro] 569 ; 570 571 escape = 572 cl::str_p("``") [pre_escape_back] 573 >> 574 ( 575 ( 576 ( 577 (+(cl::anychar_p - "``") >> cl::eps_p("``")) 578 & g.phrase_start 579 ) 580 >> cl::str_p("``") 581 ) 582 | 583 ( 584 cl::eps_p [error] 585 >> *cl::anychar_p 586 ) 587 ) [post_escape_back] 588 ; 589 590 // clang-format on 591 } 592 593 cl::rule<Scanner> program, macro, escape; 594 595 quickbook_grammar& g; 596 startquickbook::teletype_highlight::definition597 cl::rule<Scanner> const& start() const { return program; } 598 }; 599 600 syntax_highlight_actions& actions; 601 }; 602 syntax_highlight(parse_iterator first,parse_iterator last,quickbook::state & state,source_mode_type source_mode,bool is_block)603 void syntax_highlight( 604 parse_iterator first, 605 parse_iterator last, 606 quickbook::state& state, 607 source_mode_type source_mode, 608 bool is_block) 609 { 610 syntax_highlight_actions syn_actions(state, is_block); 611 612 // print the code with syntax coloring 613 switch (source_mode) { 614 case source_mode_tags::cpp: { 615 cpp_highlight cpp_p(syn_actions); 616 boost::spirit::classic::parse(first, last, cpp_p); 617 break; 618 } 619 case source_mode_tags::python: { 620 python_highlight python_p(syn_actions); 621 boost::spirit::classic::parse(first, last, python_p); 622 break; 623 } 624 case source_mode_tags::teletype: { 625 teletype_highlight teletype_p(syn_actions); 626 boost::spirit::classic::parse(first, last, teletype_p); 627 break; 628 } 629 default: 630 BOOST_ASSERT(0); 631 } 632 } 633 } 634