• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Copyright (c) 2002 2004 2006 Joel de Guzman
3     Copyright (c) 2004 Eric Niebler
4     http://spirit.sourceforge.net/
5 
6     Use, modification and distribution is subject to the Boost Software
7     License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
8     http://www.boost.org/LICENSE_1_0.txt)
9 =============================================================================*/
10 #include "syntax_highlight.hpp"
11 #include <boost/spirit/include/classic_chset.hpp>
12 #include <boost/spirit/include/classic_confix.hpp>
13 #include <boost/spirit/include/classic_core.hpp>
14 #include <boost/spirit/include/classic_loops.hpp>
15 #include <boost/spirit/include/classic_symbols.hpp>
16 #include "actions.hpp"
17 #include "files.hpp"
18 #include "grammar.hpp"
19 #include "phrase_tags.hpp"
20 #include "state.hpp"
21 #include "stream.hpp"
22 #include "utils.hpp"
23 
24 namespace quickbook
25 {
26     namespace cl = boost::spirit::classic;
27 
28     // Syntax Highlight Actions
29 
30     struct syntax_highlight_actions
31     {
32         quickbook::state& state;
33         do_macro_action do_macro_impl;
34 
35         // State
36         bool support_callouts;
37         quickbook::string_view marked_text;
38 
syntax_highlight_actionsquickbook::syntax_highlight_actions39         syntax_highlight_actions(quickbook::state& state_, bool is_block_)
40             : state(state_)
41             , do_macro_impl(state_)
42             , support_callouts(
43                   is_block_ && (qbk_version_n >= 107u ||
44                                 state.current_file->is_code_snippets))
45             , marked_text()
46         {
47         }
48 
49         void span(parse_iterator, parse_iterator, char const*);
50         void span_start(parse_iterator, parse_iterator, char const*);
51         void span_end(parse_iterator, parse_iterator);
52         void unexpected_char(parse_iterator, parse_iterator);
53         void plain_char(parse_iterator, parse_iterator);
54         void pre_escape_back(parse_iterator, parse_iterator);
55         void post_escape_back(parse_iterator, parse_iterator);
56         void do_macro(std::string const&);
57 
58         void mark_text(parse_iterator, parse_iterator);
59         void callout(parse_iterator, parse_iterator);
60     };
61 
span(parse_iterator first,parse_iterator last,char const * name)62     void syntax_highlight_actions::span(
63         parse_iterator first, parse_iterator last, char const* name)
64     {
65         state.phrase << "<phrase role=\"" << name << "\">";
66         while (first != last)
67             detail::print_char(*first++, state.phrase.get());
68         state.phrase << "</phrase>";
69     }
70 
span_start(parse_iterator first,parse_iterator last,char const * name)71     void syntax_highlight_actions::span_start(
72         parse_iterator first, parse_iterator last, char const* name)
73     {
74         state.phrase << "<phrase role=\"" << name << "\">";
75         while (first != last)
76             detail::print_char(*first++, state.phrase.get());
77     }
78 
span_end(parse_iterator first,parse_iterator last)79     void syntax_highlight_actions::span_end(
80         parse_iterator first, parse_iterator last)
81     {
82         while (first != last)
83             detail::print_char(*first++, state.phrase.get());
84         state.phrase << "</phrase>";
85     }
86 
unexpected_char(parse_iterator first,parse_iterator last)87     void syntax_highlight_actions::unexpected_char(
88         parse_iterator first, parse_iterator last)
89     {
90         file_position const pos = state.current_file->position_of(first.base());
91 
92         detail::outwarn(state.current_file->path, pos.line)
93             << "in column:" << pos.column << ", unexpected character: "
94             << std::string(first.base(), last.base()) << "\n";
95 
96         // print out an unexpected character
97         state.phrase << "<phrase role=\"error\">";
98         while (first != last)
99             detail::print_char(*first++, state.phrase.get());
100         state.phrase << "</phrase>";
101     }
102 
plain_char(parse_iterator first,parse_iterator last)103     void syntax_highlight_actions::plain_char(
104         parse_iterator first, parse_iterator last)
105     {
106         while (first != last)
107             detail::print_char(*first++, state.phrase.get());
108     }
109 
pre_escape_back(parse_iterator,parse_iterator)110     void syntax_highlight_actions::pre_escape_back(
111         parse_iterator, parse_iterator)
112     {
113         state.push_output(); // save the stream
114     }
115 
post_escape_back(parse_iterator,parse_iterator)116     void syntax_highlight_actions::post_escape_back(
117         parse_iterator, parse_iterator)
118     {
119         std::string tmp;
120         state.phrase.swap(tmp);
121         state.pop_output(); // restore the stream
122         state.phrase << tmp;
123     }
124 
do_macro(std::string const & v)125     void syntax_highlight_actions::do_macro(std::string const& v)
126     {
127         do_macro_impl(v);
128     }
129 
mark_text(parse_iterator first,parse_iterator last)130     void syntax_highlight_actions::mark_text(
131         parse_iterator first, parse_iterator last)
132     {
133         marked_text =
134             quickbook::string_view(first.base(), last.base() - first.base());
135     }
136 
callout(parse_iterator,parse_iterator)137     void syntax_highlight_actions::callout(parse_iterator, parse_iterator)
138     {
139         state.phrase << state.add_callout(qbk_value(
140             state.current_file, marked_text.begin(), marked_text.end()));
141         marked_text.clear();
142     }
143 
144     // Syntax
145 
146     struct keywords_holder
147     {
148         cl::symbols<> cpp, python;
149 
keywords_holderquickbook::keywords_holder150         keywords_holder()
151         {
152             // clang-format off
153 
154             cpp
155                     =   "alignas", "alignof", "and_eq", "and", "asm", "auto",
156                         "bitand", "bitor", "bool", "break", "case", "catch",
157                         "char", "char16_t", "char32_t", "class", "compl",
158                         "const", "const_cast", "constexpr", "continue",
159                         "decltype", "default", "delete", "do", "double",
160                         "dynamic_cast",  "else", "enum", "explicit", "export",
161                         "extern", "false", "float", "for", "friend", "goto",
162                         "if", "inline", "int", "long", "mutable", "namespace",
163                         "new", "noexcept", "not_eq", "not", "nullptr",
164                         "operator", "or_eq", "or", "private", "protected",
165                         "public", "register", "reinterpret_cast", "return",
166                         "short", "signed", "sizeof", "static", "static_assert",
167                         "static_cast", "struct", "switch", "template", "this",
168                         "thread_local", "throw", "true", "try", "typedef",
169                         "typeid", "typename", "union", "unsigned", "using",
170                         "virtual", "void", "volatile", "wchar_t", "while",
171                         "xor_eq", "xor"
172                     ;
173 
174             python
175                     =
176                     "and",       "del",       "for",       "is",        "raise",
177                     "assert",    "elif",      "from",      "lambda",    "return",
178                     "break",     "else",      "global",    "not",       "try",
179                     "class",     "except",    "if",        "or",        "while",
180                     "continue",  "exec",      "import",    "pass",      "yield",
181                     "def",       "finally",   "in",        "print",
182 
183                     // Technically "as" and "None" are not yet keywords (at Python
184                     // 2.4). They are destined to become keywords, and we treat them
185                     // as such for syntax highlighting purposes.
186 
187                     "as", "None"
188                     ;
189 
190             // clang-format on
191         }
192     };
193 
194     namespace
195     {
196         keywords_holder keywords;
197     }
198 
199     // Grammar for C++ highlighting
200     struct cpp_highlight : public cl::grammar<cpp_highlight>
201     {
cpp_highlightquickbook::cpp_highlight202         explicit cpp_highlight(syntax_highlight_actions& actions_)
203             : actions(actions_)
204         {
205         }
206 
207         template <typename Scanner> struct definition
208         {
definitionquickbook::cpp_highlight::definition209             definition(cpp_highlight const& self)
210                 : g(self.actions.state.grammar())
211             {
212                 // clang-format off
213 
214                 member_action1<syntax_highlight_actions, char const*>
215                     span(self.actions, &syntax_highlight_actions::span),
216                     span_start(self.actions, &syntax_highlight_actions::span_start);
217                 member_action<syntax_highlight_actions>
218                     span_end(self.actions, &syntax_highlight_actions::span_end),
219                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
220                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
221                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
222                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
223                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
224                     callout(self.actions, &syntax_highlight_actions::callout);
225                 member_action_value<syntax_highlight_actions, std::string const&>
226                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
227                 error_action error(self.actions.state);
228 
229                 program =
230                     *(  (*cl::space_p)                  [plain_char]
231                     >>  (line_start | rest_of_line)
232                     >>  *rest_of_line
233                     )
234                     ;
235 
236                 line_start =
237                         preprocessor                    [span("preprocessor")]
238                     ;
239 
240                 rest_of_line =
241                         (+cl::blank_p)                  [plain_char]
242                     |   macro
243                     |   escape
244                     |   cl::eps_p(ph::var(self.actions.support_callouts))
245                     >>  (   line_callout                [callout]
246                         |   inline_callout              [callout]
247                         )
248                     |   comment
249                     |   keyword                         [span("keyword")]
250                     |   identifier                      [span("identifier")]
251                     |   special                         [span("special")]
252                     |   string_                         [span("string")]
253                     |   char_                           [span("char")]
254                     |   number                          [span("number")]
255                     |   ~cl::eps_p(cl::eol_p)
256                     >>  u8_codepoint_p                  [unexpected_char]
257                     ;
258 
259                 macro =
260                     // must not be followed by alpha or underscore
261                     cl::eps_p(self.actions.state.macro
262                         >> (cl::eps_p - (cl::alpha_p | '_')))
263                     >> self.actions.state.macro
264                                                         [do_macro]
265                     ;
266 
267                 escape =
268                     cl::str_p("``")                     [pre_escape_back]
269                     >>
270                     (
271                         (
272                             (
273                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
274                                 & g.phrase_start
275                             )
276                             >>  cl::str_p("``")
277                         )
278                         |
279                         (
280                             cl::eps_p                   [error]
281                             >> *cl::anychar_p
282                         )
283                     )                                   [post_escape_back]
284                     ;
285 
286                 preprocessor
287                     =   '#' >> *cl::space_p >> ((cl::alpha_p | '_') >> *(cl::alnum_p | '_'))
288                     ;
289 
290                 inline_callout
291                     =   cl::confix_p(
292                             "/*<" >> *cl::space_p,
293                             (*cl::anychar_p)            [mark_text],
294                             ">*/"
295                         )
296                         ;
297 
298                 line_callout
299                     =   cl::confix_p(
300                             "/*<<" >> *cl::space_p,
301                             (*cl::anychar_p)            [mark_text],
302                             ">>*/"
303                         )
304                     >>  *cl::space_p
305                     ;
306 
307                 comment
308                     =   cl::str_p("//")                 [span_start("comment")]
309                     >>  *(  escape
310                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
311                                                         [plain_char]
312                         )
313                     >>  cl::eps_p                       [span_end]
314                     |   cl::str_p("/*")                 [span_start("comment")]
315                     >>  *(  escape
316                         |   (+(cl::anychar_p - (cl::str_p("*/") | "``")))
317                                                         [plain_char]
318                         )
319                     >>  (!cl::str_p("*/"))              [span_end]
320                     ;
321 
322                 keyword
323                     =   keywords.cpp >> (cl::eps_p - (cl::alnum_p | '_'))
324                     ;   // make sure we recognize whole words only
325 
326                 special
327                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>?/|\\#-")
328                     ;
329 
330                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
331 
332                 string_
333                     =   !cl::as_lower_d['l'] >> cl::confix_p('"', *string_char, '"')
334                     ;
335 
336                 char_
337                     =   !cl::as_lower_d['l'] >> cl::confix_p('\'', *string_char, '\'')
338                     ;
339 
340                 number
341                     =   (
342                             cl::as_lower_d["0x"] >> cl::hex_p
343                         |   '0' >> cl::oct_p
344                         |   cl::real_p
345                         )
346                         >>  *cl::as_lower_d[cl::chset_p("ldfu")]
347                     ;
348 
349                 identifier
350                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
351                     ;
352 
353                 // clang-format on
354             }
355 
356             cl::rule<Scanner> program, line_start, rest_of_line, macro,
357                 preprocessor, inline_callout, line_callout, comment, special,
358                 string_, char_, number, identifier, keyword, escape,
359                 string_char;
360 
361             quickbook_grammar& g;
362 
startquickbook::cpp_highlight::definition363             cl::rule<Scanner> const& start() const { return program; }
364         };
365 
366         syntax_highlight_actions& actions;
367     };
368 
369     // Grammar for Python highlighting
370     // See also: The Python Reference Manual
371     // http://docs.python.org/ref/ref.html
372     struct python_highlight : public cl::grammar<python_highlight>
373     {
python_highlightquickbook::python_highlight374         explicit python_highlight(syntax_highlight_actions& actions_)
375             : actions(actions_)
376         {
377         }
378 
379         template <typename Scanner> struct definition
380         {
definitionquickbook::python_highlight::definition381             definition(python_highlight const& self)
382                 : g(self.actions.state.grammar())
383             {
384                 // clang-format off
385 
386                 member_action1<syntax_highlight_actions, char const*>
387                     span(self.actions, &syntax_highlight_actions::span),
388                     span_start(self.actions, &syntax_highlight_actions::span_start);
389                 member_action<syntax_highlight_actions>
390                     span_end(self.actions, &syntax_highlight_actions::span_end),
391                     unexpected_char(self.actions, &syntax_highlight_actions::unexpected_char),
392                     plain_char(self.actions, &syntax_highlight_actions::plain_char),
393                     pre_escape_back(self.actions, &syntax_highlight_actions::pre_escape_back),
394                     post_escape_back(self.actions, &syntax_highlight_actions::post_escape_back),
395                     mark_text(self.actions, &syntax_highlight_actions::mark_text),
396                     callout(self.actions, &syntax_highlight_actions::callout);
397                 member_action_value<syntax_highlight_actions, std::string const&>
398                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
399                 error_action error(self.actions.state);
400 
401                 program
402                     =
403                     *(  (+cl::space_p)                  [plain_char]
404                     |   macro
405                     |   escape
406                     |   cl::eps_p(ph::var(self.actions.support_callouts))
407                     >>  (   line_callout                [callout]
408                         |   inline_callout              [callout]
409                         )
410                     |   comment
411                     |   keyword                         [span("keyword")]
412                     |   identifier                      [span("identifier")]
413                     |   special                         [span("special")]
414                     |   string_                         [span("string")]
415                     |   number                          [span("number")]
416                     |   u8_codepoint_p                  [unexpected_char]
417                     )
418                     ;
419 
420                 macro =
421                     // must not be followed by alpha or underscore
422                     cl::eps_p(self.actions.state.macro
423                         >> (cl::eps_p - (cl::alpha_p | '_')))
424                     >> self.actions.state.macro
425                                                         [do_macro]
426                     ;
427 
428                 escape =
429                     cl::str_p("``")                     [pre_escape_back]
430                     >>
431                     (
432                         (
433                             (
434                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
435                                 & g.phrase_start
436                             )
437                             >>  cl::str_p("``")
438                         )
439                         |
440                         (
441                             cl::eps_p                   [error]
442                             >> *cl::anychar_p
443                         )
444                     )                                   [post_escape_back]
445                     ;
446 
447                 inline_callout
448                     =   "#<" >> *cl::space_p >>
449                         (*(cl::anychar_p - cl::eol_p))  [mark_text]
450                     ;
451 
452                 line_callout
453                     =   cl::confix_p(
454                             "#<<" >> *cl::space_p,
455                             (*cl::anychar_p)            [mark_text],
456                             (cl::eol_p | cl::end_p)
457                         )
458                     ;
459 
460                 comment
461                     =   cl::str_p("#")                  [span_start("comment")]
462                     >>  *(  escape
463                         |   (+(cl::anychar_p - (cl::eol_p | "``")))
464                                                         [plain_char]
465                         )
466                     >>  cl::eps_p                       [span_end]
467                     ;
468 
469                 keyword
470                     =   keywords.python >> (cl::eps_p - (cl::alnum_p | '_'))
471                     ;   // make sure we recognize whole words only
472 
473                 special
474                     =   +cl::chset_p("~!%^&*()+={[}]:;,<.>/|\\-")
475                     ;
476 
477                 string_prefix
478                     =    cl::as_lower_d[cl::str_p("u") >> ! cl::str_p("r")]
479                     ;
480 
481                 string_
482                     =   ! string_prefix >> (long_string | short_string)
483                     ;
484 
485                 string_char = ('\\' >> u8_codepoint_p) | (cl::anychar_p - '\\');
486 
487                 short_string
488                     =   cl::confix_p('\'', * string_char, '\'') |
489                         cl::confix_p('"', * string_char, '"')
490                     ;
491 
492                 long_string
493                     // Note: the "cl::str_p" on the next two lines work around
494                     // an INTERNAL COMPILER ERROR when using VC7.1
495                     =   cl::confix_p(cl::str_p("'''"), * string_char, "'''") |
496                         cl::confix_p(cl::str_p("\"\"\""), * string_char, "\"\"\"")
497                     ;
498 
499                 number
500                     =   (
501                             cl::as_lower_d["0x"] >> cl::hex_p
502                         |   '0' >> cl::oct_p
503                         |   cl::real_p
504                         )
505                         >>  *cl::as_lower_d[cl::chset_p("lj")]
506                     ;
507 
508                 identifier
509                     =   (cl::alpha_p | '_') >> *(cl::alnum_p | '_')
510                     ;
511 
512                 // clang-format on
513             }
514 
515             cl::rule<Scanner> program, macro, inline_callout, line_callout,
516                 comment, special, string_, string_prefix, short_string,
517                 long_string, number, identifier, keyword, escape, string_char;
518 
519             quickbook_grammar& g;
520 
startquickbook::python_highlight::definition521             cl::rule<Scanner> const& start() const { return program; }
522         };
523 
524         syntax_highlight_actions& actions;
525     };
526 
527     // Grammar for plain text (no actual highlighting)
528     struct teletype_highlight : public cl::grammar<teletype_highlight>
529     {
teletype_highlightquickbook::teletype_highlight530         teletype_highlight(syntax_highlight_actions& actions_)
531             : actions(actions_)
532         {
533         }
534 
535         template <typename Scanner> struct definition
536         {
definitionquickbook::teletype_highlight::definition537             definition(teletype_highlight const& self)
538                 : g(self.actions.state.grammar())
539             {
540                 member_action<syntax_highlight_actions> plain_char(
541                     self.actions, &syntax_highlight_actions::plain_char),
542                     pre_escape_back(
543                         self.actions,
544                         &syntax_highlight_actions::pre_escape_back),
545                     post_escape_back(
546                         self.actions,
547                         &syntax_highlight_actions::post_escape_back);
548                 member_action_value<
549                     syntax_highlight_actions, std::string const&>
550                     do_macro(self.actions, &syntax_highlight_actions::do_macro);
551                 error_action error(self.actions.state);
552 
553                 // clang-format off
554 
555                 program
556                     =
557                     *(  macro
558                     |   escape
559                     |   u8_codepoint_p                  [plain_char]
560                     )
561                     ;
562 
563                 macro =
564                     // must not be followed by alpha or underscore
565                     cl::eps_p(self.actions.state.macro
566                         >> (cl::eps_p - (cl::alpha_p | '_')))
567                     >> self.actions.state.macro
568                                                         [do_macro]
569                     ;
570 
571                 escape =
572                     cl::str_p("``")                     [pre_escape_back]
573                     >>
574                     (
575                         (
576                             (
577                                 (+(cl::anychar_p - "``") >> cl::eps_p("``"))
578                                 & g.phrase_start
579                             )
580                             >>  cl::str_p("``")
581                         )
582                         |
583                         (
584                             cl::eps_p                   [error]
585                             >> *cl::anychar_p
586                         )
587                     )                                   [post_escape_back]
588                     ;
589 
590                 // clang-format on
591             }
592 
593             cl::rule<Scanner> program, macro, escape;
594 
595             quickbook_grammar& g;
596 
startquickbook::teletype_highlight::definition597             cl::rule<Scanner> const& start() const { return program; }
598         };
599 
600         syntax_highlight_actions& actions;
601     };
602 
syntax_highlight(parse_iterator first,parse_iterator last,quickbook::state & state,source_mode_type source_mode,bool is_block)603     void syntax_highlight(
604         parse_iterator first,
605         parse_iterator last,
606         quickbook::state& state,
607         source_mode_type source_mode,
608         bool is_block)
609     {
610         syntax_highlight_actions syn_actions(state, is_block);
611 
612         // print the code with syntax coloring
613         switch (source_mode) {
614         case source_mode_tags::cpp: {
615             cpp_highlight cpp_p(syn_actions);
616             boost::spirit::classic::parse(first, last, cpp_p);
617             break;
618         }
619         case source_mode_tags::python: {
620             python_highlight python_p(syn_actions);
621             boost::spirit::classic::parse(first, last, python_p);
622             break;
623         }
624         case source_mode_tags::teletype: {
625             teletype_highlight teletype_p(syn_actions);
626             boost::spirit::classic::parse(first, last, teletype_p);
627             break;
628         }
629         default:
630             BOOST_ASSERT(0);
631         }
632     }
633 }
634