• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Boost.Wave: A Standard compliant C++ preprocessor library
3 
4     Xpressive based C++ lexer
5 
6     http://www.boost.org/
7 
8     Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9     Software License, Version 1.0. (See accompanying file
10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12 
13 #if !defined(BOOST_XLEX_LEXER_HPP)
14 #define BOOST_XLEX_LEXER_HPP
15 
16 #include <string>
17 #include <cstdio>
18 #include <cstdarg>
19 #if defined(BOOST_SPIRIT_DEBUG)
20 #include <iostream>
21 #endif // defined(BOOST_SPIRIT_DEBUG)
22 
23 #include <boost/concept_check.hpp>
24 #include <boost/assert.hpp>
25 #include <boost/spirit/include/classic_core.hpp>
26 
27 #include <boost/wave/token_ids.hpp>
28 #include <boost/wave/language_support.hpp>
29 #include <boost/wave/util/file_position.hpp>
30 #include <boost/wave/cpplexer/validate_universal_char.hpp>
31 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
32 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
33 #include <boost/wave/cpplexer/detect_include_guards.hpp>
34 #endif
35 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
36 
37 // reuse the default token type
38 #include "../xlex_interface.hpp"
39 
40 // include the xpressive headers
41 #include "xpressive_lexer.hpp"
42 
43 ///////////////////////////////////////////////////////////////////////////////
44 namespace boost {
45 namespace wave {
46 namespace cpplexer {
47 namespace xlex {
48 namespace lexer {
49 
50 ///////////////////////////////////////////////////////////////////////////////
51 //
52 //  encapsulation of the xpressive based C++ lexer
53 //
54 ///////////////////////////////////////////////////////////////////////////////
55 
56 template <
57     typename Iterator,
58     typename Position = boost::wave::util::file_position_type
59 >
60 class lexer
61 {
62 public:
63     typedef char                                        char_type;
64     typedef boost::wave::cpplexer::lex_token<Position>  token_type;
65     typedef typename token_type::string_type            string_type;
66 
67     lexer(Iterator const &first, Iterator const &last,
68         Position const &pos, boost::wave::language_support language);
~lexer()69     ~lexer() {}
70 
71     token_type& get(token_type& t);
set_position(Position const & pos)72     void set_position(Position const &pos)
73     {
74         filename = pos.get_file();
75         line = pos.get_line();
76         column = pos.get_column();
77     }
78 
79 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
has_include_guards(std::string & guard_name) const80     bool has_include_guards(std::string& guard_name) const
81         { return guards.detected(guard_name); }
82 #endif
83 
84 private:
85     typedef xpressive_lexer<Iterator, token_id> lexer_type;
86     typedef typename lexer_type::callback_type callback_type;
87 
88     lexer_type xlexer;
89     Iterator first;
90     Iterator last;
91 
92     string_type filename;
93     int line;
94     int column;
95     bool at_eof;
96     boost::wave::language_support language;
97 
98 // initialization data (regular expressions for the token definitions)
99     struct lexer_data {
100         token_id tokenid;                 // token data
101         char_type const *tokenregex;      // associated token to match
102         callback_type tokencb;            // associated callback function
103     };
104 
105     static lexer_data const init_data[];        // common patterns
106     static lexer_data const init_data_cpp[];    // C++ only patterns
107 
108 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
109     boost::wave::cpplexer::include_guards<token_type> guards;
110 #endif
111 };
112 
///////////////////////////////////////////////////////////////////////////////
//  Helpers producing one brace-initialized row { id, regex, callback } of a
//  token table. TOKEN_DATA is the callback-less form (callback slot is 0).
#define TOKEN_DATA_EX(id, regex, callback)  { id, regex, callback }
#define TOKEN_DATA(id, regex)               TOKEN_DATA_EX(id, regex, 0)
120 
///////////////////////////////////////////////////////////////////////////////
//  Building blocks for the token regular expressions. Every macro expands to
//  one or more adjacent string literals, which the compiler concatenates
//  into a single pattern string.

// basic combinators
#define OR                  "|"                 // alternation
#define Q(c)                "\\" c              // prefix 'c' with a backslash
#define TRI(c)              Q("?") Q("?") c     // "??" followed by 'c'

// whitespace and C style comments
#define BLANK               "[ \t\v\f]"
#define CCOMMENT            Q("/") Q("*") ".*?" Q("*") Q("/")

// what may appear between the '#' and the name of a directive
#define PPSPACE             "(" BLANK OR CCOMMENT ")*"

// pieces of numeric literals
#define OCTALDIGIT          "[0-7]"
#define DIGIT               "[0-9]"
#define HEXDIGIT            "[0-9a-fA-F]"
#define SIGN                "[-+]?"
#define EXPONENT            "(" "[eE]" SIGN "[0-9]+" ")"

// hexadecimal, octal (including a plain 0) or decimal integer
#define INTEGER             "(" \
                                "(0x|0X)" HEXDIGIT "+" OR \
                                "0" OCTALDIGIT "*" OR \
                                "[1-9]" DIGIT "*" \
                            ")"

#define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"

// the MS extensions additionally accept the i64 suffix
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
#define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "ll" OR "LL" ")" OR \
                                "(" "ll" OR "LL" ")" "[uU]" "?" OR \
                                "i64" \
                            ")"
#else
#define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "ll" OR "LL" ")" OR \
                            "(" "ll" OR "LL" ")" "[uU]" "?" ")"
#endif

#define FLOAT_SUFFIX        "(" "[fF][lL]?|[lL][fF]?" ")"

// optional prefix of character and string literals
#define CHAR_SPEC           "L?"

// a backslash, written literally or as the ??/ sequence
#define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"

// escape sequences inside character and string literals
#define ESCAPESEQ           BACKSLASH "(" \
                                "[abfnrtv?'\"]" OR \
                                BACKSLASH OR \
                                "x" HEXDIGIT "+" OR \
                                OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
                            ")"

// \uXXXX and \UXXXXXXXX
#define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
#define UNIVERSALCHAR       BACKSLASH "(" \
                                "u" HEXQUAD OR \
                                "U" HEXQUAD HEXQUAD \
                            ")"

// the three accepted spellings of '#', and of a line ending
#define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"

// name of the include directive(s)
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
#define INCLUDEDEF          "(include_next|include)"
#else
#define INCLUDEDEF          "include"
#endif
179 
180 ///////////////////////////////////////////////////////////////////////////////
181 // common C++/C99 token definitions
182 template <typename Iterator, typename Position>
183 typename lexer<Iterator, Position>::lexer_data const
184 lexer<Iterator, Position>::init_data[] =
185 {
186     TOKEN_DATA(T_CCOMMENT, CCOMMENT),
187     TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ),
188     TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'"
189                 "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"),
190     TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"")
191                 "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
192     TOKEN_DATA(T_ANDAND, "&&"),
193     TOKEN_DATA(T_ANDASSIGN, "&="),
194     TOKEN_DATA(T_AND, "&"),
195     TOKEN_DATA(T_EQUAL, "=="),
196     TOKEN_DATA(T_ASSIGN, "="),
197     TOKEN_DATA(T_ORASSIGN, Q("|=")),
198     TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")),
199     TOKEN_DATA(T_OROR, Q("|") Q("|")),
200     TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
201     TOKEN_DATA(T_OR, Q("|")),
202     TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")),
203     TOKEN_DATA(T_XORASSIGN, Q("^=")),
204     TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")),
205     TOKEN_DATA(T_XOR, Q("^")),
206     TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")),
207     TOKEN_DATA(T_COMMA, ","),
208     TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
209     TOKEN_DATA(T_COLON, ":"),
210     TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
211     TOKEN_DATA(T_DIVIDE, Q("/")),
212     TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
213     TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
214     TOKEN_DATA(T_SHIFTRIGHT, ">>"),
215     TOKEN_DATA(T_GREATEREQUAL, ">="),
216     TOKEN_DATA(T_GREATER, ">"),
217     TOKEN_DATA(T_LEFTBRACE, Q("{")),
218     TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
219     TOKEN_DATA(T_SHIFTLEFT, "<<"),
220     TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
221     TOKEN_DATA(T_LESSEQUAL, "<="),
222     TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
223     TOKEN_DATA(T_LESS, "<"),
224     TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")),
225     TOKEN_DATA(T_LEFTPAREN, Q("(")),
226     TOKEN_DATA(T_LEFTBRACKET, Q("[")),
227     TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
228     TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
229     TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
230     TOKEN_DATA(T_ARROW, Q("->")),
231     TOKEN_DATA(T_MINUS, Q("-")),
232     TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
233     TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
234     TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
235     TOKEN_DATA(T_POUND_ALT, Q("%:")),
236     TOKEN_DATA(T_PERCENT, Q("%")),
237     TOKEN_DATA(T_NOTEQUAL, "!="),
238     TOKEN_DATA(T_NOT, "!"),
239     TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
240     TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
241     TOKEN_DATA(T_PLUS, Q("+")),
242     TOKEN_DATA(T_RIGHTBRACE, Q("}")),
243     TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")),
244     TOKEN_DATA(T_RIGHTPAREN, Q(")")),
245     TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
246     TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
247     TOKEN_DATA(T_SEMICOLON, ";"),
248     TOKEN_DATA(T_STARASSIGN, Q("*=")),
249     TOKEN_DATA(T_STAR, Q("*")),
250     TOKEN_DATA(T_COMPL, Q("~")),
251     TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")),
252     TOKEN_DATA(T_ASM, "asm"),
253     TOKEN_DATA(T_AUTO, "auto"),
254     TOKEN_DATA(T_BOOL, "bool"),
255     TOKEN_DATA(T_FALSE, "false"),
256     TOKEN_DATA(T_TRUE, "true"),
257     TOKEN_DATA(T_BREAK, "break"),
258     TOKEN_DATA(T_CASE, "case"),
259     TOKEN_DATA(T_CATCH, "catch"),
260     TOKEN_DATA(T_CHAR, "char"),
261     TOKEN_DATA(T_CLASS, "class"),
262     TOKEN_DATA(T_CONSTCAST, "const_cast"),
263     TOKEN_DATA(T_CONST, "const"),
264     TOKEN_DATA(T_CONTINUE, "continue"),
265     TOKEN_DATA(T_DEFAULT, "default"),
266     TOKEN_DATA(T_DELETE, "delete"),
267     TOKEN_DATA(T_DOUBLE, "double"),
268     TOKEN_DATA(T_DO, "do"),
269     TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
270     TOKEN_DATA(T_ELSE, "else"),
271     TOKEN_DATA(T_ENUM, "enum"),
272     TOKEN_DATA(T_EXPLICIT, "explicit"),
273     TOKEN_DATA(T_EXPORT, "export"),
274     TOKEN_DATA(T_EXTERN, "extern"),
275     TOKEN_DATA(T_FLOAT, "float"),
276     TOKEN_DATA(T_FOR, "for"),
277     TOKEN_DATA(T_FRIEND, "friend"),
278     TOKEN_DATA(T_GOTO, "goto"),
279     TOKEN_DATA(T_IF, "if"),
280     TOKEN_DATA(T_INLINE, "inline"),
281     TOKEN_DATA(T_INT, "int"),
282     TOKEN_DATA(T_LONG, "long"),
283     TOKEN_DATA(T_MUTABLE, "mutable"),
284     TOKEN_DATA(T_NAMESPACE, "namespace"),
285     TOKEN_DATA(T_NEW, "new"),
286     TOKEN_DATA(T_OPERATOR, "operator"),
287     TOKEN_DATA(T_PRIVATE, "private"),
288     TOKEN_DATA(T_PROTECTED, "protected"),
289     TOKEN_DATA(T_PUBLIC, "public"),
290     TOKEN_DATA(T_REGISTER, "register"),
291     TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
292     TOKEN_DATA(T_RETURN, "return"),
293     TOKEN_DATA(T_SHORT, "short"),
294     TOKEN_DATA(T_SIGNED, "signed"),
295     TOKEN_DATA(T_SIZEOF, "sizeof"),
296     TOKEN_DATA(T_STATICCAST, "static_cast"),
297     TOKEN_DATA(T_STATIC, "static"),
298     TOKEN_DATA(T_STRUCT, "struct"),
299     TOKEN_DATA(T_SWITCH, "switch"),
300     TOKEN_DATA(T_TEMPLATE, "template"),
301     TOKEN_DATA(T_THIS, "this"),
302     TOKEN_DATA(T_THROW, "throw"),
303     TOKEN_DATA(T_TRY, "try"),
304     TOKEN_DATA(T_TYPEDEF, "typedef"),
305     TOKEN_DATA(T_TYPEID, "typeid"),
306     TOKEN_DATA(T_TYPENAME, "typename"),
307     TOKEN_DATA(T_UNION, "union"),
308     TOKEN_DATA(T_UNSIGNED, "unsigned"),
309     TOKEN_DATA(T_USING, "using"),
310     TOKEN_DATA(T_VIRTUAL, "virtual"),
311     TOKEN_DATA(T_VOID, "void"),
312     TOKEN_DATA(T_VOLATILE, "volatile"),
313     TOKEN_DATA(T_WCHART, "wchar_t"),
314     TOKEN_DATA(T_WHILE, "while"),
315 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
316     TOKEN_DATA(T_MSEXT_INT8, "__int8"),
317     TOKEN_DATA(T_MSEXT_INT16, "__int16"),
318     TOKEN_DATA(T_MSEXT_INT32, "__int32"),
319     TOKEN_DATA(T_MSEXT_INT64, "__int64"),
320     TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
321     TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
322     TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
323     TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
324     TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
325     TOKEN_DATA(T_MSEXT_TRY , "__try"),
326     TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
327     TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
328     TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
329     TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
330     TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
331     TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
332     TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
333 #endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
334     TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"),
335     TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
336     TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
337     TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"),
338     TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"),
339     TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"),
340     TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"),
341     TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"),
342     TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \
343         INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")),
344     TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \
345         INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"),
346     TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \
347         INCLUDEDEF PPSPACE),
348     TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"),
349     TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
350     TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"),
351     TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"),
352     TOKEN_DATA(T_FLOATLIT,
353         "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
354         EXPONENT "?" FLOAT_SUFFIX "?" OR
355         DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
356     TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
357     TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"),
358 #if BOOST_WAVE_USE_STRICT_LEXER != 0
359     TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
360 #else
361     TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
362 #endif
363     TOKEN_DATA(T_SPACE, BLANK "+"),
364     TOKEN_DATA(T_CONTLINE, Q("\\") "\n"),
365     TOKEN_DATA(T_NEWLINE, NEWLINEDEF),
366     TOKEN_DATA(T_POUND_POUND, "##"),
367     TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
368     TOKEN_DATA(T_POUND, "#"),
369     TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")),
370     TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))),
371     TOKEN_DATA(T_QUESTION_MARK, Q("?")),
372     TOKEN_DATA(T_DOT, Q(".")),
373     TOKEN_DATA(T_ANY, "."),
374     { token_id(0) }       // this should be the last entry
375 };
376 
377 ///////////////////////////////////////////////////////////////////////////////
378 // C++ only token definitions
379 template <typename Iterator, typename Position>
380 typename lexer<Iterator, Position>::lexer_data const
381 lexer<Iterator, Position>::init_data_cpp[] =
382 {
383     TOKEN_DATA(T_AND_ALT, "bitand"),
384     TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
385     TOKEN_DATA(T_ANDAND_ALT, "and"),
386     TOKEN_DATA(T_OR_ALT, "bitor"),
387     TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
388     TOKEN_DATA(T_OROR_ALT, "or"),
389     TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
390     TOKEN_DATA(T_XOR_ALT, "xor"),
391     TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
392     TOKEN_DATA(T_NOT_ALT, "not"),
393     TOKEN_DATA(T_COMPL_ALT, "compl"),
394     TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
395     TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
396     TOKEN_DATA(T_COLON_COLON, "::"),
397     { token_id(0) }       // this should be the last entry
398 };
399 
///////////////////////////////////////////////////////////////////////////////
//  Undefine every helper macro introduced above for building the regular
//  expressions and the token tables, so that none of these very generic
//  names (OR, Q, DIGIT, BLANK, ...) leaks into code including this header.
#undef INCLUDEDEF
#undef NEWLINEDEF
#undef POUNDDEF
#undef BLANK
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX
427 
428 ///////////////////////////////////////////////////////////////////////////////
429 // initialize cpp lexer
430 template <typename Iterator, typename Position>
431 inline
lexer(Iterator const & first,Iterator const & last,Position const & pos,boost::wave::language_support language)432 lexer<Iterator, Position>::lexer(Iterator const &first,
433         Iterator const &last, Position const &pos,
434         boost::wave::language_support language)
435 :   first(first), last(last),
436     filename(pos.get_file()), line(pos.get_line()), column(pos.get_column()),
437     at_eof(false), language(language)
438 {
439 // if in C99 mode, some of the keywords/operators are not valid
440     if (!boost::wave::need_c99(language)) {
441         for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
442             xlexer.register_regex(init_data_cpp[j].tokenregex,
443                 init_data_cpp[j].tokenid, init_data_cpp[j].tokencb);
444         }
445     }
446 
447 // tokens valid for C++ and C99
448     for (int i = 0; 0 != init_data[i].tokenid; ++i) {
449         xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid,
450             init_data[i].tokencb);
451     }
452 }
453 
454 ///////////////////////////////////////////////////////////////////////////////
455 //  get the next token from the input stream
456 template <typename Iterator, typename Position>
457 inline boost::wave::cpplexer::lex_token<Position>&
get(boost::wave::cpplexer::lex_token<Position> & t)458 lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t)
459 {
460     using namespace boost::wave;    // to import token ids to this scope
461 
462     if (at_eof)
463         return t = cpplexer::lex_token<Position>();  // return T_EOI
464 
465     std::string tokval;
466     token_id id = xlexer.next_token(first, last, tokval);
467     string_type value = tokval.c_str();
468 
469     if ((token_id)(-1) == id)
470         id = T_EOF;     // end of input reached
471 
472     if (T_IDENTIFIER == id) {
473     // test identifier characters for validity (throws if invalid chars found)
474         if (!boost::wave::need_no_character_validation(language)) {
475             cpplexer::impl::validate_identifier_name(value, line, -1, filename);
476         }
477     }
478     else if (T_STRINGLIT == id || T_CHARLIT == id) {
479     // test literal characters for validity (throws if invalid chars found)
480         if (!boost::wave::need_no_character_validation(language)) {
481             cpplexer::impl::validate_literal(value, line, -1, filename);
482         }
483     }
484     else if (T_EOF == id) {
485     // T_EOF is returned as a valid token, the next call will return T_EOI,
486     // i.e. the actual end of input
487         at_eof = true;
488         value.clear();
489     }
490     else if (T_NEWLINE == id) {
491         ++line;
492         column = 1;
493     } else {
494         column += value.size();
495     }
496 
497 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
498     cpplexer::lex_token<Position> tok(id, value, Position(filename, line, column));
499     return t = guards.detect_guard(tok);
500 #else
501     return t = cpplexer::lex_token<Position>(id, value,
502         Position(filename, line, column));
503 #endif
504 }
505 
506 ///////////////////////////////////////////////////////////////////////////////
507 //
508 //  lex_functor
509 //
510 ///////////////////////////////////////////////////////////////////////////////
511 template <
512     typename Iterator,
513     typename Position = boost::wave::util::file_position_type
514 >
515 class xlex_functor
516 :   public xlex_input_interface<typename lexer<Iterator, Position>::token_type>
517 {
518 public:
519 
520     typedef typename lexer<Iterator, Position>::token_type   token_type;
521 
xlex_functor(Iterator const & first,Iterator const & last,Position const & pos,boost::wave::language_support language)522     xlex_functor(Iterator const &first, Iterator const &last,
523             Position const &pos, boost::wave::language_support language)
524     :   lexer_(first, last, pos, language)
525     {}
~xlex_functor()526     virtual ~xlex_functor() {}
527 
528 // get the next token from the input stream
get(token_type & t)529     token_type& get(token_type& t) BOOST_OVERRIDE { return lexer_.get(t); }
set_position(Position const & pos)530     void set_position(Position const &pos) BOOST_OVERRIDE { lexer_.set_position(pos); }
531 
532 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
has_include_guards(std::string & guard_name) const533     bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
534         { return lexer_.has_include_guards(guard_name); }
535 #endif
536 
537 private:
538     lexer<Iterator, Position> lexer_;
539 };
540 
541 }   // namespace lexer
542 
543 ///////////////////////////////////////////////////////////////////////////////
544 //
//  The new_lexer_gen<>::new_lexer function (declared in xlex_interface.hpp)
//  should be defined inline, if the xlex_functor shouldn't be instantiated
//  separately from the lex_iterator.
548 //
549 //  Separate (explicit) instantiation helps to reduce compilation time.
550 //
551 ///////////////////////////////////////////////////////////////////////////////
552 
// 'inline' unless the lexer is instantiated separately (explicitly), in
// which case new_lexer has to be an ordinary, non-inline definition
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
#else
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
#endif
558 
559 ///////////////////////////////////////////////////////////////////////////////
560 //
561 //  The 'new_lexer' function allows the opaque generation of a new lexer object.
562 //  It is coupled to the iterator type to allow to decouple the lexer/iterator
563 //  configurations at compile time.
564 //
565 //  This function is declared inside the xlex_interface.hpp file, which is
566 //  referenced by the source file calling the lexer and the source file, which
567 //  instantiates the lex_functor. But it is defined here, so it will be
568 //  instantiated only while compiling the source file, which instantiates the
569 //  lex_functor. While the xlex_interface.hpp file may be included everywhere,
570 //  this file (xlex_lexer.hpp) should be included only once. This allows
571 //  to decouple the lexer interface from the lexer implementation and reduces
572 //  compilation time.
573 //
574 ///////////////////////////////////////////////////////////////////////////////
575 
576 template <typename Iterator, typename Position>
577 BOOST_WAVE_XLEX_NEW_LEXER_INLINE
578 lex_input_interface<boost::wave::cpplexer::lex_token<Position> > *
new_lexer(Iterator const & first,Iterator const & last,Position const & pos,wave::language_support language)579 new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
580     Iterator const &last, Position const &pos,
581     wave::language_support language)
582 {
583     return new lexer::xlex_functor<Iterator, Position>(
584         first, last, pos, language);
585 }
586 
587 #undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE
588 
589 ///////////////////////////////////////////////////////////////////////////////
590 }   // namespace xlex
591 }   // namespace cpplexer
592 }   // namespace wave
593 }   // namespace boost
594 
595 #endif // !defined(BOOST_XLEX_LEXER_HPP)
596