• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Boost.Wave: A Standard compliant C++ preprocessor library
3 
4     Re2C based C++ lexer
5 
6     http://www.boost.org/
7 
8     Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
9     Software License, Version 1.0. (See accompanying file
10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12 
13 #if !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
14 #define BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
15 
16 #include <string>
17 #include <cstdio>
18 #include <cstdarg>
19 #if defined(BOOST_SPIRIT_DEBUG)
20 #include <iostream>
21 #endif // defined(BOOST_SPIRIT_DEBUG)
22 
23 #include <boost/concept_check.hpp>
24 #include <boost/assert.hpp>
25 #include <boost/spirit/include/classic_core.hpp>
26 
27 #include <boost/wave/wave_config.hpp>
28 #include <boost/wave/language_support.hpp>
29 #include <boost/wave/token_ids.hpp>
30 #include <boost/wave/util/file_position.hpp>
31 #include <boost/wave/cpplexer/validate_universal_char.hpp>
32 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
33 #include <boost/wave/cpplexer/token_cache.hpp>
34 #include <boost/wave/cpplexer/convert_trigraphs.hpp>
35 
36 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
37 #include <boost/wave/cpplexer/re2clex/scanner.hpp>
38 #include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
39 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
40 #include <boost/wave/cpplexer/detect_include_guards.hpp>
41 #endif
42 
43 #include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>
44 
45 // this must occur after all of the includes and before any code appears
46 #ifdef BOOST_HAS_ABI_HEADERS
47 #include BOOST_ABI_PREFIX
48 #endif
49 
50 ///////////////////////////////////////////////////////////////////////////////
51 namespace boost {
52 namespace wave {
53 namespace cpplexer {
54 namespace re2clex {
55 
56 ///////////////////////////////////////////////////////////////////////////////
57 //
58 //  encapsulation of the re2c based cpp lexer
59 //
60 ///////////////////////////////////////////////////////////////////////////////
61 
62 template <typename IteratorT,
63     typename PositionT = boost::wave::util::file_position_type,
64     typename TokenT = lex_token<PositionT> >
65 class lexer
66 {
67 public:
68     typedef TokenT token_type;
69     typedef typename token_type::string_type  string_type;
70 
71     lexer(IteratorT const &first, IteratorT const &last,
72         PositionT const &pos, boost::wave::language_support language_);
73     ~lexer();
74 
75     token_type& get(token_type&);
set_position(PositionT const & pos)76     void set_position(PositionT const &pos)
77     {
78         // set position has to change the file name and line number only
79         filename = pos.get_file();
80         scanner.line = pos.get_line();
81 //        scanner.column = scanner.curr_column = pos.get_column();
82         scanner.file_name = filename.c_str();
83     }
84 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
has_include_guards(std::string & guard_name) const85     bool has_include_guards(std::string& guard_name) const
86     {
87         return guards.detected(guard_name);
88     }
89 #endif
90 
91 // error reporting from the re2c generated lexer
92     static int report_error(Scanner<IteratorT> const* s, int code, char const *, ...);
93 
94 private:
95     static char const *tok_names[];
96 
97     Scanner<IteratorT> scanner;
98     string_type filename;
99     string_type value;
100     bool at_eof;
101     boost::wave::language_support language;
102 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
103     include_guards<token_type> guards;
104 #endif
105 
106 #if BOOST_WAVE_SUPPORT_THREADING == 0
107     static token_cache<string_type> const cache;
108 #else
109     token_cache<string_type> const cache;
110 #endif
111 };
112 
113 ///////////////////////////////////////////////////////////////////////////////
114 // initialize cpp lexer
115 template <typename IteratorT, typename PositionT, typename TokenT>
116 inline
lexer(IteratorT const & first,IteratorT const & last,PositionT const & pos,boost::wave::language_support language_)117 lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first,
118         IteratorT const &last, PositionT const &pos,
119         boost::wave::language_support language_)
120     : scanner(first, last),
121       filename(pos.get_file()), at_eof(false), language(language_)
122 #if BOOST_WAVE_SUPPORT_THREADING != 0
123   , cache()
124 #endif
125 {
126     using namespace std;        // some systems have memset in std
127     scanner.line = pos.get_line();
128     scanner.column = scanner.curr_column = pos.get_column();
129     scanner.error_proc = report_error;
130     scanner.file_name = filename.c_str();
131 
132 #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
133     scanner.enable_ms_extensions = true;
134 #else
135     scanner.enable_ms_extensions = false;
136 #endif
137 
138 #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
139     scanner.act_in_c99_mode = boost::wave::need_c99(language_);
140 #endif
141 
142 #if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
143     scanner.enable_import_keyword = !boost::wave::need_c99(language_);
144 #else
145     scanner.enable_import_keyword = false;
146 #endif
147 
148     scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_);
149     scanner.single_line_only = boost::wave::need_single_line(language_);
150 
151 #if BOOST_WAVE_SUPPORT_CPP0X != 0
152     scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_);
153 #else
154     scanner.act_in_cpp0x_mode = false;
155 #endif
156 }
157 
158 template <typename IteratorT, typename PositionT, typename TokenT>
159 inline
~lexer()160 lexer<IteratorT, PositionT, TokenT>::~lexer()
161 {
162     using namespace std;        // some systems have free in std
163     free(scanner.bot);
164 }
165 
166 ///////////////////////////////////////////////////////////////////////////////
167 //  get the next token from the input stream
168 template <typename IteratorT, typename PositionT, typename TokenT>
169 inline TokenT&
get(TokenT & result)170 lexer<IteratorT, PositionT, TokenT>::get(TokenT& result)
171 {
172     if (at_eof)
173         return result = token_type();  // return T_EOI
174 
175     std::size_t actline = scanner.line;
176     token_id id = token_id(scan(&scanner));
177 
178     switch (id) {
179     case T_IDENTIFIER:
180     // test identifier characters for validity (throws if invalid chars found)
181         value = string_type((char const *)scanner.tok,
182             scanner.cur-scanner.tok);
183         if (!boost::wave::need_no_character_validation(language))
184             impl::validate_identifier_name(value, actline, scanner.column, filename);
185         break;
186 
187     case T_STRINGLIT:
188     case T_CHARLIT:
189     case T_RAWSTRINGLIT:
190     // test literal characters for validity (throws if invalid chars found)
191         value = string_type((char const *)scanner.tok,
192             scanner.cur-scanner.tok);
193         if (boost::wave::need_convert_trigraphs(language))
194             value = impl::convert_trigraphs(value);
195         if (!boost::wave::need_no_character_validation(language))
196             impl::validate_literal(value, actline, scanner.column, filename);
197         break;
198 
199     case T_PP_HHEADER:
200     case T_PP_QHEADER:
201     case T_PP_INCLUDE:
202     // convert to the corresponding ..._next token, if appropriate
203       {
204           value = string_type((char const *)scanner.tok,
205               scanner.cur-scanner.tok);
206 
207 #if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
208       // Skip '#' and whitespace and see whether we find an 'include_next' here.
209           typename string_type::size_type start = value.find("include");
210           if (value.compare(start, 12, "include_next", 12) == 0)
211               id = token_id(id | AltTokenType);
212 #endif
213           break;
214       }
215 
216     case T_LONGINTLIT:  // supported in C++11, C99 and long_long mode
217         value = string_type((char const *)scanner.tok,
218             scanner.cur-scanner.tok);
219         if (!boost::wave::need_long_long(language)) {
220         // syntax error: not allowed in C++ mode
221             BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal,
222                 value.c_str(), actline, scanner.column, filename.c_str());
223         }
224         break;
225 
226     case T_OCTALINT:
227     case T_DECIMALINT:
228     case T_HEXAINT:
229     case T_INTLIT:
230     case T_FLOATLIT:
231     case T_FIXEDPOINTLIT:
232     case T_CCOMMENT:
233     case T_CPPCOMMENT:
234     case T_SPACE:
235     case T_SPACE2:
236     case T_ANY:
237     case T_PP_NUMBER:
238         value = string_type((char const *)scanner.tok,
239             scanner.cur-scanner.tok);
240         break;
241 
242     case T_EOF:
243     // T_EOF is returned as a valid token, the next call will return T_EOI,
244     // i.e. the actual end of input
245         at_eof = true;
246         value.clear();
247         break;
248 
249     case T_OR_TRIGRAPH:
250     case T_XOR_TRIGRAPH:
251     case T_LEFTBRACE_TRIGRAPH:
252     case T_RIGHTBRACE_TRIGRAPH:
253     case T_LEFTBRACKET_TRIGRAPH:
254     case T_RIGHTBRACKET_TRIGRAPH:
255     case T_COMPL_TRIGRAPH:
256     case T_POUND_TRIGRAPH:
257         if (boost::wave::need_convert_trigraphs(language)) {
258             value = cache.get_token_value(BASEID_FROM_TOKEN(id));
259         }
260         else {
261             value = string_type((char const *)scanner.tok,
262                 scanner.cur-scanner.tok);
263         }
264         break;
265 
266     case T_ANY_TRIGRAPH:
267         if (boost::wave::need_convert_trigraphs(language)) {
268             value = impl::convert_trigraph(
269                 string_type((char const *)scanner.tok));
270         }
271         else {
272             value = string_type((char const *)scanner.tok,
273                 scanner.cur-scanner.tok);
274         }
275         break;
276 
277     default:
278         if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) ||
279             IS_CATEGORY(id, UnknownTokenType))
280         {
281             value = string_type((char const *)scanner.tok,
282                 scanner.cur-scanner.tok);
283         }
284         else {
285             value = cache.get_token_value(id);
286         }
287         break;
288     }
289 
290 //     std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl;
291 
292     // the re2c lexer reports the new line number for newline tokens
293     result = token_type(id, value, PositionT(filename, actline, scanner.column));
294 
295 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
296     return guards.detect_guard(result);
297 #else
298     return result;
299 #endif
300 }
301 
302 template <typename IteratorT, typename PositionT, typename TokenT>
303 inline int
report_error(Scanner<IteratorT> const * s,int errcode,char const * msg,...)304 lexer<IteratorT, PositionT, TokenT>::report_error(Scanner<IteratorT> const *s, int errcode,
305     char const *msg, ...)
306 {
307     BOOST_ASSERT(0 != s);
308     BOOST_ASSERT(0 != msg);
309 
310     using namespace std;    // some system have vsprintf in namespace std
311 
312     char buffer[200];           // should be large enough
313     va_list params;
314     va_start(params, msg);
315     vsprintf(buffer, msg, params);
316     va_end(params);
317 
318     BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line,
319         s->column, s->file_name);
320 //    BOOST_UNREACHABLE_RETURN(0);
321     return 0;
322 }
323 
324 ///////////////////////////////////////////////////////////////////////////////
325 //
326 //  lex_functor
327 //
328 ///////////////////////////////////////////////////////////////////////////////
329 
330 template <typename IteratorT,
331     typename PositionT = boost::wave::util::file_position_type,
332     typename TokenT = typename lexer<IteratorT, PositionT>::token_type>
333 class lex_functor
334 :   public lex_input_interface_generator<TokenT>
335 {
336 public:
337     typedef TokenT token_type;
338 
lex_functor(IteratorT const & first,IteratorT const & last,PositionT const & pos,boost::wave::language_support language)339     lex_functor(IteratorT const &first, IteratorT const &last,
340             PositionT const &pos, boost::wave::language_support language)
341     :   re2c_lexer(first, last, pos, language)
342     {}
~lex_functor()343     virtual ~lex_functor() {}
344 
345 // get the next token from the input stream
get(token_type & result)346     token_type& get(token_type& result) BOOST_OVERRIDE { return re2c_lexer.get(result); }
set_position(PositionT const & pos)347     void set_position(PositionT const &pos) BOOST_OVERRIDE { re2c_lexer.set_position(pos); }
348 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
has_include_guards(std::string & guard_name) const349     bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
350         { return re2c_lexer.has_include_guards(guard_name); }
351 #endif
352 
353 private:
354     lexer<IteratorT, PositionT, TokenT> re2c_lexer;
355 };
356 
357 #if BOOST_WAVE_SUPPORT_THREADING == 0
358 ///////////////////////////////////////////////////////////////////////////////
359 template <typename IteratorT, typename PositionT, typename TokenT>
360 token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const
361     lexer<IteratorT, PositionT, TokenT>::cache =
362         token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>();
363 #endif
364 
365 }   // namespace re2clex
366 
367 ///////////////////////////////////////////////////////////////////////////////
368 //
369 //  The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)
370 //  should be defined inline, if the lex_functor shouldn't be instantiated
371 //  separately from the lex_iterator.
372 //
373 //  Separate (explicit) instantiation helps to reduce compilation time.
374 //
375 ///////////////////////////////////////////////////////////////////////////////
376 
377 #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
378 #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
379 #else
380 #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
381 #endif
382 
383 ///////////////////////////////////////////////////////////////////////////////
384 //
385 //  The 'new_lexer' function allows the opaque generation of a new lexer object.
386 //  It is coupled to the iterator type to allow to decouple the lexer/iterator
387 //  configurations at compile time.
388 //
389 //  This function is declared inside the cpp_lex_token.hpp file, which is
390 //  referenced by the source file calling the lexer and the source file, which
391 //  instantiates the lex_functor. But it is defined here, so it will be
392 //  instantiated only while compiling the source file, which instantiates the
393 //  lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
394 //  this file (cpp_re2c_lexer.hpp) should be included only once. This allows
395 //  to decouple the lexer interface from the lexer implementation and reduces
396 //  compilation time.
397 //
398 ///////////////////////////////////////////////////////////////////////////////
399 
400 template <typename IteratorT, typename PositionT, typename TokenT>
401 BOOST_WAVE_RE2C_NEW_LEXER_INLINE
402 lex_input_interface<TokenT> *
new_lexer(IteratorT const & first,IteratorT const & last,PositionT const & pos,boost::wave::language_support language)403 new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first,
404     IteratorT const &last, PositionT const &pos,
405     boost::wave::language_support language)
406 {
407     using re2clex::lex_functor;
408     return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language);
409 }
410 
411 #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
412 
413 ///////////////////////////////////////////////////////////////////////////////
414 }   // namespace cpplexer
415 }   // namespace wave
416 }   // namespace boost
417 
418 // the suffix header occurs after all of the code
419 #ifdef BOOST_HAS_ABI_HEADERS
420 #include BOOST_ABI_SUFFIX
421 #endif
422 
423 #endif // !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
424