1 /*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Re2C based IDL lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12
13 #if !defined(BOOST_IDL_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
14 #define BOOST_IDL_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
15
#include <string>
#include <cstdio>
#include <cstdlib>
#include <cstdarg>
19 #if defined(BOOST_SPIRIT_DEBUG)
20 #include <iostream>
21 #endif // defined(BOOST_SPIRIT_DEBUG)
22
23 #include <boost/concept_check.hpp>
24 #include <boost/assert.hpp>
25 #include <boost/spirit/include/classic_core.hpp>
26
27 #include <boost/wave/token_ids.hpp>
28 #include <boost/wave/language_support.hpp>
29 #include <boost/wave/util/file_position.hpp>
30 #include <boost/wave/cpplexer/validate_universal_char.hpp>
31 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
32
33 // reuse the default token type and re2c lexer helpers
34 #include <boost/wave/cpplexer/cpp_lex_token.hpp>
35 #include <boost/wave/cpplexer/cpp_lex_interface.hpp>
36 #include <boost/wave/cpplexer/re2clex/scanner.hpp>
37
38 #include "idl_re.hpp"
39
40 ///////////////////////////////////////////////////////////////////////////////
41 namespace boost {
42 namespace wave {
43 namespace idllexer {
44 namespace re2clex {
45
46 ///////////////////////////////////////////////////////////////////////////////
47 //
48 // encapsulation of the re2c based idl lexer
49 //
50 ///////////////////////////////////////////////////////////////////////////////
51
52 template <
53 typename IteratorT,
54 typename PositionT = boost::wave::util::file_position_type
55 >
56 class lexer
57 {
58 typedef boost::wave::cpplexer::re2clex::Scanner<IteratorT> scanner_t;
59
60 public:
61
62 typedef char char_t;
63 typedef boost::wave::cpplexer::re2clex::Scanner<IteratorT> base_t;
64 typedef boost::wave::cpplexer::lex_token<PositionT> token_type;
65 typedef typename token_type::string_type string_type;
66
67 lexer(IteratorT const &first, IteratorT const &last,
68 PositionT const &pos, boost::wave::language_support language);
69 ~lexer();
70
71 token_type& get(token_type& t);
set_position(PositionT const & pos)72 void set_position(PositionT const &pos)
73 {
74 // set position has to change the file name and line number only
75 filename = pos.get_file();
76 scanner.line = pos.get_line();
77 scanner.file_name = filename.c_str();
78 }
79
80 // error reporting from the re2c generated lexer
81 static int report_error(scanner_t const *s, int code, char const *, ...);
82
83 private:
84 static char const *tok_names[];
85
86 scanner_t scanner;
87 string_type filename;
88 bool at_eof;
89 boost::wave::language_support language;
90 };
91
92 ///////////////////////////////////////////////////////////////////////////////
// initialize the IDL lexer
94 template <typename IteratorT, typename PositionT>
95 inline
lexer(IteratorT const & first,IteratorT const & last,PositionT const & pos,boost::wave::language_support language)96 lexer<IteratorT, PositionT>::lexer(IteratorT const &first,
97 IteratorT const &last, PositionT const &pos,
98 boost::wave::language_support language)
99 : scanner(first, last), filename(pos.get_file()), at_eof(false), language(language)
100 {
101 using namespace std; // some systems have memset in std
102 using namespace boost::wave::cpplexer::re2clex;
103
104 scanner.line = pos.get_line();
105 scanner.error_proc = report_error;
106 scanner.file_name = filename.c_str();
107
108 // not used by the lexer
109 scanner.enable_ms_extensions = 0;
110 scanner.act_in_c99_mode = 0;
111
112 boost::ignore_unused_variable_warning(language);
113 }
114
115 template <typename IteratorT, typename PositionT>
116 inline
~lexer()117 lexer<IteratorT, PositionT>::~lexer()
118 {
119 free(scanner.bot);
120 }
121
122 ///////////////////////////////////////////////////////////////////////////////
123 // get the next token from the input stream
124 template <typename IteratorT, typename PositionT>
125 inline boost::wave::cpplexer::lex_token<PositionT>&
get(boost::wave::cpplexer::lex_token<PositionT> & t)126 lexer<IteratorT, PositionT>::get(boost::wave::cpplexer::lex_token<PositionT>& t)
127 {
128 using namespace boost::wave; // to import token ids to this scope
129
130 if (at_eof)
131 return t = boost::wave::cpplexer::lex_token<PositionT>(); // return T_EOI
132
133 token_id id = token_id(scan(&scanner));
134 string_type value((char const *)scanner.tok, scanner.cur-scanner.tok);
135
136 if (T_IDENTIFIER == id) {
137 // test identifier characters for validity (throws if invalid chars found)
138 if (!boost::wave::need_no_character_validation(language)) {
139 boost::wave::cpplexer::impl::validate_identifier_name(value,
140 scanner.line, -1, filename);
141 }
142 }
143 else if (T_STRINGLIT == id || T_CHARLIT == id) {
144 // test literal characters for validity (throws if invalid chars found)
145 if (!boost::wave::need_no_character_validation(language)) {
146 boost::wave::cpplexer::impl::validate_literal(value, scanner.line,
147 -1, filename);
148 }
149 }
150 else if (T_EOF == id) {
151 // T_EOF is returned as a valid token, the next call will return T_EOI,
152 // i.e. the actual end of input
153 at_eof = true;
154 value.clear();
155 }
156 return t = boost::wave::cpplexer::lex_token<PositionT>(id, value,
157 PositionT(filename, scanner.line, -1));
158 }
159
160 template <typename IteratorT, typename PositionT>
161 inline int
report_error(scanner_t const * s,int errcode,char const * msg,...)162 lexer<IteratorT, PositionT>::report_error(scanner_t const *s, int errcode,
163 char const* msg, ...)
164 {
165 BOOST_ASSERT(0 != s);
166 BOOST_ASSERT(0 != msg);
167
168 using namespace std; // some system have vsprintf in namespace std
169
170 char buffer[200]; // should be large enough
171 va_list params;
172 va_start(params, msg);
173 vsprintf(buffer, msg, params);
174 va_end(params);
175
176 BOOST_WAVE_LEXER_THROW_VAR(boost::wave::cpplexer::lexing_exception,
177 errcode, buffer, s->line, -1, s->file_name);
178 return 0;
179 }
180
181 ///////////////////////////////////////////////////////////////////////////////
182 //
183 // lex_functor
184 //
185 ///////////////////////////////////////////////////////////////////////////////
186
187 template <
188 typename IteratorT,
189 typename PositionT = boost::wave::util::file_position_type
190 >
191 class lex_functor
192 : public lex_input_interface_generator<
193 typename lexer<IteratorT, PositionT>::token_type
194 >
195 {
196 public:
197
198 typedef typename lexer<IteratorT, PositionT>::token_type token_type;
199
lex_functor(IteratorT const & first,IteratorT const & last,PositionT const & pos,boost::wave::language_support language)200 lex_functor(IteratorT const &first, IteratorT const &last,
201 PositionT const &pos, boost::wave::language_support language)
202 : re2c_lexer(first, last, pos, language)
203 {}
~lex_functor()204 virtual ~lex_functor() {}
205
206 // get the next token from the input stream
get(token_type & t)207 token_type& get(token_type& t) { return re2c_lexer.get(t); }
set_position(PositionT const & pos)208 void set_position(PositionT const &pos)
209 { re2c_lexer.set_position(pos); }
210 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
has_include_guards(std::string &) const211 bool has_include_guards(std::string&) const { return false; }
212 #endif
213
214 private:
215 lexer<IteratorT, PositionT> re2c_lexer;
216 };
217
218 } // namespace re2clex
219
220 ///////////////////////////////////////////////////////////////////////////////
221 //
222 // The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
223 // should be defined inline, if the lex_functor shouldn't be instantiated
224 // separately from the lex_iterator.
225 //
226 // Separate (explicit) instantiation helps to reduce compilation time.
227 //
228 ///////////////////////////////////////////////////////////////////////////////
229
230 #if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
231 #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
232 #else
233 #define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
234 #endif
235
236 ///////////////////////////////////////////////////////////////////////////////
237 //
238 // The 'new_lexer' function allows the opaque generation of a new lexer object.
239 // It is coupled to the iterator type to allow to decouple the lexer/iterator
240 // configurations at compile time.
241 //
242 // This function is declared inside the cpp_slex_token.hpp file, which is
243 // referenced by the source file calling the lexer and the source file, which
244 // instantiates the lex_functor. But it is defined here, so it will be
245 // instantiated only while compiling the source file, which instantiates the
// lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,
// this file (the IDL re2c lexer header) should be included only once. This
// decouples the lexer interface from the lexer implementation and reduces
// compilation time.
250 //
251 ///////////////////////////////////////////////////////////////////////////////
252
253 template <typename IteratorT, typename PositionT>
254 BOOST_WAVE_RE2C_NEW_LEXER_INLINE
255 cpplexer::lex_input_interface<cpplexer::lex_token<PositionT> > *
new_lexer(IteratorT const & first,IteratorT const & last,PositionT const & pos,wave::language_support language)256 new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
257 IteratorT const &last, PositionT const &pos,
258 wave::language_support language)
259 {
260 return new re2clex::lex_functor<IteratorT, PositionT>(first, last, pos,
261 language);
262 }
263
264 #undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
265
266 ///////////////////////////////////////////////////////////////////////////////
267 } // namespace idllexer
268 } // namespace wave
269 } // namespace boost
270
271 #endif // !defined(BOOST_IDL_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
272