• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/support/info.hpp>
14 #include <boost/spirit/home/qi/skip_over.hpp>
15 #include <boost/spirit/home/qi/parser.hpp>
16 #include <boost/spirit/home/qi/detail/assign_to.hpp>
17 #include <boost/spirit/home/lex/reference.hpp>
18 #include <boost/spirit/home/lex/meta_compiler.hpp>
19 #include <boost/spirit/home/lex/lexer_type.hpp>
20 #include <boost/spirit/home/lex/lexer/token_def.hpp>
21 #include <boost/assert.hpp>
22 #include <boost/noncopyable.hpp>
23 #include <boost/fusion/include/vector.hpp>
24 #include <boost/mpl/assert.hpp>
25 #include <boost/proto/extends.hpp>
26 #include <boost/proto/traits.hpp>
27 #include <boost/range/iterator_range_core.hpp>
28 #include <iterator> // for std::iterator_traits
29 #include <string>
30 
31 namespace boost { namespace spirit { namespace lex
32 {
33     ///////////////////////////////////////////////////////////////////////////
34     namespace detail
35     {
36         ///////////////////////////////////////////////////////////////////////
37         template <typename LexerDef>
38         struct lexer_def_
39           : proto::extends<
40                 typename proto::terminal<
41                    lex::reference<lexer_def_<LexerDef> const>
42                 >::type
43               , lexer_def_<LexerDef> >
44           , qi::parser<lexer_def_<LexerDef> >
45           , lex::lexer_type<lexer_def_<LexerDef> >
46         {
47         private:
48             // avoid warnings about using 'this' in constructor
this_boost::spirit::lex::detail::lexer_def_49             lexer_def_& this_() { return *this; }
50 
51             typedef typename LexerDef::char_type char_type;
52             typedef typename LexerDef::string_type string_type;
53             typedef typename LexerDef::id_type id_type;
54 
55             typedef lex::reference<lexer_def_ const> reference_;
56             typedef typename proto::terminal<reference_>::type terminal_type;
57             typedef proto::extends<terminal_type, lexer_def_> proto_base_type;
58 
aliasboost::spirit::lex::detail::lexer_def_59             reference_ alias() const
60             {
61                 return reference_(*this);
62             }
63 
64         public:
65             // Qi interface: metafunction calculating parser attribute type
66             template <typename Context, typename Iterator>
67             struct attribute
68             {
69                 //  the return value of a token set contains the matched token
70                 //  id, and the corresponding pair of iterators
71                 typedef typename Iterator::base_iterator_type iterator_type;
72                 typedef
73                     fusion::vector2<id_type, iterator_range<iterator_type> >
74                 type;
75             };
76 
77             // Qi interface: parse functionality
78             template <typename Iterator, typename Context
79               , typename Skipper, typename Attribute>
parseboost::spirit::lex::detail::lexer_def_80             bool parse(Iterator& first, Iterator const& last
81               , Context& /*context*/, Skipper const& skipper
82               , Attribute& attr) const
83             {
84                 qi::skip_over(first, last, skipper);   // always do a pre-skip
85 
86                 if (first != last) {
87                     typedef typename
88                         std::iterator_traits<Iterator>::value_type
89                     token_type;
90 
91                     token_type const& t = *first;
92                     if (token_is_valid(t) && t.state() == first.get_state()) {
93                     // any of the token definitions matched
94                         spirit::traits::assign_to(t, attr);
95                         ++first;
96                         return true;
97                     }
98                 }
99                 return false;
100             }
101 
102             // Qi interface: 'what' functionality
103             template <typename Context>
whatboost::spirit::lex::detail::lexer_def_104             info what(Context& /*context*/) const
105             {
106                 return info("lexer");
107             }
108 
109         private:
110             // allow to use the lexer.self.add("regex1", id1)("regex2", id2);
111             // syntax
112             struct adder
113             {
adderboost::spirit::lex::detail::lexer_def_::adder114                 adder(lexer_def_& def_)
115                   : def(def_) {}
116 
117                 // Add a token definition based on a single character as given
118                 // by the first parameter, the second parameter allows to
119                 // specify the token id to use for the new token. If no token
120                 // id is given the character code is used.
operator ()boost::spirit::lex::detail::lexer_def_::adder121                 adder const& operator()(char_type c
122                   , id_type token_id = id_type()) const
123                 {
124                     if (id_type() == token_id)
125                         token_id = static_cast<id_type>(c);
126                     def.def.add_token (def.state.c_str(), c, token_id
127                         , def.targetstate.empty() ? 0 : def.targetstate.c_str());
128                     return *this;
129                 }
130 
131                 // Add a token definition based on a character sequence as
132                 // given by the first parameter, the second parameter allows to
133                 // specify the token id to use for the new token. If no token
134                 // id is given this function will generate a unique id to be
135                 // used as the token's id.
operator ()boost::spirit::lex::detail::lexer_def_::adder136                 adder const& operator()(string_type const& s
137                   , id_type token_id = id_type()) const
138                 {
139                     if (id_type() == token_id)
140                         token_id = def.def.get_next_id();
141                     def.def.add_token (def.state.c_str(), s, token_id
142                         , def.targetstate.empty() ? 0 : def.targetstate.c_str());
143                     return *this;
144                 }
145 
146                 template <typename Attribute>
operator ()boost::spirit::lex::detail::lexer_def_::adder147                 adder const& operator()(
148                     token_def<Attribute, char_type, id_type>& tokdef
149                   , id_type token_id = id_type()) const
150                 {
151                     // make sure we have a token id
152                     if (id_type() == token_id) {
153                         if (id_type() == tokdef.id()) {
154                             token_id = def.def.get_next_id();
155                             tokdef.id(token_id);
156                         }
157                         else {
158                             token_id = tokdef.id();
159                         }
160                     }
161                     else {
162                     // the following assertion makes sure that the token_def
163                     // instance has not been assigned a different id earlier
164                         BOOST_ASSERT(id_type() == tokdef.id()
165                                   || token_id == tokdef.id());
166                         tokdef.id(token_id);
167                     }
168 
169                     def.define(tokdef);
170                     return *this;
171                 }
172 
173 //                 template <typename F>
174 //                 adder const& operator()(char_type c, id_type token_id, F act) const
175 //                 {
176 //                     if (id_type() == token_id)
177 //                         token_id = def.def.get_next_id();
178 //                     std::size_t unique_id =
179 //                         def.def.add_token (def.state.c_str(), s, token_id);
180 //                     def.def.add_action(unique_id, def.state.c_str(), act);
181 //                     return *this;
182 //                 }
183 
184                 lexer_def_& def;
185 
186                 // silence MSVC warning C4512: assignment operator could not be generated
187                 BOOST_DELETED_FUNCTION(adder& operator= (adder const&))
188             };
189             friend struct adder;
190 
191             // allow to use lexer.self.add_pattern("pattern1", "regex1")(...);
192             // syntax
193             struct pattern_adder
194             {
pattern_adderboost::spirit::lex::detail::lexer_def_::pattern_adder195                 pattern_adder(lexer_def_& def_)
196                   : def(def_) {}
197 
operator ()boost::spirit::lex::detail::lexer_def_::pattern_adder198                 pattern_adder const& operator()(string_type const& p
199                   , string_type const& s) const
200                 {
201                     def.def.add_pattern (def.state.c_str(), p, s);
202                     return *this;
203                 }
204 
205                 lexer_def_& def;
206 
207                 // silence MSVC warning C4512: assignment operator could not be generated
208                 BOOST_DELETED_FUNCTION(pattern_adder& operator= (pattern_adder const&))
209             };
210             friend struct pattern_adder;
211 
212         private:
213             // Helper function to invoke the necessary 2 step compilation
214             // process on token definition expressions
215             template <typename TokenExpr>
compile2passboost::spirit::lex::detail::lexer_def_216             void compile2pass(TokenExpr const& expr)
217             {
218                 expr.collect(def, state, targetstate);
219                 expr.add_actions(def);
220             }
221 
222         public:
223             ///////////////////////////////////////////////////////////////////
224             template <typename Expr>
defineboost::spirit::lex::detail::lexer_def_225             void define(Expr const& expr)
226             {
227                 compile2pass(compile<lex::domain>(expr));
228             }
229 
lexer_def_boost::spirit::lex::detail::lexer_def_230             lexer_def_(LexerDef& def_, string_type const& state_
231                   , string_type const& targetstate_ = string_type())
232               : proto_base_type(terminal_type::make(alias()))
233               , add(this_()), add_pattern(this_()), def(def_)
234               , state(state_), targetstate(targetstate_)
235             {}
236 
237             // allow to switch states
operator ()boost::spirit::lex::detail::lexer_def_238             lexer_def_ operator()(char_type const* state) const
239             {
240                 return lexer_def_(def, state);
241             }
operator ()boost::spirit::lex::detail::lexer_def_242             lexer_def_ operator()(char_type const* state
243               , char_type const* targetstate) const
244             {
245                 return lexer_def_(def, state, targetstate);
246             }
operator ()boost::spirit::lex::detail::lexer_def_247             lexer_def_ operator()(string_type const& state
248               , string_type const& targetstate = string_type()) const
249             {
250                 return lexer_def_(def, state, targetstate);
251             }
252 
253             // allow to assign a token definition expression
254             template <typename Expr>
operator =boost::spirit::lex::detail::lexer_def_255             lexer_def_& operator= (Expr const& xpr)
256             {
257                 // Report invalid expression error as early as possible.
258                 // If you got an error_invalid_expression error message here,
259                 // then the expression (expr) is not a valid spirit lex
260                 // expression.
261                 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
262 
263                 def.clear(state.c_str());
264                 define(xpr);
265                 return *this;
266             }
267 
268             // explicitly tell the lexer that the given state will be defined
269             // (useful in conjunction with "*")
add_stateboost::spirit::lex::detail::lexer_def_270             std::size_t add_state(char_type const* state = 0)
271             {
272                 return def.add_state(state ? state : def.initial_state().c_str());
273             }
274 
275             adder add;
276             pattern_adder add_pattern;
277 
278         private:
279             LexerDef& def;
280             string_type state;
281             string_type targetstate;
282 
283             // silence MSVC warning C4512: assignment operator could not be generated
284             BOOST_DELETED_FUNCTION(lexer_def_& operator= (lexer_def_ const&))
285         };
286 
287 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
288         // allow to assign a token definition expression
289         template <typename LexerDef, typename Expr>
290         inline lexer_def_<LexerDef>&
operator +=(lexer_def_<LexerDef> & lexdef,Expr & xpr)291         operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
292         {
293             // Report invalid expression error as early as possible.
294             // If you got an error_invalid_expression error message here,
295             // then the expression (expr) is not a valid spirit lex
296             // expression.
297             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
298 
299             lexdef.define(xpr);
300             return lexdef;
301         }
302 #else
303         // allow to assign a token definition expression
304         template <typename LexerDef, typename Expr>
305         inline lexer_def_<LexerDef>&
operator +=(lexer_def_<LexerDef> & lexdef,Expr && xpr)306         operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
307         {
308             // Report invalid expression error as early as possible.
309             // If you got an error_invalid_expression error message here,
310             // then the expression (expr) is not a valid spirit lex
311             // expression.
312             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
313 
314             lexdef.define(xpr);
315             return lexdef;
316         }
317 #endif
318 
319         template <typename LexerDef, typename Expr>
320         inline lexer_def_<LexerDef>&
operator +=(lexer_def_<LexerDef> & lexdef,Expr const & xpr)321         operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
322         {
323             // Report invalid expression error as early as possible.
324             // If you got an error_invalid_expression error message here,
325             // then the expression (expr) is not a valid spirit lex
326             // expression.
327             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
328 
329             lexdef.define(xpr);
330             return lexdef;
331         }
332     }
333 
334     ///////////////////////////////////////////////////////////////////////////
335     //  The match_flags flags are used to influence different matching
336     //  modes of the lexer
337     struct match_flags
338     {
339         enum enum_type
340         {
341             match_default = 0,          // no flags
342             match_not_dot_newline = 1,  // the regex '.' doesn't match newlines
343             match_icase = 2             // all matching operations are case insensitive
344         };
345     };
346 
347     ///////////////////////////////////////////////////////////////////////////
348     //  This represents a lexer object
349     ///////////////////////////////////////////////////////////////////////////
350 
351     ///////////////////////////////////////////////////////////////////////////
352     // This is the first token id automatically assigned by the library
353     // if needed
354     enum tokenids
355     {
356         min_token_id = 0x10000
357     };
358 
359     template <typename Lexer>
360     class lexer : public Lexer
361     {
362     private:
363         // avoid warnings about using 'this' in constructor
this_()364         lexer& this_() { return *this; }
365 
366         std::size_t next_token_id;   // has to be an integral type
367 
368     public:
369         typedef Lexer lexer_type;
370         typedef typename Lexer::id_type id_type;
371         typedef typename Lexer::char_type char_type;
372         typedef typename Lexer::iterator_type iterator_type;
373         typedef lexer base_type;
374 
375         typedef detail::lexer_def_<lexer> lexer_def;
376         typedef std::basic_string<char_type> string_type;
377 
378         // if `id_type` was specified but `first_id` is not provided
379         // the `min_token_id` value may be out of range for `id_type`,
380         // but it will be a problem only if unique ids feature is in use.
lexer(unsigned int flags=match_flags::match_default)381         lexer(unsigned int flags = match_flags::match_default)
382           : lexer_type(flags)
383           , next_token_id(min_token_id)
384           , self(this_(), lexer_type::initial_state())
385         {}
386 
lexer(unsigned int flags,id_type first_id)387         lexer(unsigned int flags, id_type first_id)
388           : lexer_type(flags)
389           , next_token_id(first_id)
390           , self(this_(), lexer_type::initial_state())
391         {}
392 
393         // access iterator interface
394         template <typename Iterator>
begin(Iterator & first,Iterator const & last,char_type const * initial_state=0) const395         iterator_type begin(Iterator& first, Iterator const& last
396                 , char_type const* initial_state = 0) const
397             { return this->lexer_type::begin(first, last, initial_state); }
end() const398         iterator_type end() const
399             { return this->lexer_type::end(); }
400 
map_state(char_type const * state)401         std::size_t map_state(char_type const* state)
402             { return this->lexer_type::add_state(state); }
403 
404         //  create a unique token id
get_next_id()405         id_type get_next_id() { return id_type(next_token_id++); }
406 
407         lexer_def self;  // allow for easy token definition
408     };
409 
410 }}}
411 
412 #endif
413