1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) 7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/support/info.hpp> 14 #include <boost/spirit/home/qi/skip_over.hpp> 15 #include <boost/spirit/home/qi/parser.hpp> 16 #include <boost/spirit/home/qi/detail/assign_to.hpp> 17 #include <boost/spirit/home/lex/reference.hpp> 18 #include <boost/spirit/home/lex/meta_compiler.hpp> 19 #include <boost/spirit/home/lex/lexer_type.hpp> 20 #include <boost/spirit/home/lex/lexer/token_def.hpp> 21 #include <boost/assert.hpp> 22 #include <boost/noncopyable.hpp> 23 #include <boost/fusion/include/vector.hpp> 24 #include <boost/mpl/assert.hpp> 25 #include <boost/proto/extends.hpp> 26 #include <boost/proto/traits.hpp> 27 #include <boost/range/iterator_range_core.hpp> 28 #include <iterator> // for std::iterator_traits 29 #include <string> 30 31 namespace boost { namespace spirit { namespace lex 32 { 33 /////////////////////////////////////////////////////////////////////////// 34 namespace detail 35 { 36 /////////////////////////////////////////////////////////////////////// 37 template <typename LexerDef> 38 struct lexer_def_ 39 : proto::extends< 40 typename proto::terminal< 41 lex::reference<lexer_def_<LexerDef> const> 42 >::type 43 , lexer_def_<LexerDef> > 44 , qi::parser<lexer_def_<LexerDef> > 45 , lex::lexer_type<lexer_def_<LexerDef> > 46 { 47 private: 48 // avoid warnings about using 'this' in constructor this_boost::spirit::lex::detail::lexer_def_49 lexer_def_& this_() { return *this; } 50 51 typedef typename LexerDef::char_type char_type; 52 typedef typename LexerDef::string_type string_type; 53 typedef typename LexerDef::id_type id_type; 54 55 typedef lex::reference<lexer_def_ const> reference_; 56 typedef typename proto::terminal<reference_>::type terminal_type; 57 typedef proto::extends<terminal_type, lexer_def_> proto_base_type; 58 aliasboost::spirit::lex::detail::lexer_def_59 reference_ alias() const 60 { 61 return reference_(*this); 62 } 63 64 public: 65 // Qi interface: metafunction calculating parser attribute type 66 template <typename Context, typename Iterator> 67 struct attribute 68 { 69 // the return value of a token set contains the matched token 70 // id, and the corresponding pair of iterators 71 typedef typename Iterator::base_iterator_type iterator_type; 72 typedef 73 fusion::vector2<id_type, iterator_range<iterator_type> > 74 type; 75 }; 76 77 // Qi interface: parse functionality 78 template <typename Iterator, typename Context 79 , typename Skipper, typename Attribute> parseboost::spirit::lex::detail::lexer_def_80 bool parse(Iterator& first, Iterator const& last 81 , Context& /*context*/, Skipper const& skipper 82 , Attribute& attr) const 83 { 84 qi::skip_over(first, last, skipper); // always do a pre-skip 85 86 if (first != last) { 87 typedef typename 88 std::iterator_traits<Iterator>::value_type 89 token_type; 90 91 token_type const& t = *first; 92 if (token_is_valid(t) && t.state() == first.get_state()) { 93 // any of the token definitions matched 94 spirit::traits::assign_to(t, attr); 95 ++first; 96 return true; 97 } 98 } 99 return false; 100 } 101 102 // Qi interface: 'what' functionality 103 template <typename Context> whatboost::spirit::lex::detail::lexer_def_104 info what(Context& /*context*/) const 105 { 106 return info("lexer"); 107 } 108 109 private: 110 // allow to use the lexer.self.add("regex1", id1)("regex2", id2); 111 // syntax 112 struct adder 113 { adderboost::spirit::lex::detail::lexer_def_::adder114 adder(lexer_def_& def_) 115 : def(def_) {} 116 117 // Add a token definition based on a single character as given 118 // by the first parameter, the second parameter allows to 119 // specify the token id to use for the new token. If no token 120 // id is given the character code is used. operator ()boost::spirit::lex::detail::lexer_def_::adder121 adder const& operator()(char_type c 122 , id_type token_id = id_type()) const 123 { 124 if (id_type() == token_id) 125 token_id = static_cast<id_type>(c); 126 def.def.add_token (def.state.c_str(), c, token_id 127 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); 128 return *this; 129 } 130 131 // Add a token definition based on a character sequence as 132 // given by the first parameter, the second parameter allows to 133 // specify the token id to use for the new token. If no token 134 // id is given this function will generate a unique id to be 135 // used as the token's id. operator ()boost::spirit::lex::detail::lexer_def_::adder136 adder const& operator()(string_type const& s 137 , id_type token_id = id_type()) const 138 { 139 if (id_type() == token_id) 140 token_id = def.def.get_next_id(); 141 def.def.add_token (def.state.c_str(), s, token_id 142 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); 143 return *this; 144 } 145 146 template <typename Attribute> operator ()boost::spirit::lex::detail::lexer_def_::adder147 adder const& operator()( 148 token_def<Attribute, char_type, id_type>& tokdef 149 , id_type token_id = id_type()) const 150 { 151 // make sure we have a token id 152 if (id_type() == token_id) { 153 if (id_type() == tokdef.id()) { 154 token_id = def.def.get_next_id(); 155 tokdef.id(token_id); 156 } 157 else { 158 token_id = tokdef.id(); 159 } 160 } 161 else { 162 // the following assertion makes sure that the token_def 163 // instance has not been assigned a different id earlier 164 BOOST_ASSERT(id_type() == tokdef.id() 165 || token_id == tokdef.id()); 166 tokdef.id(token_id); 167 } 168 169 def.define(tokdef); 170 return *this; 171 } 172 173 // template <typename F> 174 // adder const& operator()(char_type c, id_type token_id, F act) const 175 // { 176 // if (id_type() == token_id) 177 // token_id = def.def.get_next_id(); 178 // std::size_t unique_id = 179 // def.def.add_token (def.state.c_str(), s, token_id); 180 // def.def.add_action(unique_id, def.state.c_str(), act); 181 // return *this; 182 // } 183 184 lexer_def_& def; 185 186 // silence MSVC warning C4512: assignment operator could not be generated 187 BOOST_DELETED_FUNCTION(adder& operator= (adder const&)) 188 }; 189 friend struct adder; 190 191 // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); 192 // syntax 193 struct pattern_adder 194 { pattern_adderboost::spirit::lex::detail::lexer_def_::pattern_adder195 pattern_adder(lexer_def_& def_) 196 : def(def_) {} 197 operator ()boost::spirit::lex::detail::lexer_def_::pattern_adder198 pattern_adder const& operator()(string_type const& p 199 , string_type const& s) const 200 { 201 def.def.add_pattern (def.state.c_str(), p, s); 202 return *this; 203 } 204 205 lexer_def_& def; 206 207 // silence MSVC warning C4512: assignment operator could not be generated 208 BOOST_DELETED_FUNCTION(pattern_adder& operator= (pattern_adder const&)) 209 }; 210 friend struct pattern_adder; 211 212 private: 213 // Helper function to invoke the necessary 2 step compilation 214 // process on token definition expressions 215 template <typename TokenExpr> compile2passboost::spirit::lex::detail::lexer_def_216 void compile2pass(TokenExpr const& expr) 217 { 218 expr.collect(def, state, targetstate); 219 expr.add_actions(def); 220 } 221 222 public: 223 /////////////////////////////////////////////////////////////////// 224 template <typename Expr> defineboost::spirit::lex::detail::lexer_def_225 void define(Expr const& expr) 226 { 227 compile2pass(compile<lex::domain>(expr)); 228 } 229 lexer_def_boost::spirit::lex::detail::lexer_def_230 lexer_def_(LexerDef& def_, string_type const& state_ 231 , string_type const& targetstate_ = string_type()) 232 : proto_base_type(terminal_type::make(alias())) 233 , add(this_()), add_pattern(this_()), def(def_) 234 , state(state_), targetstate(targetstate_) 235 {} 236 237 // allow to switch states operator ()boost::spirit::lex::detail::lexer_def_238 lexer_def_ operator()(char_type const* state) const 239 { 240 return lexer_def_(def, state); 241 } operator ()boost::spirit::lex::detail::lexer_def_242 lexer_def_ operator()(char_type const* state 243 , char_type const* targetstate) const 244 { 245 return lexer_def_(def, state, targetstate); 246 } operator ()boost::spirit::lex::detail::lexer_def_247 lexer_def_ operator()(string_type const& state 248 , string_type const& targetstate = string_type()) const 249 { 250 return lexer_def_(def, state, targetstate); 251 } 252 253 // allow to assign a token definition expression 254 template <typename Expr> operator =boost::spirit::lex::detail::lexer_def_255 lexer_def_& operator= (Expr const& xpr) 256 { 257 // Report invalid expression error as early as possible. 258 // If you got an error_invalid_expression error message here, 259 // then the expression (expr) is not a valid spirit lex 260 // expression. 261 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 262 263 def.clear(state.c_str()); 264 define(xpr); 265 return *this; 266 } 267 268 // explicitly tell the lexer that the given state will be defined 269 // (useful in conjunction with "*") add_stateboost::spirit::lex::detail::lexer_def_270 std::size_t add_state(char_type const* state = 0) 271 { 272 return def.add_state(state ? state : def.initial_state().c_str()); 273 } 274 275 adder add; 276 pattern_adder add_pattern; 277 278 private: 279 LexerDef& def; 280 string_type state; 281 string_type targetstate; 282 283 // silence MSVC warning C4512: assignment operator could not be generated 284 BOOST_DELETED_FUNCTION(lexer_def_& operator= (lexer_def_ const&)) 285 }; 286 287 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES) 288 // allow to assign a token definition expression 289 template <typename LexerDef, typename Expr> 290 inline lexer_def_<LexerDef>& operator +=(lexer_def_<LexerDef> & lexdef,Expr & xpr)291 operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) 292 { 293 // Report invalid expression error as early as possible. 294 // If you got an error_invalid_expression error message here, 295 // then the expression (expr) is not a valid spirit lex 296 // expression. 297 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 298 299 lexdef.define(xpr); 300 return lexdef; 301 } 302 #else 303 // allow to assign a token definition expression 304 template <typename LexerDef, typename Expr> 305 inline lexer_def_<LexerDef>& operator +=(lexer_def_<LexerDef> & lexdef,Expr && xpr)306 operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) 307 { 308 // Report invalid expression error as early as possible. 309 // If you got an error_invalid_expression error message here, 310 // then the expression (expr) is not a valid spirit lex 311 // expression. 312 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 313 314 lexdef.define(xpr); 315 return lexdef; 316 } 317 #endif 318 319 template <typename LexerDef, typename Expr> 320 inline lexer_def_<LexerDef>& operator +=(lexer_def_<LexerDef> & lexdef,Expr const & xpr)321 operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) 322 { 323 // Report invalid expression error as early as possible. 324 // If you got an error_invalid_expression error message here, 325 // then the expression (expr) is not a valid spirit lex 326 // expression. 327 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 328 329 lexdef.define(xpr); 330 return lexdef; 331 } 332 } 333 334 /////////////////////////////////////////////////////////////////////////// 335 // The match_flags flags are used to influence different matching 336 // modes of the lexer 337 struct match_flags 338 { 339 enum enum_type 340 { 341 match_default = 0, // no flags 342 match_not_dot_newline = 1, // the regex '.' doesn't match newlines 343 match_icase = 2 // all matching operations are case insensitive 344 }; 345 }; 346 347 /////////////////////////////////////////////////////////////////////////// 348 // This represents a lexer object 349 /////////////////////////////////////////////////////////////////////////// 350 351 /////////////////////////////////////////////////////////////////////////// 352 // This is the first token id automatically assigned by the library 353 // if needed 354 enum tokenids 355 { 356 min_token_id = 0x10000 357 }; 358 359 template <typename Lexer> 360 class lexer : public Lexer 361 { 362 private: 363 // avoid warnings about using 'this' in constructor this_()364 lexer& this_() { return *this; } 365 366 std::size_t next_token_id; // has to be an integral type 367 368 public: 369 typedef Lexer lexer_type; 370 typedef typename Lexer::id_type id_type; 371 typedef typename Lexer::char_type char_type; 372 typedef typename Lexer::iterator_type iterator_type; 373 typedef lexer base_type; 374 375 typedef detail::lexer_def_<lexer> lexer_def; 376 typedef std::basic_string<char_type> string_type; 377 378 // if `id_type` was specified but `first_id` is not provided 379 // the `min_token_id` value may be out of range for `id_type`, 380 // but it will be a problem only if unique ids feature is in use. lexer(unsigned int flags=match_flags::match_default)381 lexer(unsigned int flags = match_flags::match_default) 382 : lexer_type(flags) 383 , next_token_id(min_token_id) 384 , self(this_(), lexer_type::initial_state()) 385 {} 386 lexer(unsigned int flags,id_type first_id)387 lexer(unsigned int flags, id_type first_id) 388 : lexer_type(flags) 389 , next_token_id(first_id) 390 , self(this_(), lexer_type::initial_state()) 391 {} 392 393 // access iterator interface 394 template <typename Iterator> begin(Iterator & first,Iterator const & last,char_type const * initial_state=0) const395 iterator_type begin(Iterator& first, Iterator const& last 396 , char_type const* initial_state = 0) const 397 { return this->lexer_type::begin(first, last, initial_state); } end() const398 iterator_type end() const 399 { return this->lexer_type::end(); } 400 map_state(char_type const * state)401 std::size_t map_state(char_type const* state) 402 { return this->lexer_type::add_state(state); } 403 404 // create a unique token id get_next_id()405 id_type get_next_id() { return id_type(next_token_id++); } 406 407 lexer_def self; // allow for easy token definition 408 }; 409 410 }}} 411 412 #endif 413