1 /*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Xpressive based generic lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12
13 #if !defined(BOOST_XPRESSIVE_LEXER_HPP)
14 #define BOOST_XPRESSIVE_LEXER_HPP
15
16 #include <string>
17 #include <vector>
18 #include <utility>
19 #include <iterator>
20 #include <algorithm>
21
22 #include <boost/xpressive/xpressive.hpp>
23
24 namespace boost {
25 namespace wave {
26 namespace cpplexer {
27 namespace xlex {
28
29 ///////////////////////////////////////////////////////////////////////////////
30 template <
31 typename Iterator = char const*,
32 typename Token = int,
33 typename Callback = bool (*)(
34 Iterator const&, Iterator&, Iterator const&, Token const&)
35 >
36 class xpressive_lexer
37 {
38 private:
39 typedef typename std::iterator_traits<Iterator>::value_type
40 char_type;
41 typedef std::basic_string<char_type> string_type;
42
43 // this represents a single token to match
44 struct regex_info
45 {
46 typedef boost::xpressive::basic_regex<Iterator> regex_type;
47
48 string_type str;
49 Token token;
50 regex_type regex;
51 Callback callback;
52
regex_infoboost::wave::cpplexer::xlex::xpressive_lexer::regex_info53 regex_info(string_type const& str, Token const& token,
54 Callback const& callback)
55 : str(str), token(token),
56 regex(regex_type::compile(str)),
57 callback(callback)
58 {}
59
60 // these structures are to be ordered by the token id
operator <(regex_info const & lhs,regex_info const & rhs)61 friend bool operator< (regex_info const& lhs, regex_info const& rhs)
62 {
63 return lhs.token < rhs.token;
64 }
65 };
66
67 typedef std::vector<regex_info> regex_list_type;
68
69 public:
70 typedef Callback callback_type;
71
xpressive_lexer()72 xpressive_lexer() {}
73
74 // register a the regex with the lexer
75 void register_regex(string_type const& regex, Token const& id,
76 Callback const& cb = Callback());
77
78 // match the given input and return the next recognized token
79 Token next_token(Iterator &first, Iterator const& last, string_type& token);
80
81 private:
82 regex_list_type regex_list;
83 };
84
85 ///////////////////////////////////////////////////////////////////////////////
86 template <typename Iterator, typename Token, typename Callback>
87 inline void
register_regex(string_type const & regex,Token const & id,Callback const & cb)88 xpressive_lexer<Iterator, Token, Callback>::register_regex(
89 string_type const& regex, Token const& id, Callback const& cb)
90 {
91 regex_list.push_back(regex_info(regex, id, cb));
92 }
93
94 ///////////////////////////////////////////////////////////////////////////////
95 template <typename Iterator, typename Token, typename Callback>
96 inline Token
next_token(Iterator & first,Iterator const & last,string_type & token)97 xpressive_lexer<Iterator, Token, Callback>::next_token(
98 Iterator &first, Iterator const& last, string_type& token)
99 {
100 typedef typename regex_list_type::iterator iterator;
101
102 xpressive::match_results<Iterator> regex_result;
103 for (iterator it = regex_list.begin(), end = regex_list.end(); it != end; ++it)
104 {
105 namespace xpressive = boost::xpressive;
106
107 // regex_info const& curr_regex = *it;
108 // xpressive::match_results<Iterator> regex_result;
109 if (xpressive::regex_search(first, last, regex_result, (*it).regex,
110 xpressive::regex_constants::match_continuous))
111 {
112 Iterator saved = first;
113 Token rval = (*it).token;
114
115 std::advance(first, regex_result.length());
116 token = string_type(saved, first);
117
118 if (NULL != (*it).callback) {
119 // execute corresponding callback
120 if ((*it).callback(saved, first, last, (*it).token))
121 rval = next_token(first, last, token);
122 }
123
124 return rval;
125 }
126 }
127 return Token(-1); // TODO: change this to use token_traits<Token>
128 }
129
130 ///////////////////////////////////////////////////////////////////////////////
131 }}}} // boost::wave::cpplexer::xlex
132
133 #endif // !defined(BOOST_XPRESSIVE_LEXER_HPP)
134
135
136