1 // Copyright (c) 2001-2010 Hartmut Kaiser
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 // This example shows how to create a simple lexer recognizing a couple of
7 // different tokens aimed at a simple language and how to use this lexer with
8 // a grammar. It shows how to associate attributes to tokens and how to access
9 // the token attributes from inside the grammar.
10 //
// We use explicit token attribute types, making the corresponding token
// instances convert the matched input into an instance of that type. The
// token attribute is exposed as the parser attribute if this token is used
// as a parser component somewhere in a grammar.
15 //
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be
// ignored while parsing.
19 //
20 // This example recognizes a very simple programming language having
21 // assignment statements and if and while control structures. Look at the file
22 // example4.input for an example.
23
24 #include <boost/config/warning_disable.hpp>
25 #include <boost/spirit/include/qi.hpp>
26 #include <boost/spirit/include/lex_lexertl.hpp>
27 #include <boost/spirit/include/phoenix_operator.hpp>
28
29 #include <iostream>
30 #include <fstream>
31 #include <string>
32
33 #include "example.hpp"
34
35 using namespace boost::spirit;
36 using boost::phoenix::val;
37
38 ///////////////////////////////////////////////////////////////////////////////
39 // Token definition
40 ///////////////////////////////////////////////////////////////////////////////
41 template <typename Lexer>
42 struct example4_tokens : lex::lexer<Lexer>
43 {
example4_tokensexample4_tokens44 example4_tokens()
45 {
46 // define the tokens to match
47 identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
48 constant = "[0-9]+";
49 if_ = "if";
50 else_ = "else";
51 while_ = "while";
52
53 // associate the tokens and the token set with the lexer
54 this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
55 this->self += if_ | else_ | while_ | identifier;
56
57 // define the whitespace to ignore (spaces, tabs, newlines and C-style
58 // comments)
59 this->self("WS")
60 = lex::token_def<>("[ \\t\\n]+")
61 | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
62 ;
63 }
64
65 //[example4_token_def
66 // these tokens expose the iterator_range of the matched input sequence
67 lex::token_def<> if_, else_, while_;
68
69 // The following two tokens have an associated attribute type, 'identifier'
70 // carries a string (the identifier name) and 'constant' carries the
71 // matched integer value.
72 //
73 // Note: any token attribute type explicitly specified in a token_def<>
74 // declaration needs to be listed during token type definition as
75 // well (see the typedef for the token_type below).
76 //
77 // The conversion of the matched input to an instance of this type occurs
78 // once (on first access), which makes token attributes as efficient as
79 // possible. Moreover, token instances are constructed once by the lexer
80 // library. From this point on tokens are passed by reference only,
81 // avoiding them being copied around.
82 lex::token_def<std::string> identifier;
83 lex::token_def<unsigned int> constant;
84 //]
85 };
86
87 ///////////////////////////////////////////////////////////////////////////////
88 // Grammar definition
89 ///////////////////////////////////////////////////////////////////////////////
90 template <typename Iterator, typename Lexer>
91 struct example4_grammar
92 : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
93 {
94 template <typename TokenDef>
example4_grammarexample4_grammar95 example4_grammar(TokenDef const& tok)
96 : example4_grammar::base_type(program)
97 {
98 using boost::spirit::_val;
99
100 program
101 = +block
102 ;
103
104 block
105 = '{' >> *statement >> '}'
106 ;
107
108 statement
109 = assignment
110 | if_stmt
111 | while_stmt
112 ;
113
114 assignment
115 = (tok.identifier >> '=' >> expression >> ';')
116 [
117 std::cout << val("assignment statement to: ") << _1 << "\n"
118 ]
119 ;
120
121 if_stmt
122 = ( tok.if_ >> '(' >> expression >> ')' >> block
123 >> -(tok.else_ >> block)
124 )
125 [
126 std::cout << val("if expression: ") << _2 << "\n"
127 ]
128 ;
129
130 while_stmt
131 = (tok.while_ >> '(' >> expression >> ')' >> block)
132 [
133 std::cout << val("while expression: ") << _2 << "\n"
134 ]
135 ;
136
137 // since expression has a variant return type accommodating for
138 // std::string and unsigned integer, both possible values may be
139 // returned to the calling rule
140 expression
141 = tok.identifier [ _val = _1 ]
142 | tok.constant [ _val = _1 ]
143 ;
144 }
145
146 typedef boost::variant<unsigned int, std::string> expression_type;
147
148 qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
149 qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
150 qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;
151
152 // the expression is the only rule having a return value
153 qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
154 };
155
156 ///////////////////////////////////////////////////////////////////////////////
main()157 int main()
158 {
159 // iterator type used to expose the underlying input stream
160 typedef std::string::iterator base_iterator_type;
161
162 //[example4_token
163 // This is the lexer token type to use. The second template parameter lists
164 // all attribute types used for token_def's during token definition (see
165 // calculator_tokens<> above). Here we use the predefined lexertl token
166 // type, but any compatible token type may be used instead.
167 //
168 // If you don't list any token attribute types in the following declaration
169 // (or just use the default token type: lexertl_token<base_iterator_type>)
170 // it will compile and work just fine, just a bit less efficient. This is
171 // because the token attribute will be generated from the matched input
172 // sequence every time it is requested. But as soon as you specify at
173 // least one token attribute type you'll have to list all attribute types
174 // used for token_def<> declarations in the token definition class above,
175 // otherwise compilation errors will occur.
176 typedef lex::lexertl::token<
177 base_iterator_type, boost::mpl::vector<unsigned int, std::string>
178 > token_type;
179 //]
180 // Here we use the lexertl based lexer engine.
181 typedef lex::lexertl::lexer<token_type> lexer_type;
182
183 // This is the token definition type (derived from the given lexer type).
184 typedef example4_tokens<lexer_type> example4_tokens;
185
186 // this is the iterator type exposed by the lexer
187 typedef example4_tokens::iterator_type iterator_type;
188
189 // this is the type of the grammar to parse
190 typedef example4_grammar<iterator_type, example4_tokens::lexer_def> example4_grammar;
191
192 // now we use the types defined above to create the lexer and grammar
193 // object instances needed to invoke the parsing process
194 example4_tokens tokens; // Our lexer
195 example4_grammar calc(tokens); // Our parser
196
197 std::string str (read_from_file("example4.input"));
198
199 // At this point we generate the iterator pair used to expose the
200 // tokenized input stream.
201 std::string::iterator it = str.begin();
202 iterator_type iter = tokens.begin(it, str.end());
203 iterator_type end = tokens.end();
204
205 // Parsing is done based on the token stream, not the character
206 // stream read from the input.
207 // Note how we use the lexer defined above as the skip parser. It must
208 // be explicitly wrapped inside a state directive, switching the lexer
209 // state for the duration of skipping whitespace.
210 bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
211
212 if (r && iter == end)
213 {
214 std::cout << "-------------------------\n";
215 std::cout << "Parsing succeeded\n";
216 std::cout << "-------------------------\n";
217 }
218 else
219 {
220 std::cout << "-------------------------\n";
221 std::cout << "Parsing failed\n";
222 std::cout << "-------------------------\n";
223 }
224
225 std::cout << "Bye... :-) \n\n";
226 return 0;
227 }
228