//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens aimed at a simple language and how to use this lexer with
//  a grammar. It shows how to associate attributes to tokens and how to access
//  the token attributes from inside the grammar.
//
//  We use explicit token attribute types, making the corresponding token
//  instances convert the matched input into an instance of that type. The
//  token attribute is exposed as the parser attribute if this token is used
//  as a parser component somewhere in a grammar.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  This example recognizes a very simple programming language having
//  assignment statements and if and while control structures. Look at the file
//  example4.input for an example.

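//  A plausible input accepted by this grammar (a sketch only; the actual
//  contents of example4.input may differ):
//
//      {
//          a = 5;
//          if (a) { b = a; } else { b = 10; }
//          while (b) { b = 0; }
//      }
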
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example4_tokens : lex::lexer<Lexer>
{
    example4_tokens()
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        else_ = "else";
        while_ = "while";

        // associate the tokens and the token set with the lexer
        this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
        this->self += if_ | else_ | while_ | identifier;
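        // note: the keyword tokens are listed before 'identifier'; for
        // equally long matches lexertl prefers the token defined first, so
        // e.g. "if" is matched as the keyword token, not as an identifier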

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
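        // (the second alternative below matches a complete, non-nested C
        // comment: "/*", a body that never contains "*/", then the closing "*/")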
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;
    }

//[example4_token_def
    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> if_, else_, while_;

    // The following two tokens have an associated attribute type: 'identifier'
    // carries a string (the identifier name) and 'constant' carries the
    // matched integer value.
    //
    // Note: any token attribute type explicitly specified in a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token attributes as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding them being copied around.
    lex::token_def<std::string> identifier;
    lex::token_def<unsigned int> constant;
//]
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example4_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example4_grammar(TokenDef const& tok)
      : example4_grammar::base_type(program)
    {
        using boost::spirit::_val;

        program
            =  +block
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ") << _1 << "\n"
                ]
            ;

        if_stmt
            =   (   tok.if_ >> '(' >> expression >> ')' >> block
                    >> -(tok.else_ >> block)
                )
                [
                    std::cout << val("if expression: ") << _2 << "\n"
                ]
            ;

        while_stmt
            =   (tok.while_ >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ") << _2 << "\n"
                ]
            ;

        //  since expression has a variant return type accommodating both
        //  std::string and unsigned integer, either value may be returned
        //  to the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant   [ _val = _1 ]
            ;
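
        // note: boost::variant provides operator<< whenever all of its
        // alternatives are streamable, which is why the semantic actions
        // above can print the expression attribute directly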
    }

    typedef boost::variant<unsigned int, std::string> expression_type;

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block, statement;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > assignment, if_stmt;
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > while_stmt;

    //  the expression is the only rule having a return value
    qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> >  expression;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

//[example4_token
    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // example4_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token attribute types in the following declaration
    // (or just use the default token type: lex::lexertl::token<base_iterator_type>)
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token attribute will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token attribute type you'll have to list all attribute types
    // used for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lex::lexertl::token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;
//]
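    // For comparison, the default-attribute variant mentioned above would be
    // just the following (a sketch; not used in this example):
    //
    //     typedef lex::lexertl::token<base_iterator_type> token_type;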
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example4_tokens<lexer_type> example4_tokens;

    // this is the iterator type exposed by the lexer
    typedef example4_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example4_grammar<iterator_type, example4_tokens::lexer_def> example4_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example4_tokens tokens;                         // Our lexer
    example4_grammar calc(tokens);                  // Our parser

    std::string str (read_from_file("example4.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}