1 // Copyright (c) 2001-2010 Hartmut Kaiser
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 // This example shows how to create a simple lexer recognizing a couple of
7 // different tokens and how to use this with a grammar. This example has a
8 // heavily backtracking grammar which makes it a candidate for lexer based
9 // parsing (all tokens are scanned and generated only once, even if
10 // backtracking is required) which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
12 //
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, which makes it possible to define
// several tokens to be ignored.
16 //
// This example recognizes couplets, which are sequences of numbers enclosed
// in matching pairs of parentheses. See the comments below for details
// and examples.
20
21 // #define BOOST_SPIRIT_LEXERTL_DEBUG
22 // #define BOOST_SPIRIT_DEBUG
23
24 #include <boost/config/warning_disable.hpp>
25 #include <boost/spirit/include/qi.hpp>
26 #include <boost/spirit/include/lex_lexertl.hpp>
27
28 #include <iostream>
29 #include <fstream>
30 #include <string>
31
32 #include "example.hpp"
33
34 using namespace boost::spirit;
35
36 ///////////////////////////////////////////////////////////////////////////////
37 // Token definition
38 ///////////////////////////////////////////////////////////////////////////////
39 template <typename Lexer>
40 struct example3_tokens : lex::lexer<Lexer>
41 {
example3_tokensexample3_tokens42 example3_tokens()
43 {
44 // define the tokens to match
45 ellipses = "\\.\\.\\.";
46 number = "[0-9]+";
47
48 // associate the tokens and the token set with the lexer
49 this->self = ellipses | '(' | ')' | number;
50
51 // define the whitespace to ignore (spaces, tabs, newlines and C-style
52 // comments)
53 this->self("WS")
54 = lex::token_def<>("[ \\t\\n]+") // whitespace
55 | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" // C style comments
56 ;
57 }
58
59 // these tokens expose the iterator_range of the matched input sequence
60 lex::token_def<> ellipses, identifier, number;
61 };
62
63 ///////////////////////////////////////////////////////////////////////////////
64 // Grammar definition
65 ///////////////////////////////////////////////////////////////////////////////
66 template <typename Iterator, typename Lexer>
67 struct example3_grammar
68 : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
69 {
70 template <typename TokenDef>
example3_grammarexample3_grammar71 example3_grammar(TokenDef const& tok)
72 : example3_grammar::base_type(start)
73 {
74 start
75 = +(couplet | tok.ellipses)
76 ;
77
78 // A couplet matches nested left and right parenthesis.
79 // For example:
80 // (1) (1 2) (1 2 3) ...
81 // ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
82 // (((1))) ...
83 couplet
84 = tok.number
85 | '(' >> +couplet >> ')'
86 ;
87
88 BOOST_SPIRIT_DEBUG_NODE(start);
89 BOOST_SPIRIT_DEBUG_NODE(couplet);
90 }
91
92 qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
93 };
94
95 ///////////////////////////////////////////////////////////////////////////////
main()96 int main()
97 {
98 // iterator type used to expose the underlying input stream
99 typedef std::string::iterator base_iterator_type;
100
101 // This is the token type to return from the lexer iterator
102 typedef lex::lexertl::token<base_iterator_type> token_type;
103
104 // This is the lexer type to use to tokenize the input.
105 // Here we use the lexertl based lexer engine.
106 typedef lex::lexertl::lexer<token_type> lexer_type;
107
108 // This is the token definition type (derived from the given lexer type).
109 typedef example3_tokens<lexer_type> example3_tokens;
110
111 // this is the iterator type exposed by the lexer
112 typedef example3_tokens::iterator_type iterator_type;
113
114 // this is the type of the grammar to parse
115 typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;
116
117 // now we use the types defined above to create the lexer and grammar
118 // object instances needed to invoke the parsing process
119 example3_tokens tokens; // Our lexer
120 example3_grammar calc(tokens); // Our parser
121
122 std::string str (read_from_file("example3.input"));
123
124 // At this point we generate the iterator pair used to expose the
125 // tokenized input stream.
126 std::string::iterator it = str.begin();
127 iterator_type iter = tokens.begin(it, str.end());
128 iterator_type end = tokens.end();
129
130 // Parsing is done based on the token stream, not the character
131 // stream read from the input.
132 // Note how we use the lexer defined above as the skip parser.
133 bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
134
135 if (r && iter == end)
136 {
137 std::cout << "-------------------------\n";
138 std::cout << "Parsing succeeded\n";
139 std::cout << "-------------------------\n";
140 }
141 else
142 {
143 std::cout << "-------------------------\n";
144 std::cout << "Parsing failed\n";
145 std::cout << "-------------------------\n";
146 }
147
148 std::cout << "Bye... :-) \n\n";
149 return 0;
150 }
151