//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use it with a grammar. This example has a
//  heavily backtracking grammar, which makes it a candidate for lexer based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required), which speeds up the overall parsing process
//  considerably, outweighing the overhead needed for setting up the lexer.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  This example recognizes couplets, which are sequences of numbers enclosed
//  in matching pairs of parentheses. See the comments below for details
//  and examples.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"
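// Note: example.hpp ships alongside the Boost.Spirit lex examples and
// provides the read_from_file() helper used in main() below.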

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";

        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        this->self("WS")
            =   lex::token_def<>("[ \\t\\n]+")          // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, number;
};
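
///////////////////////////////////////////////////////////////////////////////
//  The helper below is a sketch only -- it is not part of the original
//  example. It shows how the token definitions above can be exercised on
//  their own, without any grammar, by walking the lexer's token iterator
//  directly and printing each token id and its matched text. Note that the
//  iterator runs in the lexer's default state, so the input must not contain
//  whitespace or comments (those tokens live in the "WS" state), e.g.:
//
//      dump_tokens(tokens, "((1)(22))...");
///////////////////////////////////////////////////////////////////////////////
template <typename Tokens>
void dump_tokens(Tokens const& tokens, std::string text)
{
    std::string::iterator first = text.begin();
    typename Tokens::iterator_type it = tokens.begin(first, text.end());
    typename Tokens::iterator_type end = tokens.end();

    // token_is_valid() (found via ADL) returns false as soon as the lexer
    // fails to match the remaining input
    while (it != end && token_is_valid(*it))
    {
        std::cout << "token " << (*it).id() << ": '"
                  << std::string((*it).value().begin(), (*it).value().end())
                  << "'\n";
        ++it;
    }
}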

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start
            =  +(couplet | tok.ellipses)
            ;

        //  A couplet matches nested left and right parentheses.
        //  For example:
        //    (1) (1 2) (1 2 3) ...
        //    ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //    (((1))) ...
        couplet
            =   tok.number
            |   '(' >> +couplet >> ')'
            ;

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};
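
///////////////////////////////////////////////////////////////////////////////
//  For comparison, a sketch (again, not part of the original example) of the
//  same couplet language written as a plain character-level Qi grammar.
//  Whenever the alternative in 'couplet' backtracks, this version re-reads
//  raw characters, while the token-based grammar above only re-reads tokens
//  that were scanned once up front -- the speedup the header comment refers
//  to. For brevity the skipper here is plain whitespace (ascii::space), so
//  unlike the lexer-based skipper it does not skip C-style comments.
///////////////////////////////////////////////////////////////////////////////
struct plain_couplet_grammar
  : qi::grammar<std::string::iterator, ascii::space_type>
{
    plain_couplet_grammar() : plain_couplet_grammar::base_type(start)
    {
        start
            =  +(couplet | qi::lit("..."))
            ;

        couplet
            =   qi::uint_
            |   '(' >> +couplet >> ')'
            ;
    }

    qi::rule<std::string::iterator, ascii::space_type> start, couplet;
};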

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our lexer
    example3_grammar calc(tokens);                  // Our parser

    std::string str (read_from_file("example3.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser.
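    // The in_state("WS")[] directive switches the lexer into its "WS" state
    // for the duration of the skipper, so only the tokens defined via
    // self("WS") above are matched and consumed as skipped input.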
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
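
///////////////////////////////////////////////////////////////////////////////
//  For reference, input accepted by this parser looks like the following
//  (an illustration only -- the exact contents of the example3.input file
//  shipped with Boost may differ):
//
//      (1) (1 2) (1 2 3) ...
//      ((1 2)(3 4)) /* skipped comment */ (((1))) ...
///////////////////////////////////////////////////////////////////////////////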