1 // Copyright (c) 2001-2010 Hartmut Kaiser
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 // Simple lexer/parser to test the Spirit installation.
7 //
8 // This example shows, how to create a simple lexer recognizing 5 different
9 // tokens, and how to use a single token definition as the skip parser during
10 // the parsing. Additionally, it demonstrates how to use one of the defined
11 // tokens as a parser component in the grammar.
12 //
13 // The grammar recognizes a simple input structure, for instance:
14 //
15 // {
16 // hello world, hello it is me
17 // }
18 //
19 // Any number of simple sentences (optionally comma separated) inside a pair
20 // of curly braces will be matched.
21
22 // #define BOOST_SPIRIT_LEXERTL_DEBUG
23
24 #include <boost/config/warning_disable.hpp>
25 #include <boost/spirit/include/qi.hpp>
26 #include <boost/spirit/include/lex_lexertl.hpp>
27
28 #include <iostream>
29 #include <fstream>
30 #include <string>
31
32 #include "example.hpp"
33
34 using namespace boost::spirit;
35
36 ///////////////////////////////////////////////////////////////////////////////
37 // Token definition
38 ///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lex::lexer<Lexer>
{
    // Registers all token definitions with the underlying lexertl engine.
    // NOTE: the order tokens are added to this->self fixes their matching
    // priority and token ids, so it must not be changed casually.
    example1_tokens()
    {
        // define tokens and associate them with the lexer: an identifier
        // (C-style: letter/underscore followed by alphanumerics) plus the
        // three single-character tokens ',', '{' and '}'
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        this->self = lex::char_(',') | '{' | '}' | identifier;

        // any token definition to be used as the skip parser during parsing
        // has to be associated with a separate lexer state (here 'WS');
        // the parser switches to this state while skipping whitespace
        this->white_space = "[ \\t\\n]+";
        this->self("WS") = white_space;
    }

    // identifier is referenced by the grammar as a parser component;
    // white_space is the skipper token used via qi::in_state("WS")
    lex::token_def<> identifier, white_space;
};
56
57 ///////////////////////////////////////////////////////////////////////////////
58 // Grammar definition
59 ///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
  : qi::grammar<Iterator, qi::in_state_skipper<lex::token_def<> > >
{
    // Builds the grammar from the token definitions; 'tok' is the lexer
    // object whose 'identifier' token_def is used directly as a parser.
    template <typename TokenDef>
    example1_grammar(TokenDef const& tok)
      : example1_grammar::base_type(start)
    {
        // any number of identifiers, each optionally followed by a comma,
        // inside a single pair of curly braces
        start = '{' >> *(tok.identifier >> -ascii::char_(',')) >> '}';
    }

    // the skipper type must match the in_state("WS")[...] skipper passed
    // to qi::phrase_parse by the caller
    qi::rule<Iterator, qi::in_state_skipper<lex::token_def<> > > start;
};
73
74 ///////////////////////////////////////////////////////////////////////////////
main()75 int main()
76 {
77 // iterator type used to expose the underlying input stream
78 typedef std::string::iterator base_iterator_type;
79
80 // This is the token type to return from the lexer iterator
81 typedef lex::lexertl::token<base_iterator_type> token_type;
82
83 // This is the lexer type to use to tokenize the input.
84 // We use the lexertl based lexer engine.
85 typedef lex::lexertl::lexer<token_type> lexer_type;
86
87 // This is the lexer type (derived from the given lexer type).
88 typedef example1_tokens<lexer_type> example1_lex;
89
90 // This is the iterator type exposed by the lexer
91 typedef example1_lex::iterator_type iterator_type;
92
93 // This is the type of the grammar to parse
94 typedef example1_grammar<iterator_type> example1_grammar;
95
96 // now we use the types defined above to create the lexer and grammar
97 // object instances needed to invoke the parsing process
98 example1_lex lex; // Our lexer
99 example1_grammar calc(lex); // Our grammar definition
100
101 std::string str (read_from_file("example1.input"));
102
103 // At this point we generate the iterator pair used to expose the
104 // tokenized input stream.
105 std::string::iterator it = str.begin();
106 iterator_type iter = lex.begin(it, str.end());
107 iterator_type end = lex.end();
108
109 // Parsing is done based on the token stream, not the character
110 // stream read from the input.
111 // Note, how we use the token_def defined above as the skip parser. It must
112 // be explicitly wrapped inside a state directive, switching the lexer
113 // state for the duration of skipping whitespace.
114 bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[lex.white_space]);
115
116 if (r && iter == end)
117 {
118 std::cout << "-------------------------\n";
119 std::cout << "Parsing succeeded\n";
120 std::cout << "-------------------------\n";
121 }
122 else
123 {
124 std::string rest(iter, end);
125 std::cout << "-------------------------\n";
126 std::cout << "Parsing failed\n";
127 std::cout << "stopped at: \"" << rest << "\"\n";
128 std::cout << "-------------------------\n";
129 }
130
131 std::cout << "Bye... :-) \n\n";
132 return 0;
133 }
134