//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use this with a grammar. This example has a
//  heavily backtracking grammar which makes it a candidate for lexer based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required) which speeds up the overall parsing process
//  considerably, out-weighting the overhead needed for setting up the lexer.
//  Additionally it demonstrates how to use one of the defined tokens as a
//  parser component in the grammar.
//
//  The grammar recognizes a simple input structure: any number of English
//  simple sentences (statements, questions and commands) are recognized and
//  are being counted separately.

// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::ascii;
using boost::phoenix::ref;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
40 template <typename Lexer>
41 struct example2_tokens : lex::lexer<Lexer>
42 {
example2_tokensexample2_tokens43     example2_tokens()
44     {
45         //  A 'word' is comprised of one or more letters and an optional
46         //  apostrophe. If it contains an apostrophe, there may only be one and
47         //  the apostrophe must be preceded and succeeded by at least 1 letter.
48         //  For example, "I'm" and "doesn't" meet the definition of 'word' we
49         //  define below.
50         word = "[a-zA-Z]+('[a-zA-Z]+)?";
51 
52         // Associate the tokens and the token set with the lexer. Note that
53         // single character token definitions as used below always get
54         // interpreted literally and never as special regex characters. This is
55         // done to be able to assign single characters the id of their character
56         // code value, allowing to reference those as literals in Qi grammars.
57         this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
58     }
59 
60     lex::token_def<> word;
61 };
62 
63 ///////////////////////////////////////////////////////////////////////////////
64 //  Grammar definition
65 ///////////////////////////////////////////////////////////////////////////////
66 template <typename Iterator>
67 struct example2_grammar : qi::grammar<Iterator>
68 {
69     template <typename TokenDef>
example2_grammarexample2_grammar70     example2_grammar(TokenDef const& tok)
71       : example2_grammar::base_type(story)
72       , paragraphs(0), commands(0), questions(0), statements(0)
73     {
74         story
75             =  +paragraph
76             ;
77 
78         paragraph
79             =   (  +(   command [ ++ref(commands) ]
80                     |   question [ ++ref(questions) ]
81                     |   statement [ ++ref(statements) ]
82                     )
83                     >> *char_(' ') >> +char_('\n')
84                 )
85                 [ ++ref(paragraphs) ]
86             ;
87 
88         command
89             =  +(tok.word | ' ' | ',') >> '!'
90             ;
91 
92         question
93             =  +(tok.word | ' ' | ',') >> '?'
94             ;
95 
96         statement
97             =  +(tok.word | ' ' | ',') >> '.'
98             ;
99 
100         BOOST_SPIRIT_DEBUG_NODE(story);
101         BOOST_SPIRIT_DEBUG_NODE(paragraph);
102         BOOST_SPIRIT_DEBUG_NODE(command);
103         BOOST_SPIRIT_DEBUG_NODE(question);
104         BOOST_SPIRIT_DEBUG_NODE(statement);
105     }
106 
107     qi::rule<Iterator> story, paragraph, command, question, statement;
108     int paragraphs, commands, questions, statements;
109 };
110 
111 ///////////////////////////////////////////////////////////////////////////////
main()112 int main()
113 {
114     // iterator type used to expose the underlying input stream
115     typedef std::string::iterator base_iterator_type;
116 
117     // This is the token type to return from the lexer iterator
118     typedef lex::lexertl::token<base_iterator_type> token_type;
119 
120     // This is the lexer type to use to tokenize the input.
121     // Here we use the lexertl based lexer engine.
122     typedef lex::lexertl::lexer<token_type> lexer_type;
123 
124     // This is the token definition type (derived from the given lexer type).
125     typedef example2_tokens<lexer_type> example2_tokens;
126 
127     // this is the iterator type exposed by the lexer
128     typedef example2_tokens::iterator_type iterator_type;
129 
130     // this is the type of the grammar to parse
131     typedef example2_grammar<iterator_type> example2_grammar;
132 
133     // now we use the types defined above to create the lexer and grammar
134     // object instances needed to invoke the parsing process
135     example2_tokens tokens;                         // Our lexer
136     example2_grammar calc(tokens);                  // Our parser
137 
138     std::string str (read_from_file("example2.input"));
139 
140     // At this point we generate the iterator pair used to expose the
141     // tokenized input stream.
142     std::string::iterator it = str.begin();
143     iterator_type iter = tokens.begin(it, str.end());
144     iterator_type end = tokens.end();
145 
146     // Parsing is done based on the token stream, not the character
147     // stream read from the input.
148     bool r = qi::parse(iter, end, calc);
149 
150     if (r && iter == end)
151     {
152         std::cout << "-------------------------\n";
153         std::cout << "Parsing succeeded\n";
154         std::cout << "There were "
155                   << calc.commands << " commands, "
156                   << calc.questions << " questions, and "
157                   << calc.statements << " statements.\n";
158         std::cout << "-------------------------\n";
159     }
160     else
161     {
162         std::cout << "-------------------------\n";
163         std::cout << "Parsing failed\n";
164         std::cout << "-------------------------\n";
165     }
166 
167     std::cout << "Bye... :-) \n\n";
168     return 0;
169 }
170