//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is equivalent to the following lex program:
/*
//[wcp_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    word   [^ \t\n]+
    eol    \n
    %%
    {word} { ++w; c += yyleng; }
    {eol}  { ++c; ++l; }
    .      { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  It implements the word count functionality of the UNIX wc command: it
//  prints the number of lines, words, and characters in a file.
//
//  The example additionally demonstrates how to use the add_pattern(...)(...)
//  syntax to define lexer patterns. These patterns are essentially parameter-
//  less 'macros' for regular expressions, which simplify their definition.
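//
//  As a minimal sketch (hypothetical names, not part of this example), a
//  pattern is registered once under a name and then referenced as {NAME}
//  from any token definition:
//
//      this->self.add_pattern ("IDENT", "[a-zA-Z_][a-zA-Z0-9_]*");
//      identifier = "{IDENT}";      // identifier is a lex::token_def<>
//      this->self.add (identifier);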

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

#include <boost/config/warning_disable.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::ascii;
//]

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl-based lexer engine as the underlying
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
//]
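
// Note: user-defined token ids are normally chosen at or above
// lex::min_token_id; the offset of 10 (an arbitrary choice, as far as this
// example shows) keeps IDANY clear of the ids the library assigns
// automatically to tokens added without an explicit id (such as 'word' below).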

//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        this->self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        this->self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)    // string literals will not be escaped by the library
        ;
    }

    // the token 'word' exposes the matched string as its parser attribute
    lex::token_def<std::string> word;
};
//]
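
// A minimal sketch (an assumption, not part of this example): the same token
// definitions can drive the lexer without any parser attached by passing each
// token to a functor via lex::tokenize(), which keeps tokenizing as long as
// the functor returns true:
//
//     struct counter {
//         template <typename Token>
//         bool operator()(Token const& t) const
//         { /* e.g. dispatch on t.id() */ return true; }
//     };
//     char const* it = first;
//     bool ok = lex::tokenize(it, last, my_lexer, counter());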

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : word_count_grammar::base_type(start)
      , c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        start =  *(   tok.word          [++ref(w), ref(c) += size(_1)]
                  |   lit('\n')         [++ref(c), ++ref(l)]
                  |   qi::token(IDANY)  [++ref(c)]
                  )
              ;
    }

    std::size_t c, w, l;
    qi::rule<Iterator> start;
};
//]
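
// The semantic actions above mirror the flex actions quoted at the top of this
// file: _1 is the attribute of the matched token, so for tok.word (whose
// attribute is the matched std::string) boost::phoenix::size(_1) takes the
// role of yyleng in 'c += yyleng'.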

///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
/*<  Define the token type to be used: `std::string` is available as the
     type of the token attribute
>*/  typedef lex::lexertl::token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*<  Define the lexer type to use, implementing the state machine
>*/  typedef lex::lexertl::lexer<token_type> lexer_type;

/*<  Define the iterator type exposed by the lexer type
>*/  typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // Our lexer
    word_count_grammar<iterator_type> g (word_count);  // Our parser

    // read the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*<  Parsing is done based on the token stream, not the character
     stream read from the input. The function `tokenize_and_parse()` wraps
     the passed iterator range `[first, last)` by the lexical analyzer and
     uses its exposed iterators to parse the token stream.
>*/  bool r = lex::tokenize_and_parse(first, last, word_count, g);

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
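
// Example invocation (the file name and the counts are placeholders; actual
// numbers depend on the input):
//
//     $ ./word_count some_file.txt
//     lines: <l>, words: <w>, characters: <c>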