// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent to the following lex program:
/*
//[wcp_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    word   [^ \t\n]+
    eol    \n
    %%
    {word} { ++w; c += yyleng; }
    {eol}  { ++c; ++l; }
    .      { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words, and characters in a file.
//
// The example additionally demonstrates how to use the add_pattern(...)(...)
// syntax to define lexer patterns. These patterns are essentially parameter-
// less 'macros' for regular expressions, simplifying their definition.
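//
// For reference, add_pattern calls can be chained to register several
// patterns at once; a sketch (the second pattern is hypothetical and not
// used in this example):
//
//     this->self.add_pattern
//         ("WORD", "[^ \t\n]+")
//         ("EOL", "\n")
//     ;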

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

#include <boost/config/warning_disable.hpp>
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::ascii;
//]

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
//]
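
// Note: ids below lex::min_token_id are reserved for the library's own use;
// token definitions added without an explicit id (like 'word' below) get ids
// assigned automatically starting at lex::min_token_id, so the +10 offset
// simply leaves room for those before IDANY.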

//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        this->self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        this->self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)    // string literals will not be escaped by the library
        ;
    }

    // the token 'word' exposes the matched string as its parser attribute
    lex::token_def<std::string> word;
};
//]
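
// A minimal sketch of driving this lexer without any parser attached, using
// lex::tokenize() and a token handler (kept as a comment so this file still
// compiles; 'lexer_type' is defined in main() below, and 'handler' stands
// for a hypothetical functor taking a token and returning bool):
//
//     word_count_tokens<lexer_type> lexer;
//     std::string input (read_from_file("word_count.input"));
//     char const* f = input.c_str();
//     bool ok = lex::tokenize(f, f + input.size(), lexer, handler);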

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : word_count_grammar::base_type(start)
      , c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

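        // the semantic actions below use Phoenix: ref() captures the
        // counters by reference, size(_1) yields the length of the matched
        // token's string attribute, and the comma operator sequences two
        // actions within one bracket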
        start =  *(   tok.word          [++ref(w), ref(c) += size(_1)]
                  |   lit('\n')         [++ref(c), ++ref(l)]
                  |   qi::token(IDANY)  [++ref(c)]
                  )
              ;
    }

    std::size_t c, w, l;
    qi::rule<Iterator> start;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
/*< Define the token type to be used: `std::string` is available as the
    type of the token attribute
>*/ typedef lex::lexertl::token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*< Define the lexer type to use implementing the state machine
>*/ typedef lex::lexertl::lexer<token_type> lexer_type;

/*< Define the iterator type exposed by the lexer type
>*/ typedef word_count_tokens<lexer_type>::iterator_type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;          // Our lexer
    word_count_grammar<iterator_type> g (word_count);  // Our parser

    // read the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*< Parsing is done based on the token stream, not the character
    stream read from the input. The function `tokenize_and_parse()` wraps
    the passed iterator range `[first, last)` by the lexical analyzer and
    uses its exposed iterators to parse the token stream.
>*/ bool r = lex::tokenize_and_parse(first, last, word_count, g);

    if (r) {
        std::cout << "lines: " << g.l << ", words: " << g.w
                  << ", characters: " << g.c << "\n";
    }
    else {
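        // tokenize_and_parse() takes 'first' by reference and advances it,
        // so on failure it points at the input position where recognition
        // stopped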
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]