• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //  Copyright (c) 2001-2010 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 //  This example is the equivalent to the following lex program:
7 /*
8 //[wcl_flex_version
9     %{
10         int c = 0, w = 0, l = 0;
11     %}
12     %%
13     [^ \t\n]+  { ++w; c += yyleng; }
14     \n         { ++c; ++l; }
15     .          { ++c; }
16     %%
17     main()
18     {
19         yylex();
20         printf("%d %d %d\n", l, w, c);
21     }
22 //]
23 */
//  Its purpose is to do the word count function of the wc command in UNIX. It
//  prints the number of lines, words and characters in a file.
//
//  This example shows how to use semantic actions associated with token
//  definitions to directly attach actions to tokens. These get executed
//  whenever the corresponding token is matched in the input sequence. Note
//  how this example implements all functionality directly in the lexer
//  definition without any need for a parser.
32 
33 // #define BOOST_SPIRIT_LEXERTL_DEBUG
34 
35 #include <boost/config/warning_disable.hpp>
36 //[wcl_includes
37 #include <boost/spirit/include/lex_lexertl.hpp>
38 #include <boost/spirit/include/phoenix_operator.hpp>
39 #include <boost/spirit/include/phoenix_statement.hpp>
40 #include <boost/spirit/include/phoenix_algorithm.hpp>
41 #include <boost/spirit/include/phoenix_core.hpp>
42 //]
43 
44 #include <iostream>
45 #include <string>
46 
47 #include "example.hpp"
48 
49 //[wcl_namespaces
50 namespace lex = boost::spirit::lex;
51 //]
52 
///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//                    lexer type.
//
//  Note, the lexer type the token definition is instantiated with is based on
//  the 'lex::lexertl::actor_lexer' template, which is necessary to be able to
//  use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
60 struct distance_func
61 {
62     template <typename Iterator1, typename Iterator2>
63     struct result : boost::iterator_difference<Iterator1> {};
64 
65     template <typename Iterator1, typename Iterator2>
66     typename result<Iterator1, Iterator2>::type
operator ()distance_func67     operator()(Iterator1 const& begin, Iterator2 const& end) const
68     {
69         return std::distance(begin, end);
70     }
71 };
72 boost::phoenix::function<distance_func> const distance = distance_func();
73 
74 //[wcl_token_definition
75 template <typename Lexer>
76 struct word_count_tokens : lex::lexer<Lexer>
77 {
word_count_tokensword_count_tokens78     word_count_tokens()
79       : c(0), w(0), l(0)
80       , word("[^ \t\n]+")     // define tokens
81       , eol("\n")
82       , any(".")
83     {
84         using boost::spirit::lex::_start;
85         using boost::spirit::lex::_end;
86         using boost::phoenix::ref;
87 
88         // associate tokens with the lexer
89         this->self
90             =   word  [++ref(w), ref(c) += distance(_start, _end)]
91             |   eol   [++ref(c), ++ref(l)]
92             |   any   [++ref(c)]
93             ;
94     }
95 
96     std::size_t c, w, l;
97     lex::token_def<> word, eol, any;
98 };
99 //]
100 
101 ///////////////////////////////////////////////////////////////////////////////
102 //[wcl_main
main(int argc,char * argv[])103 int main(int argc, char* argv[])
104 {
105 
106 /*<  Specifying `omit` as the token attribute type generates a token class
107      not holding any token attribute at all (not even the iterator range of the
108      matched input sequence), therefore optimizing the token, the lexer, and
109      possibly the parser implementation as much as possible. Specifying
110      `mpl::false_` as the 3rd template parameter generates a token
111      type and an iterator, both holding no lexer state, allowing for even more
112      aggressive optimizations. As a result the token instances contain the token
113      ids as the only data member.
114 >*/  typedef
115         lex::lexertl::token<char const*, lex::omit, boost::mpl::false_>
116      token_type;
117 
118 /*<  This defines the lexer type to use
119 >*/  typedef lex::lexertl::actor_lexer<token_type> lexer_type;
120 
121 /*<  Create the lexer object instance needed to invoke the lexical analysis
122 >*/  word_count_tokens<lexer_type> word_count_lexer;
123 
124 /*<  Read input from the given file, tokenize all the input, while discarding
125      all generated tokens
126 >*/  std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
127     char const* first = str.c_str();
128     char const* last = &first[str.size()];
129 
130 /*<  Create a pair of iterators returning the sequence of generated tokens
131 >*/  lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
132     lexer_type::iterator_type end = word_count_lexer.end();
133 
134 /*<  Here we simply iterate over all tokens, making sure to break the loop
135      if an invalid token gets returned from the lexer
136 >*/  while (iter != end && token_is_valid(*iter))
137         ++iter;
138 
139     if (iter == end) {
140         std::cout << "lines: " << word_count_lexer.l
141                   << ", words: " << word_count_lexer.w
142                   << ", characters: " << word_count_lexer.c
143                   << "\n";
144     }
145     else {
146         std::string rest(first, last);
147         std::cout << "Lexical analysis failed\n" << "stopped at: \""
148                   << rest << "\"\n";
149     }
150     return 0;
151 }
152 //]
153