• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
#include <boost/detail/lightweight_test.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/foreach.hpp>

#include <cstddef>
#include <iterator>
#include <stack>
#include <string>
#include <vector>
12 
13 using namespace boost::spirit;
14 
15 ///////////////////////////////////////////////////////////////////////////////
16 // semantic action analyzing leading whitespace
// Token ids assigned by the whitespace semantic action. The two values are
// consecutive, starting at 1000.
enum tokenids
{
    ID_INDENT = 1000,       // leading whitespace is wider than the last recorded width
    ID_DEDENT = 1001        // leading whitespace is narrower than the last recorded width
};
22 
23 struct handle_whitespace
24 {
handle_whitespacehandle_whitespace25     handle_whitespace(std::stack<unsigned int>& indents)
26       : indents_(indents) {}
27 
28     template <typename Iterator, typename IdType, typename Context>
operator ()handle_whitespace29     void operator()(Iterator& start, Iterator& end
30       , BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id
31       , Context& ctx)
32     {
33         unsigned int level = 0;
34         if (is_indent(start, end, level)) {
35             id = ID_INDENT;
36             ctx.set_value(level);
37         }
38         else if (is_dedent(start, end, level)) {
39             id = ID_DEDENT;
40             ctx.set_value(level);
41         }
42         else {
43             pass = lex::pass_flags::pass_ignore;
44         }
45     }
46 
47     // Get indentation level, for now (no tabs) we just count the spaces
48     // once we allow tabs in the regex this needs to be expanded
49     template <typename Iterator>
get_indenthandle_whitespace50     unsigned int get_indent(Iterator& start, Iterator& end)
51     {
52         return static_cast<unsigned int>(std::distance(start, end));
53     }
54 
55     template <typename Iterator>
is_dedenthandle_whitespace56     bool is_dedent(Iterator& start, Iterator& end, unsigned int& level)
57     {
58         unsigned int newindent = get_indent(start, end);
59         while (!indents_.empty() && newindent < indents_.top()) {
60             level++;        // dedent one more level
61             indents_.pop();
62         }
63         return level > 0;
64     }
65 
66     // Handle additional indentation
67     template <typename Iterator>
is_indenthandle_whitespace68     bool is_indent(Iterator& start, Iterator& end, unsigned int& level)
69     {
70         unsigned int newindent = get_indent(start, end);
71         if (indents_.empty() || newindent > indents_.top()) {
72             level = 1;      // indent one more level
73             indents_.push(newindent);
74             return true;
75         }
76         return false;
77     }
78 
79     std::stack<unsigned int>& indents_;
80 
81     // silence MSVC warning C4512: assignment operator could not be generated
82     BOOST_DELETED_FUNCTION(handle_whitespace& operator= (handle_whitespace const&));
83 };
84 
85 ///////////////////////////////////////////////////////////////////////////////
86 //  Token definition
87 template <typename Lexer>
88 struct set_token_value : boost::spirit::lex::lexer<Lexer>
89 {
set_token_valueset_token_value90     set_token_value()
91     {
92         using lex::_pass;
93 
94         // define tokens and associate them with the lexer
95         whitespace = "^[ ]+";
96         newline = '\n';
97 
98         this->self = whitespace[ handle_whitespace(indents) ];
99         this->self += newline[ _pass = lex::pass_flags::pass_ignore ];
100     }
101 
102     lex::token_def<unsigned int> whitespace;
103     lex::token_def<> newline;
104     std::stack<unsigned int> indents;
105 };
106 
107 ///////////////////////////////////////////////////////////////////////////////
// One expected token: its id and its unsigned integer value. Arrays of
// token_data are terminated by an entry whose id is -1.
struct token_data
{
    int id;                 // expected token id, or -1 for the terminator
    unsigned int value;     // expected token value
};
113 
114 template <typename Token>
115 inline
test_tokens(token_data const * d,std::vector<Token> const & tokens)116 bool test_tokens(token_data const* d, std::vector<Token> const& tokens)
117 {
118     BOOST_FOREACH(Token const& t, tokens)
119     {
120         if (d->id == -1)
121             return false;           // reached end of expected data
122 
123         typename Token::token_value_type const& value (t.value());
124         if (t.id() != static_cast<std::size_t>(d->id))        // token id must match
125             return false;
126         if (value.which() != 1)     // must have an integer value
127             return false;
128         if (boost::get<unsigned int>(value) != d->value)  // value must match
129             return false;
130         ++d;
131     }
132 
133     return (d->id == -1) ? true : false;
134 }
135 
// Checks the contents of 'indents', from top to bottom, against the expected
// values in 'i', an array terminated by -1. Returns true only if every entry
// matches and both sequences have the same length.
//
// The comparison walks a private copy of the stack, so 'indents' is left
// untouched and the check can be repeated or followed by further inspection.
inline
bool test_indents(int *i, std::stack<unsigned int>& indents)
{
    std::stack<unsigned int> remaining(indents);

    for (/**/; !remaining.empty(); remaining.pop(), ++i)
    {
        if (*i == -1)
            return false;           // stack holds more entries than expected
        if (remaining.top() != static_cast<unsigned int>(*i))
            return false;           // value must match
    }

    // stack exhausted; succeed only if the expected data ends here as well
    return *i == -1;
}
152 
153 ///////////////////////////////////////////////////////////////////////////////
main()154 int main()
155 {
156     namespace lex = boost::spirit::lex;
157     namespace phoenix = boost::phoenix;
158 
159     typedef std::string::iterator base_iterator_type;
160     typedef boost::mpl::vector<unsigned int> token_value_types;
161     typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type;
162     typedef lex::lexertl::actor_lexer<token_type> lexer_type;
163 
164     // test simple indent
165     {
166         set_token_value<lexer_type> lexer;
167         std::vector<token_type> tokens;
168         std::string input("    ");
169         base_iterator_type first = input.begin();
170 
171         using phoenix::arg_names::_1;
172         BOOST_TEST(lex::tokenize(first, input.end(), lexer
173           , phoenix::push_back(phoenix::ref(tokens), _1)));
174 
175         int i[] = { 4, -1 };
176         BOOST_TEST(test_indents(i, lexer.indents));
177 
178         token_data d[] = { { ID_INDENT, 1 }, { -1, 0 } };
179         BOOST_TEST(test_tokens(d, tokens));
180     }
181 
182     // test two indents
183     {
184         set_token_value<lexer_type> lexer;
185         std::vector<token_type> tokens;
186         std::string input(
187             "    \n"
188             "        \n");
189         base_iterator_type first = input.begin();
190 
191         using phoenix::arg_names::_1;
192         BOOST_TEST(lex::tokenize(first, input.end(), lexer
193           , phoenix::push_back(phoenix::ref(tokens), _1)));
194 
195         int i[] = { 8, 4, -1 };
196         BOOST_TEST(test_indents(i, lexer.indents));
197 
198         token_data d[] = {
199             { ID_INDENT, 1 }, { ID_INDENT, 1 }
200           , { -1, 0 } };
201         BOOST_TEST(test_tokens(d, tokens));
202     }
203 
204     // test one dedent
205     {
206         set_token_value<lexer_type> lexer;
207         std::vector<token_type> tokens;
208         std::string input(
209             "    \n"
210             "        \n"
211             "    \n");
212         base_iterator_type first = input.begin();
213 
214         using phoenix::arg_names::_1;
215         BOOST_TEST(lex::tokenize(first, input.end(), lexer
216           , phoenix::push_back(phoenix::ref(tokens), _1)));
217 
218         int i[] = { 4, -1 };
219         BOOST_TEST(test_indents(i, lexer.indents));
220 
221         token_data d[] = {
222             { ID_INDENT, 1 }, { ID_INDENT, 1 }
223           , { ID_DEDENT, 1 }
224           , { -1, 0 } };
225         BOOST_TEST(test_tokens(d, tokens));
226     }
227 
228     // test two dedents
229     {
230         set_token_value<lexer_type> lexer;
231         std::vector<token_type> tokens;
232         std::string input(
233             "    \n"
234             "        \n"
235             "            \n"
236             "    \n");
237         base_iterator_type first = input.begin();
238 
239         using phoenix::arg_names::_1;
240         BOOST_TEST(lex::tokenize(first, input.end(), lexer
241           , phoenix::push_back(phoenix::ref(tokens), _1)));
242 
243         int i[] = { 4, -1 };
244         BOOST_TEST(test_indents(i, lexer.indents));
245 
246         token_data d[] = {
247             { ID_INDENT, 1 }, { ID_INDENT, 1 }, { ID_INDENT, 1 }
248           , { ID_DEDENT, 2 }
249           , { -1, 0 } };
250         BOOST_TEST(test_tokens(d, tokens));
251     }
252 
253     return boost::report_errors();
254 }
255 
256