1 // Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6 #include <boost/detail/lightweight_test.hpp>
7 #include <boost/spirit/include/phoenix_object.hpp>
8 #include <boost/spirit/include/phoenix_operator.hpp>
9 #include <boost/spirit/include/phoenix_container.hpp>
10 #include <boost/spirit/include/lex_lexertl.hpp>
11 #include <boost/foreach.hpp>
12
13 using namespace boost::spirit;
14
15 ///////////////////////////////////////////////////////////////////////////////
16 // semantic action analyzing leading whitespace
// Token ids that the whitespace semantic action (handle_whitespace) assigns
// to a matched run of leading spaces.
enum tokenids
{
    // assigned when is_indent() reports a deeper indentation
    ID_INDENT = 1000,
    // assigned when is_dedent() reports that one or more levels were closed
    ID_DEDENT = ID_INDENT + 1
};
22
23 struct handle_whitespace
24 {
handle_whitespacehandle_whitespace25 handle_whitespace(std::stack<unsigned int>& indents)
26 : indents_(indents) {}
27
28 template <typename Iterator, typename IdType, typename Context>
operator ()handle_whitespace29 void operator()(Iterator& start, Iterator& end
30 , BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id
31 , Context& ctx)
32 {
33 unsigned int level = 0;
34 if (is_indent(start, end, level)) {
35 id = ID_INDENT;
36 ctx.set_value(level);
37 }
38 else if (is_dedent(start, end, level)) {
39 id = ID_DEDENT;
40 ctx.set_value(level);
41 }
42 else {
43 pass = lex::pass_flags::pass_ignore;
44 }
45 }
46
47 // Get indentation level, for now (no tabs) we just count the spaces
48 // once we allow tabs in the regex this needs to be expanded
49 template <typename Iterator>
get_indenthandle_whitespace50 unsigned int get_indent(Iterator& start, Iterator& end)
51 {
52 return static_cast<unsigned int>(std::distance(start, end));
53 }
54
55 template <typename Iterator>
is_dedenthandle_whitespace56 bool is_dedent(Iterator& start, Iterator& end, unsigned int& level)
57 {
58 unsigned int newindent = get_indent(start, end);
59 while (!indents_.empty() && newindent < indents_.top()) {
60 level++; // dedent one more level
61 indents_.pop();
62 }
63 return level > 0;
64 }
65
66 // Handle additional indentation
67 template <typename Iterator>
is_indenthandle_whitespace68 bool is_indent(Iterator& start, Iterator& end, unsigned int& level)
69 {
70 unsigned int newindent = get_indent(start, end);
71 if (indents_.empty() || newindent > indents_.top()) {
72 level = 1; // indent one more level
73 indents_.push(newindent);
74 return true;
75 }
76 return false;
77 }
78
79 std::stack<unsigned int>& indents_;
80
81 // silence MSVC warning C4512: assignment operator could not be generated
82 BOOST_DELETED_FUNCTION(handle_whitespace& operator= (handle_whitespace const&));
83 };
84
85 ///////////////////////////////////////////////////////////////////////////////
86 // Token definition
87 template <typename Lexer>
88 struct set_token_value : boost::spirit::lex::lexer<Lexer>
89 {
set_token_valueset_token_value90 set_token_value()
91 {
92 using lex::_pass;
93
94 // define tokens and associate them with the lexer
95 whitespace = "^[ ]+";
96 newline = '\n';
97
98 this->self = whitespace[ handle_whitespace(indents) ];
99 this->self += newline[ _pass = lex::pass_flags::pass_ignore ];
100 }
101
102 lex::token_def<unsigned int> whitespace;
103 lex::token_def<> newline;
104 std::stack<unsigned int> indents;
105 };
106
107 ///////////////////////////////////////////////////////////////////////////////
// One entry of an expected-token table consumed by test_tokens().
// Kept as a plain aggregate so tables can be written with brace
// initialization, e.g. { ID_INDENT, 1 }.
struct token_data
{
    // expected token id; test_tokens() treats -1 as the end-of-table marker
    int id;
    // expected unsigned int token value
    unsigned int value;
};
113
114 template <typename Token>
115 inline
test_tokens(token_data const * d,std::vector<Token> const & tokens)116 bool test_tokens(token_data const* d, std::vector<Token> const& tokens)
117 {
118 BOOST_FOREACH(Token const& t, tokens)
119 {
120 if (d->id == -1)
121 return false; // reached end of expected data
122
123 typename Token::token_value_type const& value (t.value());
124 if (t.id() != static_cast<std::size_t>(d->id)) // token id must match
125 return false;
126 if (value.which() != 1) // must have an integer value
127 return false;
128 if (boost::get<unsigned int>(value) != d->value) // value must match
129 return false;
130 ++d;
131 }
132
133 return (d->id == -1) ? true : false;
134 }
135
// Checks the contents of an indentation stack against an expected list.
//
// i        expected widths listed from the top of the stack downwards,
//          terminated by -1.
// indents  the stack to verify; entries are popped as they are compared, so
//          the stack is consumed (fully on success, partially on failure).
//
// Returns true only if every stack entry equals the corresponding expected
// width and the expected list ends exactly when the stack is empty.
inline
bool test_indents(int *i, std::stack<unsigned int>& indents)
{
    for (; !indents.empty(); indents.pop(), ++i)
    {
        bool const out_of_expected = (*i == -1);
        if (out_of_expected || static_cast<unsigned int>(*i) != indents.top())
            return false;
    }

    // stack exhausted; the expected list must be exhausted too
    return *i == -1;
}
152
153 ///////////////////////////////////////////////////////////////////////////////
main()154 int main()
155 {
156 namespace lex = boost::spirit::lex;
157 namespace phoenix = boost::phoenix;
158
159 typedef std::string::iterator base_iterator_type;
160 typedef boost::mpl::vector<unsigned int> token_value_types;
161 typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type;
162 typedef lex::lexertl::actor_lexer<token_type> lexer_type;
163
164 // test simple indent
165 {
166 set_token_value<lexer_type> lexer;
167 std::vector<token_type> tokens;
168 std::string input(" ");
169 base_iterator_type first = input.begin();
170
171 using phoenix::arg_names::_1;
172 BOOST_TEST(lex::tokenize(first, input.end(), lexer
173 , phoenix::push_back(phoenix::ref(tokens), _1)));
174
175 int i[] = { 4, -1 };
176 BOOST_TEST(test_indents(i, lexer.indents));
177
178 token_data d[] = { { ID_INDENT, 1 }, { -1, 0 } };
179 BOOST_TEST(test_tokens(d, tokens));
180 }
181
182 // test two indents
183 {
184 set_token_value<lexer_type> lexer;
185 std::vector<token_type> tokens;
186 std::string input(
187 " \n"
188 " \n");
189 base_iterator_type first = input.begin();
190
191 using phoenix::arg_names::_1;
192 BOOST_TEST(lex::tokenize(first, input.end(), lexer
193 , phoenix::push_back(phoenix::ref(tokens), _1)));
194
195 int i[] = { 8, 4, -1 };
196 BOOST_TEST(test_indents(i, lexer.indents));
197
198 token_data d[] = {
199 { ID_INDENT, 1 }, { ID_INDENT, 1 }
200 , { -1, 0 } };
201 BOOST_TEST(test_tokens(d, tokens));
202 }
203
204 // test one dedent
205 {
206 set_token_value<lexer_type> lexer;
207 std::vector<token_type> tokens;
208 std::string input(
209 " \n"
210 " \n"
211 " \n");
212 base_iterator_type first = input.begin();
213
214 using phoenix::arg_names::_1;
215 BOOST_TEST(lex::tokenize(first, input.end(), lexer
216 , phoenix::push_back(phoenix::ref(tokens), _1)));
217
218 int i[] = { 4, -1 };
219 BOOST_TEST(test_indents(i, lexer.indents));
220
221 token_data d[] = {
222 { ID_INDENT, 1 }, { ID_INDENT, 1 }
223 , { ID_DEDENT, 1 }
224 , { -1, 0 } };
225 BOOST_TEST(test_tokens(d, tokens));
226 }
227
228 // test two dedents
229 {
230 set_token_value<lexer_type> lexer;
231 std::vector<token_type> tokens;
232 std::string input(
233 " \n"
234 " \n"
235 " \n"
236 " \n");
237 base_iterator_type first = input.begin();
238
239 using phoenix::arg_names::_1;
240 BOOST_TEST(lex::tokenize(first, input.end(), lexer
241 , phoenix::push_back(phoenix::ref(tokens), _1)));
242
243 int i[] = { 4, -1 };
244 BOOST_TEST(test_indents(i, lexer.indents));
245
246 token_data d[] = {
247 { ID_INDENT, 1 }, { ID_INDENT, 1 }, { ID_INDENT, 1 }
248 , { ID_DEDENT, 2 }
249 , { -1, 0 } };
250 BOOST_TEST(test_tokens(d, tokens));
251 }
252
253 return boost::report_errors();
254 }
255
256