1 /*=============================================================================
2 Boost.Wave: A Standard compliant C++ preprocessor library
3
4 Sample: Re2C based IDL lexer
5
6 http://www.boost.org/
7
8 Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9 Software License, Version 1.0. (See accompanying file
10 LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12
13 #if !defined(BOOST_IDL_RE_HPP_BD62775D_1659_4684_872C_03C02543C9A5_INCLUDED)
14 #define BOOST_IDL_RE_HPP_BD62775D_1659_4684_872C_03C02543C9A5_INCLUDED
15
16 #include <cstdio>
17
18 #include <string>
19 #include <boost/config.hpp>
20
21 #if defined(BOOST_HAS_UNISTD_H)
22 #include <unistd.h>
23 #else
24 #include <io.h>
25 #endif
26
27 #include <boost/assert.hpp>
28 #include <boost/detail/workaround.hpp>
29
30 // reuse the token ids and re2c helper functions from the default C++ lexer
31 #include <boost/wave/token_ids.hpp>
32 #include <boost/wave/cpplexer/re2clex/aq.hpp>
33 #include <boost/wave/cpplexer/re2clex/scanner.hpp>
34 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
35
// Number of characters fill() tries to read from the input per refill, and
// the amount of free space it keeps available behind the buffered data
// (196608 == 192 * 1024).
#define BOOST_WAVE_BSIZE 196608

// Assertions in this header go through BOOST_ASSERT; the alias is removed
// again at the end of the file.
#define RE2C_ASSERT BOOST_ASSERT

#if defined(_MSC_VER) && !defined(__COMO__)
#pragma warning (disable: 4101) // 'foo' : unreferenced local variable
#pragma warning (disable: 4102) // 'foo' : unreferenced label
#endif

// Scanner interface macros. They expand to the local names 's' (the Scanner)
// and 'cursor' that scan() sets up before including idl.inc, so they are only
// meaningful inside that function.
// NOTE(review): these are presumably the hooks expected by the re2c-generated
// code in idl.inc -- that file is not visible from here.
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
// Refill request: the requested count 'n' is ignored, fill() decides how much
// to read and may return a relocated cursor.
#define YYFILL(n) {cursor = fill(s, cursor);}

// Return a token id from scan(): first account for the backslash-newlines
// that were erased in front of 'cursor' by adding them to the line counter,
// then store the cursor back into the scanner.
//#define BOOST_WAVE_RET(i) {s->cur = cursor; return (i);}
#define BOOST_WAVE_RET(i) \
{ \
s->line += count_backslash_newlines(s, cursor); \
s->cur = cursor; \
return (i); \
} \
/**/
59
60
61
62 ///////////////////////////////////////////////////////////////////////////////
63 namespace boost {
64 namespace wave {
65 namespace idllexer {
66 namespace re2clex {
67
68 template<typename Iterator>
69 int
get_one_char(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s)70 get_one_char(boost::wave::cpplexer::re2clex::Scanner<Iterator> *s)
71 {
72 using namespace boost::wave::cpplexer::re2clex;
73 RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
74 if (s->act < s->last)
75 return *(s->act)++;
76 return -1;
77 }
78
79 template<typename Iterator>
80 std::ptrdiff_t
rewind_stream(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,int cnt)81 rewind_stream (boost::wave::cpplexer::re2clex::Scanner<Iterator> *s, int cnt)
82 {
83 s->act += cnt;
84 RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
85 return s->act - s->first;
86 }
87
88 template<typename Iterator>
89 std::size_t
get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s)90 get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner<Iterator>* s)
91 {
92 if (!AQ_EMPTY(s->eol_offsets))
93 {
94 return s->eol_offsets->queue[s->eol_offsets->head];
95 }
96 else
97 {
98 return (unsigned int)-1;
99 }
100 }
101
102 template<typename Iterator>
103 void
adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,std::size_t adjustment)104 adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner<Iterator>* s,
105 std::size_t adjustment)
106 {
107 boost::wave::cpplexer::re2clex::aq_queue q;
108 std::size_t i;
109
110 if (!s->eol_offsets)
111 s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create();
112
113 q = s->eol_offsets;
114
115 if (AQ_EMPTY(q))
116 return;
117
118 i = q->head;
119 while (i != q->tail)
120 {
121 if (adjustment > q->queue[i])
122 q->queue[i] = 0;
123 else
124 q->queue[i] -= adjustment;
125 ++i;
126 if (i == q->max_size)
127 i = 0;
128 }
129 if (adjustment > q->queue[i])
130 q->queue[i] = 0;
131 else
132 q->queue[i] -= adjustment;
133 }
134
135 template<typename Iterator>
136 int
count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,boost::wave::cpplexer::re2clex::uchar * cursor)137 count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner<Iterator> *s,
138 boost::wave::cpplexer::re2clex::uchar *cursor)
139 {
140 using namespace boost::wave::cpplexer::re2clex;
141
142 std::size_t diff, offset;
143 int skipped = 0;
144
145 /* figure out how many backslash-newlines skipped over unknowingly. */
146 diff = cursor - s->bot;
147 offset = get_first_eol_offset(s);
148 while (offset <= diff && offset != (unsigned int)-1)
149 {
150 skipped++;
151 boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets);
152 offset = get_first_eol_offset(s);
153 }
154 return skipped;
155 }
156
// Declaration only; the definition is not part of this header.
//
// fill() calls this with a position p inside the freshly read data, the end
// of that data, and an out-parameter len. When the call returns true, fill()
// looks at the character at p + len for a line ending.
// NOTE(review): len is presumably the number of characters taken up by the
// backslash spelling found at p -- confirm against the definition.
bool
is_backslash(
    boost::wave::cpplexer::re2clex::uchar *p,
    boost::wave::cpplexer::re2clex::uchar *end, int &len);
161
162 template<typename Iterator>
163 boost::wave::cpplexer::re2clex::uchar *
fill(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,boost::wave::cpplexer::re2clex::uchar * cursor)164 fill(boost::wave::cpplexer::re2clex::Scanner<Iterator> *s,
165 boost::wave::cpplexer::re2clex::uchar *cursor)
166 {
167 using namespace std; // some systems have memcpy etc. in namespace std
168 using namespace boost::wave::cpplexer::re2clex;
169
170 if(!s->eof)
171 {
172 uchar* p;
173 std::ptrdiff_t cnt = s->tok - s->bot;
174 if(cnt)
175 {
176 memcpy(s->bot, s->tok, s->lim - s->tok);
177 s->tok = s->bot;
178 s->ptr -= cnt;
179 cursor -= cnt;
180 s->lim -= cnt;
181 adjust_eol_offsets(s, cnt);
182 }
183
184 if((s->top - s->lim) < BOOST_WAVE_BSIZE)
185 {
186 uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
187 if (buf == 0)
188 {
189 using namespace std; // some systems have printf in std
190 if (0 != s->error_proc) {
191 (*s->error_proc)(s,
192 cpplexer::lexing_exception::unexpected_error,
193 "Out of memory!");
194 }
195 else
196 printf("Out of memory!\n");
197
198 /* get the scanner to stop */
199 *cursor = 0;
200 return cursor;
201 }
202
203 memcpy(buf, s->tok, s->lim - s->tok);
204 s->tok = buf;
205 s->ptr = &buf[s->ptr - s->bot];
206 cursor = &buf[cursor - s->bot];
207 s->lim = &buf[s->lim - s->bot];
208 s->top = &s->lim[BOOST_WAVE_BSIZE];
209 free(s->bot);
210 s->bot = buf;
211 }
212
213 cnt = std::distance(s->act, s->last);
214 if (cnt > BOOST_WAVE_BSIZE)
215 cnt = BOOST_WAVE_BSIZE;
216 uchar * dst = s->lim;
217 for (std::ptrdiff_t idx = 0; idx < cnt; ++idx)
218 {
219 *dst++ = *s->act++;
220 }
221 if (cnt != BOOST_WAVE_BSIZE) {
222 s->eof = &s->lim[cnt];
223 *(s->eof)++ = '\0';
224 }
225
226 /* backslash-newline erasing time */
227
228 /* first scan for backslash-newline and erase them */
229 for (p = s->lim; p < s->lim + cnt - 2; ++p)
230 {
231 int len = 0;
232 if (is_backslash(p, s->lim + cnt, len))
233 {
234 if (*(p+len) == '\n')
235 {
236 int offset = len + 1;
237 memmove(p, p + offset, s->lim + cnt - p - offset);
238 cnt -= offset;
239 --p;
240 aq_enqueue(s->eol_offsets, p - s->bot + 1);
241 }
242 else if (*(p+len) == '\r')
243 {
244 if (*(p+len+1) == '\n')
245 {
246 int offset = len + 2;
247 memmove(p, p + offset, s->lim + cnt - p - offset);
248 cnt -= offset;
249 --p;
250 }
251 else
252 {
253 int offset = len + 1;
254 memmove(p, p + offset, s->lim + cnt - p - offset);
255 cnt -= offset;
256 --p;
257 }
258 aq_enqueue(s->eol_offsets, p - s->bot + 1);
259 }
260 }
261 }
262
263 /* FIXME: the following code should be fixed to recognize correctly the
264 trigraph backslash token */
265
266 /* check to see if what we just read ends in a backslash */
267 if (cnt >= 2)
268 {
269 uchar last = s->lim[cnt-1];
270 uchar last2 = s->lim[cnt-2];
271 /* check \ EOB */
272 if (last == '\\')
273 {
274 int next = get_one_char(s);
275 /* check for \ \n or \ \r or \ \r \n straddling the border */
276 if (next == '\n')
277 {
278 --cnt; /* chop the final \, we've already read the \n. */
279 boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
280 cnt + (s->lim - s->bot));
281 }
282 else if (next == '\r')
283 {
284 int next2 = get_one_char(s);
285 if (next2 == '\n')
286 {
287 --cnt; /* skip the backslash */
288 }
289 else
290 {
291 /* rewind one, and skip one char */
292 rewind_stream(s, -1);
293 --cnt;
294 }
295 boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
296 cnt + (s->lim - s->bot));
297 }
298 else if (next != -1) /* -1 means end of file */
299 {
300 /* next was something else, so rewind the stream */
301 rewind_stream(s, -1);
302 }
303 }
304 /* check \ \r EOB */
305 else if (last == '\r' && last2 == '\\')
306 {
307 int next = get_one_char(s);
308 if (next == '\n')
309 {
310 cnt -= 2; /* skip the \ \r */
311 }
312 else
313 {
314 /* rewind one, and skip two chars */
315 rewind_stream(s, -1);
316 cnt -= 2;
317 }
318 boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
319 cnt + (s->lim - s->bot));
320 }
321 /* check \ \n EOB */
322 else if (last == '\n' && last2 == '\\')
323 {
324 cnt -= 2;
325 boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
326 cnt + (s->lim - s->bot));
327 }
328 }
329
330 s->lim += cnt;
331 if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
332 {
333 s->eof = s->lim;
334 *(s->eof)++ = '\0';
335 }
336 }
337 return cursor;
338 }
339
///////////////////////////////////////////////////////////////////////////////
//  The scanner function to call whenever a new token is requested
//
//  s   the scanner state; scanning starts at s->cur, which is also stored as
//      the start of the new token (s->tok)
//
//  The function body is the textually included file idl.inc. The YY* macros
//  and BOOST_WAVE_RET defined at the top of this header expand to the local
//  names 's' and 'cursor', so neither may be renamed here.
//  NOTE(review): idl.inc is presumably the re2c-generated state machine and is
//  expected to leave the function through BOOST_WAVE_RET -- it is not visible
//  from this file.
template<typename Iterator>
BOOST_WAVE_DECL boost::wave::token_id scan(
    boost::wave::cpplexer::re2clex::Scanner<Iterator> *s)
{

    using namespace boost::wave::cpplexer::re2clex;

    // start a new token at the position where the previous one ended
    uchar *cursor = s->tok = s->cur;

#include "idl.inc"


} /* end of scan */
355
356 ///////////////////////////////////////////////////////////////////////////////
357 } // namespace re2clex
358 } // namespace idllexer
359 } // namespace wave
360 } // namespace boost
361
362 #undef RE2C_ASSERT
363
364 #endif // !defined(BOOST_IDL_RE_HPP_BD62775D_1659_4684_872C_03C02543C9A5_INCLUDED)
365