• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Boost.Wave: A Standard compliant C++ preprocessor library
3 
4     Sample: Re2C based IDL lexer
5 
6     http://www.boost.org/
7 
8     Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9     Software License, Version 1.0. (See accompanying file
10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12 
13 #if !defined(BOOST_IDL_RE_HPP_BD62775D_1659_4684_872C_03C02543C9A5_INCLUDED)
14 #define BOOST_IDL_RE_HPP_BD62775D_1659_4684_872C_03C02543C9A5_INCLUDED
15 
16 #include <cstdio>
17 
18 #include <string>
19 #include <boost/config.hpp>
20 
21 #if defined(BOOST_HAS_UNISTD_H)
22 #include <unistd.h>
23 #else
24 #include <io.h>
25 #endif
26 
27 #include <boost/assert.hpp>
28 #include <boost/detail/workaround.hpp>
29 
30 // reuse the token ids and re2c helper functions from the default C++ lexer
31 #include <boost/wave/token_ids.hpp>
32 #include <boost/wave/cpplexer/re2clex/aq.hpp>
33 #include <boost/wave/cpplexer/re2clex/scanner.hpp>
34 #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
35 
// Chunk size, in uchar elements, used by fill(): it is both the amount of
// free space fill() guarantees behind s->lim and the maximum number of input
// characters copied per refill.
#define BOOST_WAVE_BSIZE     196608

// Assertions in this file go through BOOST_ASSERT.
#define RE2C_ASSERT BOOST_ASSERT

#if defined(_MSC_VER) && !defined(__COMO__)
#pragma warning (disable: 4101)     // 'foo' : unreferenced local variable
#pragma warning (disable: 4102)     // 'foo' : unreferenced label
#endif

// Glue between the scanner code pulled in from idl.inc and the Scanner
// structure: 'cursor' is the local variable of scan(), 's' is the Scanner.
#define YYCTYPE   uchar
#define YYCURSOR  cursor
#define YYLIMIT   s->lim
#define YYMARKER  s->ptr
#define YYFILL(n) {cursor = fill(s, cursor);}

// Return token id 'i' from scan(): first add the number of erased
// backslash-newline sequences lying before 'cursor' to the line counter,
// then store the current position in the scanner.
// (previous variant, without line accounting:)
//#define BOOST_WAVE_RET(i)    {s->cur = cursor; return (i);}
#define BOOST_WAVE_RET(i)    \
    { \
        s->line += count_backslash_newlines(s, cursor); \
        s->cur = cursor; \
        return (i); \
    } \
    /**/
59 
60 
61 
62 ///////////////////////////////////////////////////////////////////////////////
63 namespace boost {
64 namespace wave {
65 namespace idllexer {
66 namespace re2clex {
67 
68 template<typename Iterator>
69 int
get_one_char(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s)70 get_one_char(boost::wave::cpplexer::re2clex::Scanner<Iterator> *s)
71 {
72     using namespace boost::wave::cpplexer::re2clex;
73     RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
74     if (s->act < s->last)
75         return *(s->act)++;
76     return -1;
77 }
78 
79 template<typename Iterator>
80 std::ptrdiff_t
rewind_stream(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,int cnt)81 rewind_stream (boost::wave::cpplexer::re2clex::Scanner<Iterator> *s, int cnt)
82 {
83     s->act += cnt;
84     RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
85     return s->act - s->first;
86 }
87 
88 template<typename Iterator>
89 std::size_t
get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s)90 get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner<Iterator>* s)
91 {
92     if (!AQ_EMPTY(s->eol_offsets))
93     {
94         return s->eol_offsets->queue[s->eol_offsets->head];
95     }
96     else
97     {
98         return (unsigned int)-1;
99     }
100 }
101 
102 template<typename Iterator>
103 void
adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,std::size_t adjustment)104 adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner<Iterator>* s,
105     std::size_t adjustment)
106 {
107     boost::wave::cpplexer::re2clex::aq_queue q;
108     std::size_t i;
109 
110     if (!s->eol_offsets)
111         s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create();
112 
113     q = s->eol_offsets;
114 
115     if (AQ_EMPTY(q))
116         return;
117 
118     i = q->head;
119     while (i != q->tail)
120     {
121         if (adjustment > q->queue[i])
122             q->queue[i] = 0;
123         else
124             q->queue[i] -= adjustment;
125         ++i;
126         if (i == q->max_size)
127             i = 0;
128     }
129     if (adjustment > q->queue[i])
130         q->queue[i] = 0;
131     else
132         q->queue[i] -= adjustment;
133 }
134 
135 template<typename Iterator>
136 int
count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,boost::wave::cpplexer::re2clex::uchar * cursor)137 count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner<Iterator> *s,
138     boost::wave::cpplexer::re2clex::uchar *cursor)
139 {
140     using namespace boost::wave::cpplexer::re2clex;
141 
142     std::size_t diff, offset;
143     int skipped = 0;
144 
145     /* figure out how many backslash-newlines skipped over unknowingly. */
146     diff = cursor - s->bot;
147     offset = get_first_eol_offset(s);
148     while (offset <= diff && offset != (unsigned int)-1)
149     {
150         skipped++;
151         boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets);
152         offset = get_first_eol_offset(s);
153     }
154     return skipped;
155 }
156 
157 bool
158 is_backslash(
159     boost::wave::cpplexer::re2clex::uchar *p,
160     boost::wave::cpplexer::re2clex::uchar *end, int &len);
161 
162 template<typename Iterator>
163 boost::wave::cpplexer::re2clex::uchar *
fill(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s,boost::wave::cpplexer::re2clex::uchar * cursor)164 fill(boost::wave::cpplexer::re2clex::Scanner<Iterator> *s,
165     boost::wave::cpplexer::re2clex::uchar *cursor)
166 {
167     using namespace std;    // some systems have memcpy etc. in namespace std
168     using namespace boost::wave::cpplexer::re2clex;
169 
170     if(!s->eof)
171     {
172         uchar* p;
173         std::ptrdiff_t cnt = s->tok - s->bot;
174         if(cnt)
175         {
176             memcpy(s->bot, s->tok, s->lim - s->tok);
177             s->tok = s->bot;
178             s->ptr -= cnt;
179             cursor -= cnt;
180             s->lim -= cnt;
181             adjust_eol_offsets(s, cnt);
182         }
183 
184         if((s->top - s->lim) < BOOST_WAVE_BSIZE)
185         {
186             uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
187             if (buf == 0)
188             {
189                 using namespace std;      // some systems have printf in std
190                 if (0 != s->error_proc) {
191                     (*s->error_proc)(s,
192                         cpplexer::lexing_exception::unexpected_error,
193                         "Out of memory!");
194                 }
195                 else
196                     printf("Out of memory!\n");
197 
198                 /* get the scanner to stop */
199                 *cursor = 0;
200                 return cursor;
201             }
202 
203             memcpy(buf, s->tok, s->lim - s->tok);
204             s->tok = buf;
205             s->ptr = &buf[s->ptr - s->bot];
206             cursor = &buf[cursor - s->bot];
207             s->lim = &buf[s->lim - s->bot];
208             s->top = &s->lim[BOOST_WAVE_BSIZE];
209             free(s->bot);
210             s->bot = buf;
211         }
212 
213         cnt = std::distance(s->act, s->last);
214         if (cnt > BOOST_WAVE_BSIZE)
215             cnt = BOOST_WAVE_BSIZE;
216         uchar * dst = s->lim;
217         for (std::ptrdiff_t idx = 0; idx < cnt; ++idx)
218         {
219             *dst++ = *s->act++;
220         }
221         if (cnt != BOOST_WAVE_BSIZE) {
222             s->eof = &s->lim[cnt];
223             *(s->eof)++ = '\0';
224         }
225 
226         /* backslash-newline erasing time */
227 
228         /* first scan for backslash-newline and erase them */
229         for (p = s->lim; p < s->lim + cnt - 2; ++p)
230         {
231             int len = 0;
232             if (is_backslash(p, s->lim + cnt, len))
233             {
234                 if (*(p+len) == '\n')
235                 {
236                     int offset = len + 1;
237                     memmove(p, p + offset, s->lim + cnt - p - offset);
238                     cnt -= offset;
239                     --p;
240                     aq_enqueue(s->eol_offsets, p - s->bot + 1);
241                 }
242                 else if (*(p+len) == '\r')
243                 {
244                     if (*(p+len+1) == '\n')
245                     {
246                         int offset = len + 2;
247                         memmove(p, p + offset, s->lim + cnt - p - offset);
248                         cnt -= offset;
249                         --p;
250                     }
251                     else
252                     {
253                         int offset = len + 1;
254                         memmove(p, p + offset, s->lim + cnt - p - offset);
255                         cnt -= offset;
256                         --p;
257                     }
258                     aq_enqueue(s->eol_offsets, p - s->bot + 1);
259                 }
260             }
261         }
262 
263         /* FIXME: the following code should be fixed to recognize correctly the
264                   trigraph backslash token */
265 
266         /* check to see if what we just read ends in a backslash */
267         if (cnt >= 2)
268         {
269             uchar last = s->lim[cnt-1];
270             uchar last2 = s->lim[cnt-2];
271             /* check \ EOB */
272             if (last == '\\')
273             {
274                 int next = get_one_char(s);
275                 /* check for \ \n or \ \r or \ \r \n straddling the border */
276                 if (next == '\n')
277                 {
278                     --cnt; /* chop the final \, we've already read the \n. */
279                     boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
280                         cnt + (s->lim - s->bot));
281                 }
282                 else if (next == '\r')
283                 {
284                     int next2 = get_one_char(s);
285                     if (next2 == '\n')
286                     {
287                         --cnt; /* skip the backslash */
288                     }
289                     else
290                     {
291                         /* rewind one, and skip one char */
292                         rewind_stream(s, -1);
293                         --cnt;
294                     }
295                     boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
296                         cnt + (s->lim - s->bot));
297                 }
298                 else if (next != -1) /* -1 means end of file */
299                 {
300                     /* next was something else, so rewind the stream */
301                     rewind_stream(s, -1);
302                 }
303             }
304             /* check \ \r EOB */
305             else if (last == '\r' && last2 == '\\')
306             {
307                 int next = get_one_char(s);
308                 if (next == '\n')
309                 {
310                     cnt -= 2; /* skip the \ \r */
311                 }
312                 else
313                 {
314                     /* rewind one, and skip two chars */
315                     rewind_stream(s, -1);
316                     cnt -= 2;
317                 }
318                 boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
319                     cnt + (s->lim - s->bot));
320             }
321             /* check \ \n EOB */
322             else if (last == '\n' && last2 == '\\')
323             {
324                 cnt -= 2;
325                 boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
326                     cnt + (s->lim - s->bot));
327             }
328         }
329 
330         s->lim += cnt;
331         if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
332         {
333             s->eof = s->lim;
334             *(s->eof)++ = '\0';
335         }
336     }
337     return cursor;
338 }
339 
340 ///////////////////////////////////////////////////////////////////////////////
341 //  The scanner function to call whenever a new token is requested
342 template<typename Iterator>
scan(boost::wave::cpplexer::re2clex::Scanner<Iterator> * s)343 BOOST_WAVE_DECL boost::wave::token_id scan(
344     boost::wave::cpplexer::re2clex::Scanner<Iterator> *s)
345 {
346 
347     using namespace boost::wave::cpplexer::re2clex;
348 
349     uchar *cursor = s->tok = s->cur;
350 
351 #include "idl.inc"
352 
353 
354 } /* end of scan */
355 
356 ///////////////////////////////////////////////////////////////////////////////
357 }   // namespace re2clex
358 }   // namespace idllexer
359 }   // namespace wave
360 }   // namespace boost
361 
362 #undef RE2C_ASSERT
363 
364 #endif // !defined(BOOST_IDL_RE_HPP_BD62775D_1659_4684_872C_03C02543C9A5_INCLUDED)
365