• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*=============================================================================
2     Boost.Wave: A Standard compliant C++ preprocessor library
3 
4     Spirit based lexer
5 
6     http://www.boost.org/
7 
8     Copyright (c) 2001, Daniel C. Nuffer.
9     Copyright (c) 2001-2012 Hartmut Kaiser.
10     Distributed under the Boost Software License, Version 1.0. (See accompanying
11     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
12 
13     TODO List:
14         X callback objects (called when a match is made.)
15         X    callback passed first & last iterator, and
16                 a reference to a lexercontrol object that supports some
17                 operations on the lexer.
18                     set state
19                     terminate
20                     state stack (push, pop, top)
21                     set new token return value
22                     ignore the current token
23                     yymore
24                     get length of matched token
25                     get current lexer state
26         X DFA serialization to save recomputing the DFA.
27 
28         lexer states.
29         organize the file into hpp and ipp. arrange the classes in a logical order.
30         use buffering - iterator only needs be an input iterator,
31            lexer & callback are not templatized on iterator type, but char type.
32            next_token is templatized on iterator type.
33         beginning/ending contexts.
34         ^ and $
35         DFA minimization.
36         DFA table compression.
37 
38 =============================================================================*/
39 #ifndef BOOST_SPIRIT_LEXER_HPP
40 #define BOOST_SPIRIT_LEXER_HPP
41 
42 ///////////////////////////////////////////////////////////////////////////////
43 #include <boost/config.hpp>
44 #include <boost/throw_exception.hpp>
45 
46 #include <boost/spirit/include/classic_core.hpp>
47 #include <boost/spirit/include/classic_symbols.hpp>
48 #include <boost/spirit/include/classic_chset.hpp>
49 #include <boost/spirit/include/classic_escape_char.hpp>
50 
51 #include <set>
52 #include <map>
53 #include <memory> // for auto_ptr/unique_ptr
54 #include <vector>
55 #include <stack>
56 #include <utility> // for pair
57 #if defined(BOOST_SPIRIT_DEBUG)
58 #include <iostream>
59 #endif
60 #include <fstream>
61 #include <boost/assert.hpp>
62 #include <boost/limits.hpp>
63 
64 #if defined(BOOST_NO_STD_ITERATOR_TRAITS)
65 #define BOOST_SPIRIT_IT_NS impl
66 #else
67 #define BOOST_SPIRIT_IT_NS std
68 #endif
69 
70 ///////////////////////////////////////////////////////////////////////////////
71 namespace boost {
72 namespace spirit {
73 namespace classic {
74 
75 typedef unsigned char uchar;
76 typedef unsigned int node_id_t;
77 const node_id_t invalid_node = node_id_t(-1);
78 typedef std::set<node_id_t> node_set;
79 typedef std::vector<uchar> uchar_vector;
80 typedef std::map<node_id_t, node_set> followpos_t;
81 typedef std::vector<uchar_vector> state_match_t;
82 
83 template <typename TokenT>
84 class lexer_control;
85 
86 class bad_regex : public std::exception
87 {
88 };
89 
90 namespace lexerimpl
91 {
92 
93 class node
94 {
95 public:
96 
~node()97     virtual ~node() {}
98 
99     virtual node* clone() const = 0;
100     virtual bool nullable() const = 0;
101     virtual node_set firstpos() const = 0;
102     virtual node_set lastpos() const = 0;
103     virtual void compute_followpos(followpos_t& followpos) const = 0;
104     virtual void compute_state_match(state_match_t& state_match) const = 0;
105     virtual void get_eof_ids(node_set& eof_set) const = 0;
106     virtual void assign_node_ids(node_id_t& node_count) = 0;
107 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
108     virtual void dump(std::ostream& out) const = 0;
109 #endif
110 
111 };
112 
113 class char_node : public node
114 {
115 
116 public:
117 
118     char_node(const uchar c);
119     char_node(const char_node& x);
~char_node()120     virtual ~char_node(){}
121 
122     node* clone() const BOOST_OVERRIDE;
123     bool nullable() const BOOST_OVERRIDE;
124     node_set firstpos() const BOOST_OVERRIDE;
125     node_set lastpos() const BOOST_OVERRIDE;
126     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
127     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
128     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
129     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
130 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
131     void dump(std::ostream& out) const BOOST_OVERRIDE;
132 #endif
133 
134 private:
135 
136     uchar m_char;
137     node_id_t m_node_num;
138 };
139 
140 inline
char_node(const uchar c)141 char_node::char_node(const uchar c)
142     : node()
143     , m_char(c)
144     , m_node_num(0)
145 {
146 }
147 
148 inline
char_node(const char_node & x)149 char_node::char_node(const char_node& x)
150     : node(x)
151     , m_char(x.m_char)
152     , m_node_num(x.m_node_num)
153 {
154 }
155 
156 inline node *
clone() const157 char_node::clone() const
158 {
159     return new char_node(*this);
160 }
161 
162 inline bool
nullable() const163 char_node::nullable() const
164 {
165     return false;
166 }
167 
168 inline node_set
firstpos() const169 char_node::firstpos() const
170 {
171     node_set rval;
172     rval.insert(m_node_num);
173     return rval;
174 }
175 
176 inline node_set
lastpos() const177 char_node::lastpos() const
178 {
179     return firstpos();
180 }
181 
182 inline void
compute_followpos(followpos_t &) const183 char_node::compute_followpos(followpos_t&) const
184 {
185     return;
186 }
187 
188 inline void
compute_state_match(state_match_t & state_match) const189 char_node::compute_state_match(state_match_t& state_match) const
190 {
191     if (state_match.size() < m_node_num + 1)
192         state_match.resize(m_node_num + 1);
193     state_match[m_node_num].resize(256);
194     state_match[m_node_num][m_char] = 1;
195 }
196 
197 inline void
get_eof_ids(node_set &) const198 char_node::get_eof_ids(node_set&) const
199 {
200     return;
201 }
202 
203 inline void
assign_node_ids(node_id_t & node_count)204 char_node::assign_node_ids(node_id_t& node_count)
205 {
206     m_node_num = node_count++;
207 }
208 
209 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
210 inline void
dump(std::ostream & out) const211 char_node::dump(std::ostream& out) const
212 {
213     out << "\nchar_node m_char = " << m_char;
214     out << " m_node_num = " << m_node_num;
215     out << " nullable() = " << (nullable() ? "true" : "false");
216     out << " firstpos() = ";
217     node_set fp = firstpos();
218     std::copy(fp.begin(), fp.end(),
219             std::ostream_iterator<node_id_t>(out, ","));
220 
221     out << " lastpos() = ";
222     node_set lp = lastpos();
223     std::copy(lp.begin(), lp.end(),
224             std::ostream_iterator<node_id_t>(out, ","));
225 }
226 #endif
227 
228 
229 class epsilon_node : public node
230 {
231 
232 public:
233 
234     epsilon_node();
235     epsilon_node(const epsilon_node& x);
~epsilon_node()236     virtual ~epsilon_node(){}
237 
238     node* clone() const BOOST_OVERRIDE;
239     bool nullable() const BOOST_OVERRIDE;
240     node_set firstpos() const BOOST_OVERRIDE;
241     node_set lastpos() const BOOST_OVERRIDE;
242     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
243     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
244     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
245     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
246 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
247     void dump(std::ostream& out) const BOOST_OVERRIDE;
248 #endif
249 
250 private:
251 
252     node_id_t m_node_num;
253 };
254 
255 inline
epsilon_node()256 epsilon_node::epsilon_node()
257     : node()
258     , m_node_num(0)
259 {
260 }
261 
262 inline
epsilon_node(const epsilon_node & x)263 epsilon_node::epsilon_node(const epsilon_node& x)
264     : node(x)
265     , m_node_num(x.m_node_num)
266 {
267 }
268 
269 inline node *
clone() const270 epsilon_node::clone() const
271 {
272     return new epsilon_node(*this);
273 }
274 
275 inline bool
nullable() const276 epsilon_node::nullable() const
277 {
278     return true;
279 }
280 
281 inline node_set
firstpos() const282 epsilon_node::firstpos() const
283 {
284     return node_set();
285 }
286 
287 inline node_set
lastpos() const288 epsilon_node::lastpos() const
289 {
290     return node_set();
291 }
292 
293 inline void
compute_followpos(followpos_t &) const294 epsilon_node::compute_followpos(followpos_t&) const
295 {
296     return;
297 }
298 
299 inline void
compute_state_match(state_match_t & state_match) const300 epsilon_node::compute_state_match(state_match_t& state_match) const
301 {
302     if (state_match.size() < m_node_num + 1)
303         state_match.resize(m_node_num + 1);
304     state_match[m_node_num].resize(256, 1);
305 }
306 
307 inline void
get_eof_ids(node_set &) const308 epsilon_node::get_eof_ids(node_set&) const
309 {
310     return;
311 }
312 
313 inline void
assign_node_ids(node_id_t & node_count)314 epsilon_node::assign_node_ids(node_id_t& node_count)
315 {
316     m_node_num = node_count++;
317 }
318 
319 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
320 inline void
dump(std::ostream & out) const321 epsilon_node::dump(std::ostream& out) const
322 {
323     out << "\nepsilon_node";
324     out << " m_node_num = " << m_node_num;
325     out << " nullable() = " << (nullable() ? "true" : "false");
326     out << " firstpos() = ";
327     node_set fp = firstpos();
328     std::copy(fp.begin(), fp.end(),
329             std::ostream_iterator<node_id_t>(out, ","));
330 
331     out << " lastpos() = ";
332     node_set lp = lastpos();
333     std::copy(lp.begin(), lp.end(),
334             std::ostream_iterator<node_id_t>(out, ","));
335 }
336 #endif
337 
338 
339 class or_node : public node
340 {
341 
342 public:
343 
344     or_node(node* left, node* right);
345     or_node(const or_node& x);
~or_node()346     virtual ~or_node(){}
347 
348     node* clone() const BOOST_OVERRIDE;
349     bool nullable() const BOOST_OVERRIDE;
350     node_set firstpos() const BOOST_OVERRIDE;
351     node_set lastpos() const BOOST_OVERRIDE;
352     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
353     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
354     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
355     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
356 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
357     void dump(std::ostream& out) const BOOST_OVERRIDE;
358 #endif
359 
360 private:
361 
362 #ifndef BOOST_NO_CXX11_SMART_PTR
363     std::unique_ptr<node> m_left;
364     std::unique_ptr<node> m_right;
365 #else
366     std::auto_ptr<node> m_left;
367     std::auto_ptr<node> m_right;
368 #endif
369 };
370 
371 inline
or_node(node * left,node * right)372 or_node::or_node(node* left, node* right)
373     : node()
374     , m_left(left)
375     , m_right(right)
376 {
377 }
378 
379 inline
or_node(const or_node & x)380 or_node::or_node(const or_node& x)
381     : node(x)
382     , m_left(x.m_left->clone())
383     , m_right(x.m_right->clone())
384 {
385 }
386 
387 inline node *
clone() const388 or_node::clone() const
389 {
390     return new or_node(m_left->clone(), m_right->clone());
391 }
392 
393 inline bool
nullable() const394 or_node::nullable() const
395 {
396     return m_left->nullable() || m_right->nullable();
397 }
398 
399 inline node_set
firstpos() const400 or_node::firstpos() const
401 {
402     node_set rval;
403     node_set l = m_left->firstpos();
404     node_set r = m_right->firstpos();
405     std::set_union(l.begin(), l.end(), r.begin(), r.end(),
406             std::inserter(rval, rval.begin()));
407     return rval;
408 }
409 
410 inline node_set
lastpos() const411 or_node::lastpos() const
412 {
413     node_set rval;
414     node_set l = m_left->lastpos();
415     node_set r = m_right->lastpos();
416     std::set_union(l.begin(), l.end(), r.begin(), r.end(),
417             std::inserter(rval, rval.begin()));
418     return rval;
419 }
420 
421 inline void
compute_followpos(followpos_t & followpos) const422 or_node::compute_followpos(followpos_t& followpos) const
423 {
424     m_left->compute_followpos(followpos);
425     m_right->compute_followpos(followpos);
426 }
427 
428 inline void
compute_state_match(state_match_t & state_match) const429 or_node::compute_state_match(state_match_t& state_match) const
430 {
431     m_left->compute_state_match(state_match);
432     m_right->compute_state_match(state_match);
433 }
434 
435 inline void
get_eof_ids(node_set & eof_nodes) const436 or_node::get_eof_ids(node_set& eof_nodes) const
437 {
438     m_left->get_eof_ids(eof_nodes);
439     m_right->get_eof_ids(eof_nodes);
440 }
441 
442 inline void
assign_node_ids(node_id_t & node_count)443 or_node::assign_node_ids(node_id_t& node_count)
444 {
445     m_left->assign_node_ids(node_count);
446     m_right->assign_node_ids(node_count);
447 }
448 
449 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
450 inline void
dump(std::ostream & out) const451 or_node::dump(std::ostream& out) const
452 {
453     m_left->dump(out);
454 
455     out << "\nor_node";
456     out << " nullable() = " << (nullable() ? "true" : "false");
457     out << " firstpos() = ";
458     node_set fp = firstpos();
459     std::copy(fp.begin(), fp.end(),
460             std::ostream_iterator<node_id_t>(out, ","));
461 
462     out << " lastpos() = ";
463     node_set lp = lastpos();
464     std::copy(lp.begin(), lp.end(),
465             std::ostream_iterator<node_id_t>(out, ","));
466 
467     m_right->dump(out);
468 }
469 #endif
470 
471 
472 class cat_node : public node
473 {
474 
475 public:
476 
477     cat_node(node* left, node* right);
478     cat_node(const cat_node& x);
~cat_node()479     virtual ~cat_node(){}
480 
481     node* clone() const BOOST_OVERRIDE;
482     bool nullable() const BOOST_OVERRIDE;
483     node_set firstpos() const BOOST_OVERRIDE;
484     node_set lastpos() const BOOST_OVERRIDE;
485     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
486     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
487     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
488     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
489 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
490     void dump(std::ostream& out) const BOOST_OVERRIDE;
491 #endif
492 
493 private:
494 
495 #ifndef BOOST_NO_CXX11_SMART_PTR
496     std::unique_ptr<node> m_left;
497     std::unique_ptr<node> m_right;
498 #else
499     std::auto_ptr<node> m_left;
500     std::auto_ptr<node> m_right;
501 #endif
502 };
503 
504 inline
cat_node(node * left,node * right)505 cat_node::cat_node(node* left, node* right)
506     : node()
507     , m_left(left)
508     , m_right(right)
509 {
510 }
511 
512 inline
cat_node(const cat_node & x)513 cat_node::cat_node(const cat_node& x)
514     : node(x)
515     , m_left(x.m_left->clone())
516     , m_right(x.m_right->clone())
517 {
518 }
519 
520 inline node *
clone() const521 cat_node::clone() const
522 {
523     return new cat_node(m_left->clone(), m_right->clone());
524 }
525 
526 inline bool
nullable() const527 cat_node::nullable() const
528 {
529     return m_left->nullable() && m_right->nullable();
530 }
531 
532 inline node_set
firstpos() const533 cat_node::firstpos() const
534 {
535     if (m_left->nullable())
536     {
537         node_set rval;
538         node_set l = m_left->firstpos();
539         node_set r = m_right->firstpos();
540         std::set_union(l.begin(), l.end(), r.begin(), r.end(),
541                 std::inserter(rval, rval.begin()));
542         return rval;
543     }
544     else
545     {
546         return m_left->firstpos();
547     }
548 }
549 
550 inline node_set
lastpos() const551 cat_node::lastpos() const
552 {
553     if (m_right->nullable())
554     {
555         node_set rval;
556         node_set l = m_left->lastpos();
557         node_set r = m_right->lastpos();
558         std::set_union(l.begin(), l.end(), r.begin(), r.end(),
559                 std::inserter(rval, rval.begin()));
560         return rval;
561     }
562     else
563     {
564         return m_right->lastpos();
565     }
566 }
567 
568 inline void
compute_followpos(followpos_t & followpos) const569 cat_node::compute_followpos(followpos_t& followpos) const
570 {
571     node_set l = m_left->lastpos();
572     for (node_set::iterator i = l.begin();
573             i != l.end();
574             ++i)
575     {
576         node_set rf = m_right->firstpos();
577         followpos[*i].insert(rf.begin(), rf.end());
578     }
579 
580     m_left->compute_followpos(followpos);
581     m_right->compute_followpos(followpos);
582 }
583 
584 inline void
compute_state_match(state_match_t & state_match) const585 cat_node::compute_state_match(state_match_t& state_match) const
586 {
587     m_left->compute_state_match(state_match);
588     m_right->compute_state_match(state_match);
589 }
590 
591 inline void
get_eof_ids(node_set & eof_nodes) const592 cat_node::get_eof_ids(node_set& eof_nodes) const
593 {
594     m_left->get_eof_ids(eof_nodes);
595     m_right->get_eof_ids(eof_nodes);
596 }
597 
598 inline void
assign_node_ids(node_id_t & node_count)599 cat_node::assign_node_ids(node_id_t& node_count)
600 {
601     m_left->assign_node_ids(node_count);
602     m_right->assign_node_ids(node_count);
603 }
604 
605 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
606 inline void
dump(std::ostream & out) const607 cat_node::dump(std::ostream& out) const
608 {
609     m_left->dump(out);
610 
611     out << "\ncat_node";
612     out << " nullable() = " << (nullable() ? "true" : "false");
613     out << " firstpos() = ";
614     node_set fp = firstpos();
615     std::copy(fp.begin(), fp.end(),
616             std::ostream_iterator<node_id_t>(out, ","));
617 
618     out << " lastpos() = ";
619     node_set lp = lastpos();
620     std::copy(lp.begin(), lp.end(),
621             std::ostream_iterator<node_id_t>(out, ","));
622 
623     m_right->dump(out);
624 }
625 #endif
626 
627 
628 class star_node : public node
629 {
630 
631 public:
632 
633     star_node(node* left);
634     star_node(const star_node& x);
~star_node()635     virtual ~star_node(){}
636 
637     node* clone() const BOOST_OVERRIDE;
638     bool nullable() const BOOST_OVERRIDE;
639     node_set firstpos() const BOOST_OVERRIDE;
640     node_set lastpos() const BOOST_OVERRIDE;
641     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
642     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
643     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
644     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
645 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
646     void dump(std::ostream& out) const BOOST_OVERRIDE;
647 #endif
648 
649 private:
650 
651 #ifndef BOOST_NO_CXX11_SMART_PTR
652     std::unique_ptr<node> m_left;
653 #else
654     std::auto_ptr<node> m_left;
655 #endif
656 };
657 
658 inline
star_node(node * left)659 star_node::star_node(node* left)
660     : node()
661     , m_left(left)
662 {
663 }
664 
665 inline
star_node(const star_node & x)666 star_node::star_node(const star_node& x)
667     : node(x)
668     , m_left(x.m_left->clone())
669 {
670 }
671 
672 inline node *
clone() const673 star_node::clone() const
674 {
675     return new star_node(m_left->clone());
676 }
677 
678 inline bool
nullable() const679 star_node::nullable() const
680 {
681     return true;
682 }
683 
684 inline node_set
firstpos() const685 star_node::firstpos() const
686 {
687     return m_left->firstpos();
688 }
689 
690 inline node_set
lastpos() const691 star_node::lastpos() const
692 {
693     return m_left->lastpos();
694 }
695 
696 inline void
compute_followpos(followpos_t & followpos) const697 star_node::compute_followpos(followpos_t& followpos) const
698 {
699     node_set lp = this->lastpos();
700     for (node_set::iterator i = lp.begin();
701             i != lp.end();
702             ++i)
703     {
704         node_set fp = this->firstpos();
705         followpos[*i].insert(fp.begin(), fp.end());
706     }
707 
708     m_left->compute_followpos(followpos);
709 }
710 
711 inline void
compute_state_match(state_match_t & state_match) const712 star_node::compute_state_match(state_match_t& state_match) const
713 {
714     m_left->compute_state_match(state_match);
715 }
716 
717 inline void
get_eof_ids(node_set & eof_nodes) const718 star_node::get_eof_ids(node_set& eof_nodes) const
719 {
720     m_left->get_eof_ids(eof_nodes);
721 }
722 
723 inline void
assign_node_ids(node_id_t & node_count)724 star_node::assign_node_ids(node_id_t& node_count)
725 {
726     m_left->assign_node_ids(node_count);
727 }
728 
729 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
730 inline void
dump(std::ostream & out) const731 star_node::dump(std::ostream& out) const
732 {
733     m_left->dump(out);
734 
735     out << "\nstar_node";
736     out << " nullable() = " << (nullable() ? "true" : "false");
737     out << " firstpos() = ";
738     node_set fp = firstpos();
739     std::copy(fp.begin(), fp.end(),
740             std::ostream_iterator<node_id_t>(out, ","));
741 
742     out << " lastpos() = ";
743     node_set lp = lastpos();
744     std::copy(lp.begin(), lp.end(),
745             std::ostream_iterator<node_id_t>(out, ","));
746 
747 }
748 #endif
749 
750 
751 class eof_node : public node
752 {
753 
754 public:
755 
756     eof_node();
757     eof_node(const eof_node& x);
~eof_node()758     virtual ~eof_node(){}
759 
760     node* clone() const BOOST_OVERRIDE;
761     bool nullable() const BOOST_OVERRIDE;
762     node_set firstpos() const BOOST_OVERRIDE;
763     node_set lastpos() const BOOST_OVERRIDE;
764     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
765     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
766     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
767     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
768 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
769     void dump(std::ostream& out) const BOOST_OVERRIDE;
770 #endif
771 
772 private:
773 
774     node_id_t m_node_num;
775 };
776 
777 inline
eof_node()778 eof_node::eof_node()
779     : node()
780     , m_node_num(0)
781 {
782 }
783 
784 inline
eof_node(const eof_node & x)785 eof_node::eof_node(const eof_node& x)
786     : node(x)
787     , m_node_num(x.m_node_num)
788 {
789 }
790 
791 inline node *
clone() const792 eof_node::clone() const
793 {
794     return new eof_node(*this);
795 }
796 
797 inline bool
nullable() const798 eof_node::nullable() const
799 {
800     return false;
801 }
802 
803 inline node_set
firstpos() const804 eof_node::firstpos() const
805 {
806     node_set rval;
807     rval.insert(m_node_num);
808     return rval;
809 }
810 
811 inline node_set
lastpos() const812 eof_node::lastpos() const
813 {
814     node_set rval;
815     rval.insert(m_node_num);
816     return rval;
817 }
818 
819 inline void
compute_followpos(followpos_t &) const820 eof_node::compute_followpos(followpos_t&) const
821 {
822     return;
823 }
824 
825 inline void
compute_state_match(state_match_t & state_match) const826 eof_node::compute_state_match(state_match_t& state_match) const
827 {
828     if (state_match.size() < m_node_num + 1)
829         state_match.resize(m_node_num + 1);
830     state_match[m_node_num].resize(256, 0);
831 }
832 
833 inline void
get_eof_ids(node_set & eof_nodes) const834 eof_node::get_eof_ids(node_set& eof_nodes) const
835 {
836     eof_nodes.insert(m_node_num);
837 }
838 
839 inline void
assign_node_ids(node_id_t & node_count)840 eof_node::assign_node_ids(node_id_t& node_count)
841 {
842     m_node_num = node_count++;
843 }
844 
845 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
846 inline void
dump(std::ostream & out) const847 eof_node::dump(std::ostream& out) const
848 {
849     out << "\neof_node";
850     out << " m_node_num = " << m_node_num;
851     out << " nullable() = " << (nullable() ? "true" : "false");
852     out << " firstpos() = ";
853     node_set fp = firstpos();
854     std::copy(fp.begin(), fp.end(),
855             std::ostream_iterator<node_id_t>(out, ","));
856 
857     out << " lastpos() = ";
858     node_set lp = lastpos();
859     std::copy(lp.begin(), lp.end(),
860             std::ostream_iterator<node_id_t>(out, ","));
861 }
862 #endif
863 
864 class ccl_node : public node
865 {
866 
867 public:
868 
869     ccl_node(const std::vector<uchar>& v);
870     ccl_node(const uchar c1, const uchar c2);
871     ccl_node(const ccl_node& x);
~ccl_node()872     virtual ~ccl_node(){}
873 
874     node* clone() const BOOST_OVERRIDE;
875     bool nullable() const BOOST_OVERRIDE;
876     node_set firstpos() const BOOST_OVERRIDE;
877     node_set lastpos() const BOOST_OVERRIDE;
878     void compute_followpos(followpos_t& followpos) const BOOST_OVERRIDE;
879     void compute_state_match(state_match_t& state_match ) const BOOST_OVERRIDE;
880     void get_eof_ids(node_set& eof_set) const BOOST_OVERRIDE;
881     void assign_node_ids(node_id_t& node_count) BOOST_OVERRIDE;
882 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
883     void dump(std::ostream& out) const BOOST_OVERRIDE;
884 #endif
885 
886 private:
887 
888     std::vector<uchar> m_match;
889     node_id_t m_node_num;
890 };
891 
892 inline
ccl_node(const std::vector<uchar> & v)893 ccl_node::ccl_node(const std::vector<uchar>& v)
894     : node()
895     , m_match(v)
896     , m_node_num(0)
897 {
898     m_match.resize(256); // make sure it's the right size
899 }
900 
901 inline
ccl_node(const uchar c1,const uchar c2)902 ccl_node::ccl_node(const uchar c1, const uchar c2)
903     : node()
904     , m_match(256, uchar(0))
905     , m_node_num(0)
906 {
907     BOOST_ASSERT(c1 < c2);
908     for (std::size_t i = c1; i <= std::size_t(c2); ++i)
909     {
910         m_match[i] = 1;
911     }
912 }
913 
914 inline
ccl_node(const ccl_node & x)915 ccl_node::ccl_node(const ccl_node& x)
916     : node(x)
917     , m_match(x.m_match)
918     , m_node_num(x.m_node_num)
919 {
920 }
921 
922 inline node *
clone() const923 ccl_node::clone() const
924 {
925     return new ccl_node(*this);
926 }
927 
928 inline bool
nullable() const929 ccl_node::nullable() const
930 {
931     return false;
932 }
933 
934 inline node_set
firstpos() const935 ccl_node::firstpos() const
936 {
937     node_set rval;
938     rval.insert(m_node_num);
939     return rval;
940 }
941 
942 inline node_set
lastpos() const943 ccl_node::lastpos() const
944 {
945     return firstpos();
946 }
947 
948 inline void
compute_followpos(followpos_t &) const949 ccl_node::compute_followpos(followpos_t&) const
950 {
951     return;
952 }
953 
954 inline void
compute_state_match(state_match_t & state_match) const955 ccl_node::compute_state_match(state_match_t& state_match) const
956 {
957     if (state_match.size() < m_node_num + 1)
958         state_match.resize(m_node_num + 1);
959     state_match[m_node_num] = m_match;
960 }
961 
962 inline void
get_eof_ids(node_set &) const963 ccl_node::get_eof_ids(node_set&) const
964 {
965     return;
966 }
967 
968 inline void
assign_node_ids(node_id_t & node_count)969 ccl_node::assign_node_ids(node_id_t& node_count)
970 {
971     m_node_num = node_count++;
972 }
973 
974 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
975 inline void
dump(std::ostream & out) const976 ccl_node::dump(std::ostream& out) const
977 {
978     out << "\nccl_node m_match = ";
979     for (std::size_t i = 0; i < m_match.size(); ++i)
980     {
981         if (m_match[i])
982             out << i << ", ";
983     }
984     out << " m_node_num = " << m_node_num;
985     out << " nullable() = " << (nullable() ? "true" : "false");
986     out << " firstpos() = ";
987     node_set fp = firstpos();
988     std::copy(fp.begin(), fp.end(),
989             std::ostream_iterator<node_id_t>(out, ","));
990 
991     out << " lastpos() = ";
992     node_set lp = lastpos();
993     std::copy(lp.begin(), lp.end(),
994             std::ostream_iterator<node_id_t>(out, ","));
995 }
996 #endif
997 
998 template <typename ScannerT>
999 class make_concat
1000 {
1001     typedef typename ScannerT::iterator_t iterator_type;
1002 
1003 public:
1004 
make_concat(std::stack<node * > & the_stack)1005     make_concat(std::stack<node*>& the_stack)
1006         : m_stack(the_stack)
1007         {}
1008 
operator ()(iterator_type const &,iterator_type const &) const1009     void operator()(iterator_type const &, iterator_type const &) const
1010     {
1011         node* right = m_stack.top();
1012         m_stack.pop();
1013         node* left = m_stack.top();
1014         m_stack.pop();
1015         node* newnode = new cat_node(left, right);
1016         m_stack.push(newnode);
1017     }
1018 
1019     std::stack<node*>& m_stack;
1020 };
1021 
1022 template <int CharTSize>
1023 struct get_byte_aux;
1024 
1025 template<>
1026 struct get_byte_aux<1>
1027 {
1028     template <typename CharT>
operator ()boost::spirit::classic::lexerimpl::get_byte_aux1029     unsigned char operator()(CharT c, unsigned int byte)
1030     {
1031         BOOST_ASSERT(byte == 0);
1032         return c;
1033     }
1034 };
1035 
1036 template<>
1037 struct get_byte_aux<2>
1038 {
1039     template <typename CharT>
operator ()boost::spirit::classic::lexerimpl::get_byte_aux1040     unsigned char operator()(CharT c, unsigned int byte)
1041     {
1042         static unsigned long mask[] =
1043         {
1044             0xFF00,
1045             0x00FF
1046         };
1047 
1048         BOOST_ASSERT(byte < 2);
1049         return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1050     }
1051 };
1052 
1053 template<>
1054 struct get_byte_aux<4>
1055 {
1056     template <typename CharT>
operator ()boost::spirit::classic::lexerimpl::get_byte_aux1057     unsigned char operator()(CharT c, unsigned int byte)
1058     {
1059         static unsigned long mask[] =
1060         {
1061             0xFF000000,
1062             0x00FF0000,
1063             0x0000FF00,
1064             0x000000FF
1065         };
1066 
1067         BOOST_ASSERT(byte < 4);
1068         return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
1069     }
1070 };
1071 
1072 template <typename CharT>
1073 inline unsigned char
get_byte(CharT c,unsigned int byte)1074 get_byte(CharT c, unsigned int byte)
1075 {
1076     return get_byte_aux<sizeof(c)>()(c, byte);
1077 }
1078 
1079 template <typename ScannerT>
1080 class make_star
1081 {
1082     typedef typename ScannerT::iterator_t iterator_type;
1083 
1084 public:
1085     typedef
1086         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1087         char_t;
1088 
make_star(std::stack<node * > & the_stack)1089     make_star(std::stack<node*>& the_stack)
1090         : m_stack(the_stack)
1091         {}
1092 
operator ()(const char_t) const1093     void operator()(const char_t) const
1094     {
1095         node* left = m_stack.top();
1096         m_stack.pop();
1097         node* newnode = new star_node(left);
1098         m_stack.push(newnode);
1099     }
1100 
1101     std::stack<node*>& m_stack;
1102 };
1103 
1104 template <typename ScannerT>
1105 class make_or
1106 {
1107     typedef typename ScannerT::iterator_t iterator_type;
1108 
1109 public:
1110 
make_or(std::stack<node * > & the_stack)1111     make_or(std::stack<node*>& the_stack)
1112         : m_stack(the_stack)
1113         {}
1114 
operator ()(iterator_type const &,iterator_type const &) const1115     void operator()(iterator_type const&, iterator_type const&) const
1116     {
1117         node* right = m_stack.top();
1118         m_stack.pop();
1119         node* left = m_stack.top();
1120         m_stack.pop();
1121         node* newnode = new or_node(left, right);
1122         m_stack.push(newnode);
1123     }
1124 
1125     std::stack<node*>& m_stack;
1126 };
1127 
1128 template <typename ScannerT>
1129 class make_plus
1130 {
1131     typedef typename ScannerT::iterator_t iterator_type;
1132 
1133 public:
1134     typedef
1135         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1136         char_t;
1137 
make_plus(std::stack<node * > & the_stack)1138     make_plus(std::stack<node*>& the_stack)
1139         : m_stack(the_stack)
1140         {}
1141 
operator ()(const char_t) const1142     void operator()(const char_t) const
1143     {
1144         node* left = m_stack.top();
1145         m_stack.pop();
1146 
1147         node* copy = left->clone();
1148 
1149         node* new_star = new star_node(copy);
1150         node* new_cat = new cat_node(left, new_star);
1151         m_stack.push(new_cat);
1152     }
1153 
1154     std::stack<node*>& m_stack;
1155 };
1156 
1157 template <typename ScannerT>
1158 class make_optional
1159 {
1160     typedef typename ScannerT::iterator_t iterator_type;
1161 
1162 public:
1163     typedef
1164         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1165         char_t;
1166 
make_optional(std::stack<node * > & the_stack)1167     make_optional(std::stack<node*>& the_stack)
1168         : m_stack(the_stack)
1169         {}
1170 
operator ()(const char_t) const1171     void operator()(const char_t) const
1172     {
1173         node* left = m_stack.top();
1174         m_stack.pop();
1175 
1176         node* new_or = new or_node(left, new epsilon_node());
1177         m_stack.push(new_or);
1178     }
1179 
1180     std::stack<node*>& m_stack;
1181 };
1182 
1183 ///////////////////////////////////////////////////////////////////////////////
1184 //  utility function
1185 template <typename CharT>
1186 inline utility::impl::range<CharT> const&
full_range()1187 full_range()
1188 {
1189     static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
1190         (std::numeric_limits<CharT>::max)());
1191     return full;
1192 }
1193 
1194 namespace ccl_utils
1195 {
1196     template <typename char_t>
1197     inline utility::impl::range_run<char_t>
negate_range_run(const utility::impl::range_run<char_t> & rr)1198     negate_range_run(
1199             const utility::impl::range_run<char_t>& rr)
1200     {
1201         utility::impl::range_run<char_t> newrr;
1202         newrr.set(full_range<char_t>());
1203         for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1204                 iter != rr.end(); ++iter)
1205             newrr.clear(*iter);
1206         return newrr;
1207     }
1208 
1209     template <typename char_t>
1210     inline node*
create_mb_node_seq(char_t c)1211     create_mb_node_seq(char_t c)
1212     {
1213         node* newnode = new char_node(get_byte(c, 0));
1214         for (unsigned int i = 1; i < sizeof(c); ++i)
1215         {
1216             node* cnode = new char_node(get_byte(c, i));
1217             node* top_node = new cat_node(newnode, cnode);
1218             newnode = top_node;
1219         }
1220         return newnode;
1221     }
1222 
1223     template <typename char_t>
1224     inline void
handle_mb_char(char_t c,bool first_time,std::stack<node * > & stack)1225     handle_mb_char(char_t c, bool first_time,
1226             std::stack<node*>& stack)
1227     {
1228         node* newnode = create_mb_node_seq(c);
1229 
1230         if (first_time)
1231         {
1232             stack.push(newnode);
1233         }
1234         else
1235         {
1236             node* top = stack.top();
1237             stack.pop();
1238 
1239             node* newtop = new or_node(top, newnode);
1240             stack.push(newtop);
1241         }
1242     }
1243 
1244     // forward decl only
1245     template <typename char_t>
1246     inline void
1247     handle_mb_range(char_t c1, char_t c2, bool first_time,
1248             std::stack<node*>& stack);
1249 
1250     template <typename char_t>
1251     inline void
create_nodes(const utility::impl::range_run<char_t> & rr,std::stack<node * > & stack)1252     create_nodes(const utility::impl::range_run<char_t>& rr,
1253             std::stack<node*>& stack)
1254     {
1255 
1256         if (sizeof(char_t) == 1)
1257         {
1258             std::vector<uchar> ccl;
1259             ccl.resize(256);
1260             for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1261                     iter != rr.end(); ++iter)
1262             {
1263                 for (int i = iter->first; i <= iter->last; ++i)
1264                 {
1265 //                  this is always true because of the limited datatype
1266 //                  BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
1267                     ccl[uchar(i)] = 1;
1268                 }
1269             }
1270 
1271             node* new_ccl = new ccl_node(ccl);
1272             stack.push(new_ccl);
1273         }
1274         else
1275         {
1276             bool mb_first_time = true;
1277             for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
1278                     iter != rr.end(); ++iter)
1279             {
1280                 if (iter->first == iter->last)
1281                 {
1282                     handle_mb_char(iter->first, mb_first_time, stack);
1283                 }
1284                 else
1285                 {
1286                     handle_mb_range(iter->first, iter->last, mb_first_time, stack);
1287                 }
1288                 mb_first_time = false;
1289             }
1290         }
1291     }
1292 
1293     template <typename char_t>
1294     inline std::size_t
compute_differing_byte(char_t c1,char_t c2)1295     compute_differing_byte(char_t c1, char_t c2)
1296     {
1297         std::size_t rval = 0;
1298         while (rval < sizeof(c1) &&
1299                get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
1300         {
1301            ++rval;
1302         }
1303         return rval;
1304     }
1305 
1306     template <typename char_t>
1307     inline node*
create_mb_node_type1(std::size_t j,char_t c1,char_t c2)1308     create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
1309     {
1310         std::size_t diff = get_byte(c2, (unsigned int)j) -
1311             get_byte(c1, (unsigned int)j);
1312         if (diff == 1) {
1313             return 0;
1314         }
1315         else if (diff == 2) {
1316             return new char_node(get_byte(c1, (unsigned int)j)+1);
1317         }
1318         else {
1319             return new ccl_node(get_byte(c1, (unsigned int)j)+1,
1320                 get_byte(c2, (unsigned int)j)-1);
1321         }
1322     }
1323 
1324     template <typename char_t>
1325     inline node *
create_mb_node_for_byte(std::size_t i,std::size_t j,std::size_t sizem1,std::size_t differing_byte,char_t c1,char_t c2,node * newnode)1326     create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
1327             std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
1328     {
1329         node* cnode;
1330         if (i == sizem1 && j == differing_byte && j != sizem1)
1331         {
1332             node* tmp = create_mb_node_type1(j, c1, c2);
1333             if (tmp == 0)
1334             {
1335                 delete newnode;
1336                 return 0;
1337             }
1338             else
1339                 cnode = tmp;
1340         }
1341         else if (i == differing_byte && j == sizem1)
1342         {
1343             if (i != sizem1) {
1344                 cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
1345             }
1346             else {
1347                 cnode = new ccl_node(get_byte(c1, (unsigned int)j),
1348                     get_byte(c2, (unsigned int)j));
1349             }
1350         }
1351         else if (i != differing_byte && i != sizem1 &&
1352                 j == (sizem1 - i + differing_byte))
1353         {
1354             cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
1355         }
1356         else if (i + j - differing_byte > sizem1) {
1357             cnode = new ccl_node(0, 0xFF);
1358         }
1359         else {//if (is plain)
1360             cnode = new char_node(get_byte(c1, (unsigned int)j));
1361         }
1362 
1363         node* top_node = new cat_node(newnode, cnode);
1364         return top_node;
1365     }
1366 
1367 // On platforms, where wchar_t is a typedef for unsigned short, the
1368 // comparision for a negative value is pointless
1369     template <bool is_signed>
1370     struct correct_char_aux {
1371     };
1372 
1373     template <>
1374     struct correct_char_aux<true> {
1375 
1376         template <typename char_t>
correctboost::spirit::classic::lexerimpl::ccl_utils::correct_char_aux1377         static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
1378     };
1379 
1380     template <>
1381     struct correct_char_aux<false> {
1382 
1383         template <typename char_t>
correctboost::spirit::classic::lexerimpl::ccl_utils::correct_char_aux1384         static char_t correct(char_t c) { return c; }
1385     };
1386 
1387     template <typename char_t>
1388     struct correct_char
1389     {
correctboost::spirit::classic::lexerimpl::ccl_utils::correct_char1390         static char_t correct(char_t c)
1391         {
1392             return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
1393                 correct(c);
1394         }
1395     };
1396 
1397     template <typename char_t>
1398     inline void
handle_mb_range(char_t c1,char_t c2,bool first_time,std::stack<node * > & stack)1399     handle_mb_range(char_t c1, char_t c2, bool first_time,
1400             std::stack<node*>& stack)
1401     {
1402         // The algorithm can't handle negative value chars, which don't make
1403         // much sense anyway. This comparision is pointless for wchar_t's on
1404         // platforms, where wchar_t is a typedef for unsigned short
1405 
1406         c1 = correct_char<char_t>::correct(c1);
1407         //if (c1 < 0)
1408         //    c1 = 0;
1409 
1410         BOOST_ASSERT(c1 < c2);
1411         node* newnode = 0;
1412         node* savednode = 0;
1413         const std::size_t differing_byte = compute_differing_byte(c1, c2);
1414         const std::size_t sizem1 = sizeof(c1) - 1;
1415         const std::size_t ndb = sizem1 - differing_byte;
1416         for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
1417         {
1418             // generate node for the first byte
1419             if (differing_byte == 0 && i == ndb)
1420             {
1421                 node* tmp = create_mb_node_type1(0, c1, c2);
1422                 if (tmp == 0)
1423                     continue;
1424                 else
1425                     newnode = tmp;
1426             }
1427             else
1428             {
1429                 newnode = new char_node(get_byte(c1, 0));
1430             }
1431             for (std::size_t j = 1; j < sizeof(c1); ++j)
1432             {
1433                 newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
1434                         c1, c2, newnode);
1435                 if (newnode == 0)
1436                     goto end_outer_for;
1437             }
1438 
1439             // or together the various parts
1440             if (savednode)
1441             {
1442                 node* top_node = new or_node(savednode, newnode);
1443                 savednode = top_node;
1444             }
1445             else
1446             {
1447                 savednode = newnode;
1448             }
1449 end_outer_for:
1450             continue;
1451         }
1452 
1453         for (std::size_t k = 0; k < ndb; ++k)
1454         {
1455             newnode = new char_node(get_byte(c2, 0));
1456             for (std::size_t j = 1; j < sizeof(c2); ++j)
1457             {
1458                 node* cnode;
1459                 if (k == differing_byte && j == sizem1 && k != sizem1)
1460                     cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
1461 
1462                 else if (k != differing_byte && k != sizem1 &&
1463                         j == (sizem1 - k + differing_byte))
1464                     cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
1465 
1466                 else if (k + j - differing_byte > sizem1)
1467                     cnode = new ccl_node(0, 0xFF);
1468 
1469                 else //if (is plain)
1470                     cnode = new char_node(get_byte(c2, (unsigned int)j));
1471 
1472 
1473                 node* top_node = new cat_node(newnode, cnode);
1474                 newnode = top_node;
1475             }
1476 
1477             // or together the various parts
1478             if (savednode)
1479             {
1480                 node* top_node = new or_node(savednode, newnode);
1481                 savednode = top_node;
1482             }
1483             else
1484             {
1485                 savednode = newnode;
1486             }
1487         }
1488 
1489 
1490         if (first_time)
1491         {
1492             stack.push(savednode);
1493         }
1494         else
1495         {
1496             node* top = stack.top();
1497             stack.pop();
1498 
1499             node* newtop = new or_node(top, savednode);
1500             stack.push(newtop);
1501         }
1502     }
1503 } // namespace ccl_utils
1504 
1505 template <typename ScannerT>
1506 class make_char
1507 {
1508     typedef typename ScannerT::iterator_t iterator_type;
1509 
1510 public:
1511     typedef
1512         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1513         char_t;
1514 
make_char(std::stack<node * > & the_stack)1515     make_char(std::stack<node*>& the_stack)
1516         : m_stack(the_stack)
1517         {}
1518 
operator ()(iterator_type const & first,iterator_type const & last) const1519     void operator()(iterator_type const& first, iterator_type const& last) const
1520     {
1521         const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1522             escape_char_parser<lex_escapes, char_t>();
1523         char_t the_char;
1524         iterator_type first_ = first;
1525         ScannerT scan(first_, last);
1526         lex_escape_ch[assign(the_char)].parse(scan);
1527         node* newnode = ccl_utils::create_mb_node_seq(the_char);
1528         m_stack.push(newnode);
1529     }
1530 
1531     std::stack<node*>& m_stack;
1532 };
1533 
1534 
1535 template <typename ScannerT>
1536 class make_ccl
1537 {
1538     typedef typename ScannerT::iterator_t iterator_type;
1539 
1540 public:
1541     typedef
1542         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1543         char_t;
1544 
make_ccl(std::stack<node * > & the_stack)1545     make_ccl(std::stack<node*>& the_stack)
1546         : m_stack(the_stack)
1547         {}
1548 
is_equal_to_string(iterator_type first,iterator_type const & last,const char * str)1549     static bool is_equal_to_string(iterator_type first,
1550         iterator_type const & last, const char* str)
1551     {
1552         while (first != last &&*str &&*first ==*str)
1553         {
1554             ++first;
1555             ++str;
1556         }
1557         return*str == 0;
1558     }
1559 
1560     template <typename ParserT>
fill_ccl(utility::impl::range_run<char_t> & rr,const ParserT & parser)1561     static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
1562     {
1563         for (int i = 0; i < 256; ++i)
1564         {
1565             if (parser.test(static_cast<char_t>(uchar(i))))
1566                 rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
1567         }
1568     }
1569 
operator ()(iterator_type const & first_,iterator_type const & last) const1570     void operator()(iterator_type const& first_, iterator_type const& last) const
1571     {
1572         BOOST_ASSERT(*first_ == '[');
1573 
1574         iterator_type first = first_;
1575         ++first; // skip over '['
1576         bool negated_ccl = false;
1577         if (*first == '^')
1578         {
1579             negated_ccl = true;
1580             ++first;
1581         }
1582 
1583         utility::impl::range_run<char_t> rr;
1584         while (first != last &&*first != ']')
1585         {
1586             if (*first == '[') // it's a ccl_expr like [:space:]
1587             {
1588                 // check for [:space:], etc.
1589                 if (is_equal_to_string(first, last, "[:alnum:]"))
1590                 {
1591                     fill_ccl(rr, alnum_p);
1592                 }
1593                 else if (is_equal_to_string(first, last, "[:alpha:]"))
1594                 {
1595                     fill_ccl(rr, alpha_p);
1596                 }
1597                 else if (is_equal_to_string(first, last, "[:blank:]"))
1598                 {
1599                     fill_ccl(rr, blank_p);
1600                 }
1601                 else if (is_equal_to_string(first, last, "[:cntrl:]"))
1602                 {
1603                     fill_ccl(rr, cntrl_p);
1604                 }
1605                 else if (is_equal_to_string(first, last, "[:digit:]"))
1606                 {
1607                     fill_ccl(rr, digit_p);
1608                 }
1609                 else if (is_equal_to_string(first, last, "[:graph:]"))
1610                 {
1611                     fill_ccl(rr, graph_p);
1612                 }
1613                 else if (is_equal_to_string(first, last, "[:lower:]"))
1614                 {
1615                     fill_ccl(rr, lower_p);
1616                 }
1617                 else if (is_equal_to_string(first, last, "[:print:]"))
1618                 {
1619                     fill_ccl(rr, print_p);
1620                 }
1621                 else if (is_equal_to_string(first, last, "[:punct:]"))
1622                 {
1623                     fill_ccl(rr, punct_p);
1624                 }
1625                 else if (is_equal_to_string(first, last, "[:space:]"))
1626                 {
1627                     fill_ccl(rr, space_p);
1628                 }
1629                 else if (is_equal_to_string(first, last, "[:upper:]"))
1630                 {
1631                     fill_ccl(rr, upper_p);
1632                 }
1633                 else if (is_equal_to_string(first, last, "[:xdigit:]"))
1634                 {
1635                     fill_ccl(rr, xdigit_p);
1636                 }
1637                 // this can't happen, because it's parsed before we get here.
1638                 //else
1639                 //    throw bad_regex();
1640 
1641                 // Advance past the character class expression
1642                 while (first != last &&*first != ']')
1643                     ++first;
1644                 BOOST_ASSERT(*first == ']');
1645                 ++first;
1646             }
1647             else {
1648                 const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1649                     escape_char_parser<lex_escapes, char_t>();
1650 
1651                 char_t c1;
1652                 ScannerT scan(first, last);
1653                 lex_escape_ch[assign(c1)].parse(scan);
1654                 if (*scan.first == '-') // insert a range
1655                 {
1656                     ++scan.first;
1657                     char_t c2;
1658                     lex_escape_ch[assign(c2)].parse(scan);
1659                     BOOST_ASSERT(c1 < c2); // Throw exception?
1660                     rr.set(utility::impl::range<char_t>(c1, c2));
1661                 }
1662                 else // insert 1 char
1663                 {
1664                     rr.set(utility::impl::range<char_t>(c1, c1));
1665                 }
1666             }
1667         }
1668 
1669         if (negated_ccl)
1670         {
1671             rr = ccl_utils::negate_range_run(rr);
1672         }
1673 
1674         ccl_utils::create_nodes(rr, m_stack);
1675     }
1676 
1677     std::stack<node*>& m_stack;
1678 };
1679 
1680 template <typename ScannerT>
1681 class make_any_char
1682 {
1683     typedef typename ScannerT::iterator_t iterator_type;
1684 
1685 public:
1686     typedef
1687         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1688         char_t;
1689 
1690     std::stack<node*>& m_stack;
1691 
make_any_char(std::stack<node * > & the_stack)1692     make_any_char(std::stack<node*>& the_stack)
1693         : m_stack(the_stack)
1694         {}
1695 
operator ()(const char_t c) const1696     void operator()(const char_t c) const
1697     {
1698         BOOST_ASSERT(c == '.');
1699         do_any_char();
1700     }
1701 
do_any_char() const1702     void do_any_char() const
1703     {
1704         static utility::impl::range_run<char_t> rr;
1705         rr.set(full_range<char_t>());
1706         char_t newline = '\n';
1707         rr.clear(utility::impl::range<char_t>(newline, newline));
1708 
1709         ccl_utils::create_nodes(rr, m_stack);
1710     }
1711 };
1712 
1713 template <typename ScannerT>
1714 class make_string
1715 {
1716     typedef typename ScannerT::iterator_t iterator_type;
1717 
1718 public:
1719     typedef
1720         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1721         char_t;
1722 
1723     std::stack<node*>& m_stack;
1724 
make_string(std::stack<node * > & the_stack)1725     make_string(std::stack<node*>& the_stack)
1726         : m_stack(the_stack)
1727         {}
1728 
operator ()(iterator_type const & first,iterator_type const & last) const1729     void operator()(iterator_type const& first, iterator_type const& last) const
1730     {
1731         BOOST_ASSERT(*first == '"');
1732 
1733         iterator_type first_ = first;
1734         ScannerT scan(first_, last);
1735         ++scan.first; // skip over '"'
1736 
1737         // empty string not allowed
1738         if (*scan.first == '"')
1739         {
1740             boost::throw_exception(bad_regex());
1741         }
1742 
1743         const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
1744             escape_char_parser<lex_escapes, char_t>();
1745 
1746         char_t c;
1747         lex_escape_ch[assign(c)].parse(scan);
1748         node* top_node = ccl_utils::create_mb_node_seq(c);
1749 
1750         while (*scan.first != '"' && scan.first != scan.last)
1751         {
1752             lex_escape_ch[assign(c)].parse(scan);
1753             node* cur_node = ccl_utils::create_mb_node_seq(c);
1754             top_node = new cat_node(top_node, cur_node);
1755         }
1756         m_stack.push(top_node);
1757     }
1758 };
1759 
1760 inline
repeat_node(node * n,int num)1761 node* repeat_node(node* n, int num)
1762 {
1763     node* list_of_nodes = n;
1764     for (int i = 1; i < num; ++i)
1765     {
1766         list_of_nodes = new cat_node(list_of_nodes, n->clone());
1767     }
1768     return list_of_nodes;
1769 }
1770 
1771 inline
optional_node(node * n)1772 node* optional_node(node* n)
1773 {
1774     return new or_node(n, new epsilon_node());
1775 }
1776 
1777 template <typename ScannerT>
1778 class make_rep1
1779 {
1780     typedef typename ScannerT::iterator_t iterator_type;
1781 
1782 public:
1783     typedef
1784         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1785         char_t;
1786 
1787     std::stack<node*>& m_stack;
1788 
make_rep1(std::stack<node * > & the_stack)1789     make_rep1(std::stack<node*>& the_stack)
1790         : m_stack(the_stack)
1791         {}
1792 
operator ()(iterator_type const & first,iterator_type const & last) const1793     void operator()(iterator_type const& first, iterator_type const& last) const
1794     {
1795         BOOST_ASSERT(*first == '{');
1796 
1797         iterator_type first_ = first;
1798         ScannerT scan(first_, last);
1799         ++scan.first; // skip over '{'
1800 
1801         unsigned int count;
1802         uint_p[assign(count)].parse(scan);
1803         if (count == 0)
1804             boost::throw_exception(bad_regex());
1805 
1806         node* top_node = m_stack.top();
1807         m_stack.pop();
1808         top_node = repeat_node(top_node, count);
1809         m_stack.push(top_node);
1810     }
1811 };
1812 
1813 template <typename ScannerT>
1814 class make_rep2
1815 {
1816     typedef typename ScannerT::iterator_t iterator_type;
1817 
1818 public:
1819     typedef
1820         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1821         char_t;
1822 
1823     std::stack<node*>& m_stack;
1824 
make_rep2(std::stack<node * > & the_stack)1825     make_rep2(std::stack<node*>& the_stack)
1826         : m_stack(the_stack)
1827         {}
1828 
operator ()(iterator_type const & first,iterator_type const & last) const1829     void operator()(iterator_type const& first, iterator_type const& last) const
1830     {
1831         BOOST_ASSERT(*first == '{');
1832 
1833         iterator_type first_ = first;
1834         ScannerT scan (first_, last);
1835         ++scan.first; // skip over '{'
1836 
1837         unsigned int count;
1838         uint_p[assign(count)].parse(scan);
1839         if (count == 0)
1840             boost::throw_exception(bad_regex());
1841 
1842         node* top_node = m_stack.top();
1843         m_stack.pop();
1844         top_node = new cat_node(repeat_node(top_node, count),
1845                 new star_node(top_node->clone()));
1846         m_stack.push(top_node);
1847 
1848     }
1849 };
1850 
1851 template <typename ScannerT>
1852 class make_rep3
1853 {
1854     typedef typename ScannerT::iterator_t iterator_type;
1855 
1856 public:
1857     typedef
1858         typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1859         char_t;
1860 
1861     std::stack<node*>& m_stack;
1862 
make_rep3(std::stack<node * > & the_stack)1863     make_rep3(std::stack<node*>& the_stack)
1864         : m_stack(the_stack)
1865         {}
1866 
operator ()(iterator_type const & first,iterator_type const & last) const1867     void operator()(iterator_type const& first, iterator_type const& last) const
1868     {
1869         BOOST_ASSERT(*first == '{');
1870 
1871         iterator_type first_ = first;
1872         ScannerT scan(first_, last);
1873         ++scan.first; // skip over '{'
1874 
1875         unsigned int count1, count2;
1876         uint_p[assign(count1)].parse(scan);
1877         if (count1 == 0)
1878             boost::throw_exception(bad_regex());
1879 
1880         ++scan.first; // skip over ','
1881 
1882         uint_p[assign(count2)].parse(scan);
1883         if (count2 <= count1)
1884             boost::throw_exception(bad_regex());
1885 
1886         node* top_node = m_stack.top();
1887         m_stack.pop();
1888         node* repeats = repeat_node(top_node, count1);
1889         top_node = new cat_node(repeats,
1890                 repeat_node(optional_node(top_node->clone()),
1891                     count2 - count1));
1892 
1893         m_stack.push(top_node);
1894     }
1895 };
1896 
1897 ///////////////////////////////////////////////////////////////////////////////
1898 //
1899 //  Lexer grammar
1900 //
1901 //      Defines the grammar, which mandates the syntax of the understood
1902 //      lexeme definitions passed to lexer::register_regex.
1903 //
1904 ///////////////////////////////////////////////////////////////////////////////
1905 class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
1906 {
1907 public:
lexer_grammar(std::stack<node * > & node_stack_)1908     lexer_grammar(std::stack<node*> &node_stack_)
1909     : node_stack(node_stack_) {}
1910 
1911     template <typename ScannerT>
1912     struct definition
1913     {
1914         typedef rule<ScannerT> rule_t;
1915         typedef typename ScannerT::iterator_t iterator_type;
1916         typedef
1917             typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
1918             char_t;
1919 
1920         rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
1921             escseq, ccl_char;
1922         symbols<> ccl_expr;
1923 
definitionboost::spirit::classic::lexerimpl::lexer_grammar::definition1924         definition(lexer_grammar const &self)
1925         {
1926             regex =
1927                     re >> !('/' >> re) >> !ch_p('$')
1928                 ;
1929 
1930             re =
1931                     series
1932                 >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
1933                 ;
1934 
1935             series =
1936                     singleton
1937                 >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
1938                 ;
1939 
1940             singleton =
1941                     ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
1942                     >>  singleton2
1943                 |   fullccl
1944                     >>  singleton2
1945                 |   ('"' >> string >> '"')
1946                     [
1947                         make_string<ScannerT>(self.node_stack)
1948                     ]
1949                     >>  singleton2
1950                 |   '(' >> re >> ')'
1951                     >>  singleton2
1952                 |   ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
1953                     [
1954                         make_char<ScannerT>(self.node_stack)
1955                     ]
1956                     >>  singleton2
1957                 ;
1958 
1959             singleton2 =
1960                     ch_p('*')[make_star<ScannerT>(self.node_stack)]
1961                     >> singleton2
1962                 |   ch_p('+')[make_plus<ScannerT>(self.node_stack)]
1963                     >> singleton2
1964                 |   ch_p('?')[make_optional<ScannerT>(self.node_stack)]
1965                     >> singleton2
1966                 |   ('{' >> uint_p >> '}')
1967                     [
1968                         make_rep1<ScannerT>(self.node_stack)
1969                     ]
1970                     >>  singleton2
1971                 |   ('{' >> uint_p >> ',' >> '}')
1972                     [
1973                         make_rep2<ScannerT>(self.node_stack)
1974                     ]
1975                     >>  singleton2
1976                 |   ('{' >> uint_p >> ',' >> uint_p >> '}')
1977                     [
1978                         make_rep3<ScannerT>(self.node_stack)
1979                     ]
1980                     >> singleton2
1981                 |   epsilon_p
1982                 ;
1983 
1984             fullccl =
1985                     ('[' >> !ch_p('^') >> ccl >> ']')
1986                     [
1987                         make_ccl<ScannerT>(self.node_stack)
1988                     ]
1989                 ;
1990 
1991             ccl =
1992                    *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
1993                 ;
1994 
1995             ccl_char =
1996                     ( (anychar_p - chset<>("\\\n]")) | escseq )
1997                 ;
1998 
1999             ccl_expr =
2000                     "[:alnum:]",
2001                     "[:alpha:]",
2002                     "[:blank:]",
2003                     "[:cntrl:]",
2004                     "[:digit:]",
2005                     "[:graph:]",
2006                     "[:lower:]",
2007                     "[:print:]",
2008                     "[:punct:]",
2009                     "[:space:]",
2010                     "[:upper:]",
2011                     "[:xdigit:]"
2012                 ;
2013 
2014             string =
2015                     +( (anychar_p - chset<>("\"\\")) | escseq )
2016                 ;
2017 
2018             typedef
2019                 uint_parser<char_t, 8, 1,
2020                     std::numeric_limits<char_t>::digits / 3 + 1
2021                 > oct_parser_t;
2022             typedef
2023                 uint_parser<char_t, 16, 1,
2024                     std::numeric_limits<char_t>::digits / 4 + 1
2025                 > hex_parser_t;
2026 
2027             escseq =
2028                     ch_p('\\')
2029                     >>  (
2030                             oct_parser_t()
2031                         |   as_lower_d['x'] >> hex_parser_t()
2032                         |   (anychar_p - chset<>('\n'))
2033                         )
2034                 ;
2035 
2036 #define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2037 
2038             BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
2039             BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
2040             BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
2041             BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
2042             BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
2043             BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
2044             BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
2045             BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
2046             BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
2047             BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
2048 
2049 #undef BOOST_SLEX_DEBUG
2050         }
2051 
2052         rule<ScannerT> const&
startboost::spirit::classic::lexerimpl::lexer_grammar::definition2053         start() const { return regex; }
2054     };
2055 
2056     std::stack<node*> &node_stack;
2057 
2058 }; // class lexer_grammar
2059 
2060 template <typename StringT>
2061 inline node *
parse(lexer_grammar & g,StringT const & str)2062 parse(lexer_grammar& g, StringT const& str)
2063 {
2064     typedef
2065         scanner<typename StringT::const_iterator, scanner_policies<> >
2066         scanner_t;
2067     typedef typename rule<scanner_t>::template result<scanner_t>::type
2068         result_t;
2069 
2070     typename StringT::const_iterator first = str.begin();
2071     typename StringT::const_iterator last = str.end();
2072 
2073     scanner_t scan(first, last);
2074 //    typename rule<scanner_t>::result_t hit = g.parse(scan);
2075     result_t hit = g.parse(scan);
2076     if (!hit || !scan.at_end())
2077     {
2078         while (g.node_stack.size())
2079         {
2080             delete g.node_stack.top();
2081             g.node_stack.pop();
2082         }
2083         return 0;
2084     }
2085 
2086     BOOST_ASSERT(g.node_stack.size() == 1);
2087     node* rval = g.node_stack.top();
2088     g.node_stack.pop();
2089     node* an_eof_node = new eof_node();
2090     rval = new cat_node(rval, an_eof_node);
2091     return rval;
2092 }
2093 
2094 inline
make_case_insensitive(state_match_t & state_match)2095 void make_case_insensitive(state_match_t& state_match)
2096 {
2097     // TODO: Fix this.
2098     // This doesn't take into account foreign languages, figure out how to
2099     // do that. Also this approach is broken for this implementation of
2100     // wide chars.
2101     for (state_match_t::iterator iter = state_match.begin();
2102             iter != state_match.end(); ++iter)
2103     {
2104         int i, j;
2105         for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
2106         {
2107             // if either is set, turn them both on
2108             (*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
2109         }
2110     }
2111 }
2112 
2113 template<bool wide_char>
2114 struct regex_match_helper;
2115 
2116 template<>
2117 struct regex_match_helper<false> // single byte char
2118 {
2119     template <typename DfaT, typename IteratorT>
2120     static bool
do_matchboost::spirit::classic::lexerimpl::regex_match_helper2121     do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2122         int& regex_index,
2123         std::basic_string<
2124             typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2125         > *token)
2126     {
2127         typedef std::basic_string<
2128             typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2129         > string_type;
2130         typedef typename string_type::size_type size_type;
2131 
2132         node_id_t s = 0;
2133         node_id_t last_accepting_index = invalid_node;
2134         IteratorT p = first;
2135         IteratorT last_accepting_cpos = first;
2136         while (p != last)
2137         {
2138             s = dfa.transition_table[s][(uchar)*p];
2139             if (s == invalid_node)
2140                 break;
2141             if (token) token->append((size_type)1, *p);
2142             ++p;
2143             if (dfa.acceptance_index[s] != invalid_node)
2144             {
2145                 last_accepting_index = s;
2146                 last_accepting_cpos = p;
2147             }
2148         }
2149         if (last_accepting_index != invalid_node)
2150         {
2151 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2152             std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2153                 dfa.acceptance_index[last_accepting_index] << '\n';
2154 #endif
2155 
2156             first = last_accepting_cpos;
2157             regex_index = dfa.acceptance_index[last_accepting_index];
2158             return true;
2159         }
2160         else
2161             return false;
2162     }
2163 };
2164 
2165 template<>
2166 struct regex_match_helper<true> // wide char
2167 {
2168     template <typename DfaT, typename IteratorT>
2169     static bool
do_matchboost::spirit::classic::lexerimpl::regex_match_helper2170     do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2171         int& regex_index,
2172         std::basic_string<
2173             typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2174         > *token)
2175     {
2176         typedef
2177             typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2178             char_t;
2179         typedef std::basic_string<char_t> string_type;
2180         typedef typename string_type::size_type size_type;
2181 
2182         node_id_t s = 0;
2183         node_id_t last_accepting_index = invalid_node;
2184         IteratorT wp = first;
2185         IteratorT last_accepting_cpos = first;
2186 
2187         while (wp != last)
2188         {
2189             for (unsigned int i = 0;  i < sizeof(char_t); ++i)
2190             {
2191                 s = dfa.transition_table[s][get_byte(*wp,i)];
2192                 if (s == invalid_node)
2193                 {
2194                     goto break_while;
2195                 }
2196             }
2197             if (token) token->append((size_type)1, *wp);
2198             ++wp;
2199             if (dfa.acceptance_index[s] != invalid_node)
2200             {
2201                 last_accepting_index = s;
2202                 last_accepting_cpos = wp;
2203             }
2204 
2205         }
2206 
2207     break_while:
2208         if (last_accepting_index != invalid_node)
2209         {
2210 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2211             std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
2212                 dfa.acceptance_index[last_accepting_index] << '\n';
2213 #endif
2214             first = last_accepting_cpos;
2215             regex_index = dfa.acceptance_index[last_accepting_index];
2216 
2217             return true;
2218         }
2219         else
2220             return false;
2221     }
2222 };
2223 
2224 template <typename DfaT, typename IteratorT, bool wide_char>
2225 struct regex_match
2226 {
2227     static bool
do_matchboost::spirit::classic::lexerimpl::regex_match2228     do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
2229         int& regex_index,
2230         std::basic_string<
2231             typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2232         > *token)
2233     {
2234         return regex_match_helper<wide_char>::do_match(
2235             dfa, first, last, regex_index, token);
2236     }
2237 };
2238 
2239 } // namespace lexerimpl
2240 
2241 ///////////////////////////////////////////////////////////////////////////////
2242 //
2243 template <typename IteratorT = char const*, typename TokenT = int,
2244           typename CallbackT = void(*)(IteratorT const &,
2245                                        IteratorT &,
2246                                        IteratorT const&,
2247                                        TokenT const&,
2248                                        lexer_control<TokenT> &)>
2249 class lexer
2250 {
2251 public:
2252     typedef CallbackT callback_t;
2253     typedef
2254         typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2255         char_t;
2256 
2257     struct regex_info
2258     {
2259         std::basic_string<char_t>   str;
2260         TokenT                      token;
2261         CallbackT                   callback;
2262 
regex_infoboost::spirit::classic::lexer::regex_info2263         regex_info(const std::basic_string<char_t>& _str,
2264                 const TokenT& _token,
2265                 const CallbackT& _callback)
2266             : str(_str)
2267             , token(_token)
2268             , callback(_callback)
2269             {}
2270 
2271     };
2272 
2273     struct dfa_table
2274     {
2275         std::vector<std::vector<node_id_t> >    transition_table;
2276         std::vector<node_id_t>                  acceptance_index;
2277     };
2278     typedef std::vector<node_id_t> node_table_t;
2279     typedef std::vector<node_table_t> transition_table_t;
2280     typedef std::vector<dfa_table> dfa_t;
2281 
2282 
2283     lexer(unsigned int states = 1);
2284 
2285     void register_regex(const std::basic_string<char_t>& regex,
2286             const TokenT& id, const CallbackT& cb = CallbackT(),
2287             unsigned int state = 0);
2288 
2289     TokenT next_token(IteratorT &first, IteratorT const &last,
2290         std::basic_string<char_t> *token = 0);
2291 
2292     void create_dfa();
has_compiled_dfa()2293     bool has_compiled_dfa() { return m_compiled_dfa; }
2294 
2295     void set_case_insensitive(bool insensitive);
2296 
2297 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2298     void dump(std::ostream& out);
2299 #endif
2300     typedef std::vector<std::vector<regex_info> > regex_list_t;
2301 
2302     bool load (std::ifstream &in, long unique_id = 0);
2303     bool save (std::ofstream &out, long unique_id = 0);
2304     enum {
2305         SLEX_SIGNATURE = 0x58454C53,    // "SLEX"
2306         SLEX_VERSION_100 = 0x0100,      // persistance version
2307         SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
2308         SLEX_MINOR_VERSION_MASK = 0xFF
2309     };
2310 
2311 private:
2312 
2313     void create_dfa_for_state(int state);
2314 
2315     static bool regex_match(const dfa_t& dfa, IteratorT& first,
2316         IteratorT& last, int& regex_index);
2317 
2318     mutable std::stack<lexerimpl::node*> node_stack;
2319     lexerimpl::lexer_grammar g;
2320 
2321     mutable bool m_compiled_dfa;
2322     mutable dfa_t m_dfa;
2323 
2324     regex_list_t m_regex_list;
2325     bool m_case_insensitive;
2326 
2327     unsigned int m_state;
2328     std::stack<unsigned int> m_state_stack;
2329     unsigned int m_num_states;
2330 };
2331 
2332 
2333 template <typename IteratorT, typename TokenT, typename CallbackT>
2334 inline
lexer(unsigned int states)2335 lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
2336     : g(node_stack)
2337     , m_compiled_dfa(false)
2338     , m_regex_list(states)
2339     , m_case_insensitive(false)
2340     , m_state(0)
2341     , m_state_stack()
2342     , m_num_states(states)
2343 {
2344     BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
2345         BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
2346 }
2347 
2348 template <typename IteratorT, typename TokenT, typename CallbackT>
2349 inline void
register_regex(const std::basic_string<char_t> & regex,const TokenT & id,const CallbackT & callback,unsigned int state)2350 lexer<IteratorT, TokenT, CallbackT>::register_regex(
2351         const std::basic_string<char_t>& regex, const TokenT& id,
2352         const CallbackT& callback, unsigned int state)
2353 {
2354     if (state > m_num_states) {
2355         m_regex_list.resize(state);
2356         m_num_states = state;
2357     }
2358     m_regex_list[state].push_back(regex_info(regex, id, callback));
2359 }
2360 
2361 template <typename IteratorT, typename TokenT, typename CallbackT>
2362 inline TokenT
next_token(IteratorT & first,IteratorT const & last,std::basic_string<typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type> * token)2363 lexer<IteratorT, TokenT, CallbackT>::next_token(
2364     IteratorT &first, IteratorT const& last,
2365     std::basic_string<
2366         typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
2367     > *token)
2368 {
2369     if (!m_compiled_dfa)
2370     {
2371         create_dfa();
2372     }
2373 
2374     IteratorT saved = first;
2375     int regex_index;
2376     if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
2377             do_match(m_dfa[m_state], first, last, regex_index, token))
2378         return -1;  // TODO: can't return -1, need to return some invalid token.
2379     // how to figure this out?  We can use traits I guess.
2380     else
2381     {
2382         regex_info regex = m_regex_list[m_state][regex_index];
2383         TokenT rval = regex.token;
2384         if (regex.callback)
2385         {
2386             // execute corresponding callback
2387             lexer_control<TokenT> controller(rval, m_state, m_state_stack);
2388             regex.callback(saved, first, last, regex.token, controller);
2389             if (controller.ignore_current_token_set()) {
2390                 if (token)
2391                     token->erase();
2392                 return next_token(first, last, token);
2393             }
2394         }
2395         return rval;
2396     }
2397 }
2398 
2399 namespace lexerimpl
2400 {
2401 
2402 inline
find_acceptance_state(const node_set & eof_node_ids,const node_set & current_set,node_id_t & acceptance_node_id)2403 bool find_acceptance_state(const node_set& eof_node_ids,
2404         const node_set& current_set,
2405         node_id_t& acceptance_node_id)
2406 {
2407     for(node_set::const_iterator nsi = eof_node_ids.begin();
2408             nsi != eof_node_ids.end(); ++nsi)
2409     {
2410         node_id_t eof_node_id =*nsi;
2411         if (current_set.end() != current_set.find(eof_node_id))
2412         {
2413             // store the first one we come to as the
2414             // matched pattern
2415             acceptance_node_id = eof_node_id;
2416             // don't bother searching for more
2417             return true;
2418         }
2419     }
2420     return false;
2421 }
2422 
2423 template <typename RegexListT, typename GrammarT>
2424 #ifndef BOOST_NO_CXX11_SMART_PTR
2425 inline std::unique_ptr<node>
2426 #else
2427 inline std::auto_ptr<node>
2428 #endif
parse_regexes(const RegexListT & regex_list,GrammarT & g)2429 parse_regexes(const RegexListT& regex_list, GrammarT& g)
2430 {
2431     // parse the expressions into a tree
2432     if (regex_list.begin() == regex_list.end())
2433         boost::throw_exception(bad_regex());
2434 
2435     typename RegexListT::const_iterator ri = regex_list.begin();
2436 #ifndef BOOST_NO_CXX11_SMART_PTR
2437     std::unique_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
2438 #else
2439     std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
2440 #endif
2441     if (tree.get() == 0)
2442         boost::throw_exception(bad_regex());
2443 
2444     ++ri;
2445     for (/**/; ri != regex_list.end(); ++ri)
2446     {
2447 #ifndef BOOST_NO_CXX11_SMART_PTR
2448         std::unique_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
2449 #else
2450         std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
2451 #endif
2452         if (next_tree.get() == 0)
2453             boost::throw_exception(bad_regex());
2454 #ifndef BOOST_NO_CXX11_SMART_PTR
2455         tree = std::unique_ptr<node>(new or_node(tree.release(), next_tree.release()));
2456 #else
2457         tree = std::auto_ptr<node>(new or_node(tree.release(), next_tree.release()));
2458 #endif
2459     }
2460     return tree;
2461 }
2462 
2463 } //namespace lexerimpl
2464 
2465 template <typename IteratorT, typename TokenT, typename CallbackT>
2466 inline void
create_dfa()2467 lexer<IteratorT, TokenT, CallbackT>::create_dfa()
2468 {
2469     m_dfa.resize(m_num_states);
2470     for (unsigned int i = 0; i < m_num_states; ++i)
2471         create_dfa_for_state(i);
2472 }
2473 
2474 // Algorithm from Compilers: Principles, Techniques, and Tools p. 141
2475 template <typename IteratorT, typename TokenT, typename CallbackT>
2476 inline void
create_dfa_for_state(int state)2477 lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
2478 {
2479     using lexerimpl::node;
2480 #ifndef BOOST_NO_CXX11_SMART_PTR
2481     std::unique_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
2482 #else
2483     std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
2484 #endif
2485     node_id_t dummy = 0;
2486     tree->assign_node_ids(dummy);
2487 
2488 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2489     tree->dump(std::cout);
2490 #endif
2491 
2492     // compute followpos(root)
2493     followpos_t followpos;
2494     tree->compute_followpos(followpos);
2495 
2496     // the dfa states <-> nfa state groups
2497     std::map<node_set, node_id_t> dstates1;
2498     std::map<node_id_t, node_set> dstates2;
2499 
2500     // the dfa transitions
2501     m_dfa[state].transition_table.push_back(
2502             std::vector<node_id_t>(256, invalid_node));
2503     m_dfa[state].acceptance_index.push_back(invalid_node);
2504 
2505     // whether the dfa state has been processed yet
2506     std::vector<node_id_t> marked;
2507 
2508     // used to give a unique id to each dfa state
2509     node_id_t num_states = 0;
2510 
2511     // initially, the only unmarked state in Dstates is firstpos(root).
2512     marked.push_back(0);
2513     node_set fpr = tree->firstpos();
2514     dstates1[fpr] = 0;
2515     dstates2[0] = fpr;
2516     state_match_t state_match;
2517     tree->compute_state_match(state_match);
2518 
2519     if (m_case_insensitive)
2520         lexerimpl::make_case_insensitive(state_match);
2521 
2522     node_set eof_node_ids;
2523     tree->get_eof_ids(eof_node_ids);
2524     // translate the eof_node_ids into a 0-based index
2525     std::map<node_id_t, node_id_t> eof_node_id_map;
2526     unsigned int x = 0;
2527     for (node_set::iterator node_id_it = eof_node_ids.begin();
2528             node_id_it != eof_node_ids.end();
2529             ++node_id_it)
2530     {
2531         eof_node_id_map[*node_id_it] = x++;
2532     }
2533 
2534     // figure out if this is an acceptance state
2535     node_id_t eof_node_id;
2536     if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
2537     {
2538         m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
2539     }
2540 
2541     std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
2542             node_id_t(0));
2543     while (marked.end() != i)
2544     {
2545        *i = 1;
2546         node_id_t T = node_id_t(std::distance(marked.begin(), i));
2547         BOOST_ASSERT(T < dstates2.size());
2548         node_set Tstates = dstates2[T];
2549         for (node_id_t j = 0; j < 256; ++j)
2550         {
2551             node_set U;
2552             for (node_set::iterator k = Tstates.begin();
2553                     k != Tstates.end(); ++k)
2554             {
2555                 node_id_t p =*k;
2556                 BOOST_ASSERT(p < state_match.size());
2557                 BOOST_ASSERT(j < state_match[p].size());
2558                 if (state_match[p][j])
2559                 {
2560                     node_set fpp = followpos[p];
2561                     U.insert(fpp.begin(), fpp.end());
2562                 }
2563             }
2564             if (U.size() > 0)
2565             {
2566                 std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
2567                 node_id_t target_state;
2568                 if (l == dstates1.end()) // not in the states yet
2569                 {
2570                     ++num_states;
2571                     dstates1[U] = target_state = num_states;
2572                     dstates2[target_state] = U;
2573                     marked.push_back(0);
2574                     m_dfa[state].transition_table.push_back(
2575                             std::vector<node_id_t>(256, invalid_node));
2576                     m_dfa[state].acceptance_index.push_back(invalid_node);
2577                     // figure out if this is an acceptance state
2578                     node_id_t eof_node_id;
2579                     if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
2580                     {
2581                         m_dfa[state].acceptance_index[target_state] =
2582                             eof_node_id_map[eof_node_id];
2583                     }
2584                 }
2585                 else
2586                 {
2587                     target_state = dstates1[U];
2588                 }
2589 
2590                 BOOST_ASSERT(T < m_dfa[state].transition_table.size());
2591                 BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
2592                 m_dfa[state].transition_table[T][j] = target_state;
2593             }
2594 
2595         }
2596 
2597         i = std::find(marked.begin(), marked.end(), node_id_t(0));
2598     }
2599     m_compiled_dfa = true;
2600 }
2601 
2602 template <typename IteratorT, typename TokenT, typename CallbackT>
2603 inline void
set_case_insensitive(bool insensitive)2604 lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
2605 {
2606     m_case_insensitive = insensitive;
2607 }
2608 
2609 
2610 #if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
2611 template <typename IteratorT, typename TokenT, typename CallbackT>
2612 inline void
dump(std::ostream & out)2613 lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
2614 {
2615     for (unsigned x = 0; x < m_dfa.size(); ++x)
2616     {
2617         out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
2618         for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
2619         {
2620             out << "state " << i << ":";
2621             for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
2622             {
2623                 if (m_dfa[x].transition_table[i][j] != invalid_node)
2624                     out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
2625             }
2626             out << "\n";
2627         }
2628         out << "acceptance states: ";
2629         for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
2630         {
2631             if (m_dfa[x].acceptance_index[k] != invalid_node)
2632                 out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
2633         }
2634         out << endl;
2635     }
2636 }
2637 #endif
2638 
2639 ///////////////////////////////////////////////////////////////////////////////
2640 // load the lexer tables
2641 #define slex_in(strm, val) \
2642     strm.read((char*)&val, sizeof(val)); \
2643     if (std::ios::goodbit != strm.rdstate()) return false
2644 
2645 template <typename IteratorT, typename TokenT, typename CallbackT>
2646 inline bool
load(std::ifstream & in,long unique_id)2647 lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
2648 {
2649 // ensure correct signature and version
2650 long signature = 0;
2651 
2652     slex_in (in, signature);
2653     if (signature != SLEX_SIGNATURE)
2654         return false;       // not for us
2655 
2656 long version = 0;
2657 
2658     slex_in (in, version);
2659     if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
2660         return false;       // to new for us
2661 
2662 long uid = 0;
2663 
2664     slex_in (in, uid);
2665     if (uid != unique_id)
2666         return false;       // not saved by us
2667 
2668 // load auxiliary members
2669 int num_states = 0;
2670 
2671     slex_in (in, num_states);
2672 
2673 // load the dfa tables
2674 dfa_t in_dfa;
2675 std::size_t dfa_size = 0;
2676 
2677     slex_in (in, dfa_size);
2678     in_dfa.resize(dfa_size);
2679     for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
2680     {
2681     // load the transition tables
2682     std::size_t tt_size = 0;
2683     transition_table_t &tt_table = in_dfa[dfa].transition_table;
2684 
2685         slex_in (in, tt_size);
2686         tt_table.resize(tt_size);
2687         for (std::size_t tt = 0; tt < tt_size; ++tt)
2688         {
2689         std::size_t nt_size = 0;
2690         node_table_t &nt_table = tt_table[tt];
2691 
2692             slex_in (in, nt_size);
2693             nt_table.resize(nt_size);
2694             for (std::size_t nt = 0; nt < nt_size; ++nt)
2695             {
2696                 slex_in (in, nt_table[nt]);
2697             }
2698         }
2699 
2700     // load the acceptance index table
2701     std::size_t ai_size = 0;
2702     node_table_t &ai_table = in_dfa[dfa].acceptance_index;
2703 
2704         slex_in (in, ai_size);
2705         ai_table.resize(ai_size);
2706         for (std::size_t ai = 0; ai < ai_size; ++ai)
2707         {
2708             slex_in (in, ai_table[ai]);
2709         }
2710     }
2711 
2712     m_dfa.swap(in_dfa);         // success, swap in the read values
2713     m_num_states = num_states;
2714 
2715     m_compiled_dfa = true;
2716     return true;
2717 }
2718 
2719 #undef slex_in
2720 
2721 ///////////////////////////////////////////////////////////////////////////////
2722 // save the lexer tables
2723 #define slex_out(strm, val) \
2724     strm.write((char*)&val, sizeof(val)); \
2725     if (std::ios::goodbit != strm.rdstate()) return false
2726 
2727 template <typename IteratorT, typename TokenT, typename CallbackT>
2728 inline bool
save(std::ofstream & out,long unique_id)2729 lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
2730 {
2731 // save signature and version information
2732 long out_long = SLEX_SIGNATURE;
2733 
2734     slex_out(out, out_long);
2735     out_long = SLEX_VERSION_100;
2736     slex_out(out, out_long);
2737     slex_out(out, unique_id);
2738 
2739 // save auxiliary members
2740     slex_out(out, m_num_states);
2741 
2742 // save the dfa tables
2743     typedef typename dfa_t::const_iterator dfa_iter_t;
2744     typedef transition_table_t::const_iterator transition_table_iter_t;
2745     typedef node_table_t::const_iterator node_table_iter_t;
2746 
2747     std::size_t out_size_t = m_dfa.size();
2748     slex_out(out, out_size_t);
2749 
2750     dfa_iter_t end = m_dfa.end();
2751     for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
2752     {
2753     // save the transition table
2754         out_size_t = (*it).transition_table.size();
2755         slex_out(out, out_size_t);
2756 
2757         transition_table_iter_t tt_end = (*it).transition_table.end();
2758         for (transition_table_iter_t tt_it = (*it).transition_table.begin();
2759              tt_it != tt_end;
2760              ++tt_it)
2761         {
2762             out_size_t = (*tt_it).size();
2763             slex_out(out, out_size_t);
2764 
2765             node_table_iter_t nt_end = (*tt_it).end();
2766             for (node_table_iter_t nt_it = (*tt_it).begin();
2767                  nt_it != nt_end;
2768                  ++nt_it)
2769             {
2770                 slex_out(out, (*nt_it));
2771             }
2772         }
2773 
2774     // save the acceptance index table
2775         out_size_t = (*it).acceptance_index.size();
2776         slex_out(out, out_size_t);
2777 
2778         node_table_iter_t nt_end = (*it).acceptance_index.end();
2779         for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
2780              nt_it != nt_end;
2781              ++nt_it)
2782         {
2783             slex_out(out, (*nt_it));
2784         }
2785     }
2786     return true;
2787 }
2788 
2789 #undef slex_out
2790 
2791 /*
2792 a lexer_control object supports some operations on the lexer.
2793     get current lexer state
2794     set state
2795     terminate
2796     state stack (push, pop, top)
2797     set new token return value
2798     ignore the current token
2799     yymore
2800     get length of matched token
2801 */
2802 template <typename TokenT>
2803 class lexer_control
2804 {
2805 public:
2806 
2807     lexer_control(TokenT& token, unsigned int& current_state,
2808         std::stack<unsigned int>& state_stack);
2809     // functions dealing with the lexer state
2810 
2811     // set the state to state
2812     void set_state(unsigned int state);
2813 
2814     // get the current state
2815     unsigned int get_state();
2816 
2817     // pushes the current state onto the top of the state stack and
2818     // switches to new_state
2819     void push_state(unsigned int new_state);
2820 
2821     // pops the top of the state stack and switches to it.
2822     void pop_state();
2823 
2824     // returns the top of the stack without altering the stack's contents.
2825     unsigned int top_state();
2826 
2827     // functions dealing with the token returned.
2828 
2829     // set a new TokenT return value, overriding that one that was
2830     // registered via. register_regex()
2831     void set_token(const TokenT& token);
2832 
2833     // tell the lexer to return an invalid token, signifying termination.
2834     void terminate();
2835 
2836     // ignore the current token, and move on to the next one.  The current
2837     // token will NOT be returned from next_token()
2838     void ignore_current_token();
2839 
2840     // returns true if ignore_current_token() has been called,
2841     // false otherwise.
2842     bool ignore_current_token_set();
2843 
2844 private:
2845     TokenT& m_token;
2846     bool m_ignore_current_token;
2847     unsigned int& m_current_state;
2848     std::stack<unsigned int>& m_state_stack;
2849 };
2850 
2851 template <typename TokenT>
2852 inline
lexer_control(TokenT & token,unsigned int & current_state,std::stack<unsigned int> & state_stack)2853 lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
2854         std::stack<unsigned int>& state_stack)
2855     : m_token(token)
2856     , m_ignore_current_token(false)
2857     , m_current_state(current_state)
2858     , m_state_stack(state_stack)
2859 {
2860 }
2861 
2862 template <typename TokenT>
2863 inline void
set_state(unsigned int state)2864 lexer_control<TokenT>::set_state(unsigned int state)
2865 {
2866     m_current_state = state;
2867 }
2868 
2869 template <typename TokenT>
2870 inline unsigned int
get_state()2871 lexer_control<TokenT>::get_state()
2872 {
2873     return m_current_state;
2874 }
2875 
2876 template <typename TokenT>
2877 inline void
push_state(unsigned int new_state)2878 lexer_control<TokenT>::push_state(unsigned int new_state)
2879 {
2880     m_state_stack.push(m_current_state);
2881     m_current_state = new_state;
2882 }
2883 
2884 template <typename TokenT>
2885 inline void
pop_state()2886 lexer_control<TokenT>::pop_state()
2887 {
2888     m_current_state = m_state_stack.top();
2889     m_state_stack.pop();
2890 }
2891 
2892 template <typename TokenT>
2893 inline unsigned int
top_state()2894 lexer_control<TokenT>::top_state()
2895 {
2896     return m_state_stack.top();
2897 }
2898 
2899 template <typename TokenT>
2900 inline void
set_token(const TokenT & token)2901 lexer_control<TokenT>::set_token(const TokenT& token)
2902 {
2903     m_token = token;
2904 }
2905 
2906 template <typename TokenT>
2907 inline void
terminate()2908 lexer_control<TokenT>::terminate()
2909 {
2910     m_token = -1; // TOOD: fix this, need to be determined by traits
2911 }
2912 
2913 template <typename TokenT>
2914 inline void
ignore_current_token()2915 lexer_control<TokenT>::ignore_current_token()
2916 {
2917     m_ignore_current_token = true;
2918 }
2919 
2920 template <typename TokenT>
2921 inline bool
ignore_current_token_set()2922 lexer_control<TokenT>::ignore_current_token_set()
2923 {
2924     return m_ignore_current_token;
2925 }
2926 
2927 }   // namespace classic
2928 }   // namespace spirit
2929 }   // namespace boost
2930 
2931 #undef BOOST_SPIRIT_IT_NS
2932 
2933 #endif
2934 
2935