• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////////////
2 // toy_spirit.hpp
3 //
4 //  Copyright 2008 Eric Niebler. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #include <cctype>
9 #include <string>
10 #include <cstring>
11 #include <iostream>
12 #include <boost/assert.hpp>
13 #include <boost/mpl/assert.hpp>
14 #include <boost/proto/core.hpp>
15 #include <boost/proto/context.hpp>
16 #include <boost/test/unit_test.hpp>
17 
18 namespace boost
19 {
20     // global tags
21     struct char_tag {};
22     struct ichar_tag {};
23     struct istring_tag {};
24     struct ichar_range_tag {};
25     struct never_tag {};
26     struct always_tag {};
27     struct space_tag {};
28 
29     // global primitives
30     proto::terminal<char_tag>::type const char_ = {{}};
31     proto::terminal<space_tag>::type const space = {{}};
32 
33     using proto::lit;
34     using proto::literal;
35 }
36 
37 namespace boost { namespace spirit2
38 {
39 
40     // handy typedefs
41     typedef proto::terminal<char_tag>::type anychar_p;
42     typedef proto::terminal<ichar_tag>::type ianychar_p;
43     typedef proto::terminal<istring_tag>::type ianystr_p;
44     typedef proto::terminal<ichar_range_tag>::type ianychar_range_p;
45     typedef proto::terminal<never_tag>::type never_p;
46     typedef proto::terminal<space_tag>::type space_p;
47 
48     struct SpiritGrammar;
49     struct SkipperGrammar;
50     struct SpiritPrimitives;
51     template<typename Grammar>
52     struct SpiritComposites;
53 
54     struct CharLiteral
55       : proto::terminal<char>
56     {};
57 
58     struct NTBSLiteral
59       : proto::terminal<char const *>
60     {};
61 
62     struct StdStringLiteral
63       : proto::terminal<std::string>
64     {};
65 
66     struct CharParser
67       : proto::function<anychar_p, CharLiteral>
68     {};
69 
70     struct ICharParser
71       : proto::function<ianychar_p, CharLiteral, CharLiteral>
72     {};
73 
74     struct CharRangeParser
75       : proto::function<anychar_p, CharLiteral, CharLiteral>
76     {};
77 
78     struct IStrParser
79       : proto::function<ianystr_p, StdStringLiteral>
80     {};
81 
82     struct ICharRangeParser
83       : proto::function<ianychar_range_p, CharLiteral, CharLiteral>
84     {};
85 
86     ianychar_p const ichar_ = {{}};
87     ianystr_p const istr_ = {{}};
88     ianychar_range_p const ichar_range_ = {{}};
89 
90     namespace utility
91     {
char_icmp(char ch,char lo,char hi)92         inline bool char_icmp(char ch, char lo, char hi)
93         {
94             return ch == lo || ch == hi;
95         }
96 
97         template<typename FwdIter>
string_cmp(char const * sz,FwdIter & begin,FwdIter end)98         inline bool string_cmp(char const *sz, FwdIter &begin, FwdIter end)
99         {
100             FwdIter tmp = begin;
101             for(; *sz; ++tmp, ++sz)
102                 if(tmp == end || *tmp != *sz)
103                     return false;
104             begin = tmp;
105             return true;
106         }
107 
108         template<typename FwdIter>
string_icmp(std::string const & str,FwdIter & begin,FwdIter end)109         inline bool string_icmp(std::string const &str, FwdIter &begin, FwdIter end)
110         {
111             BOOST_ASSERT(0 == str.size() % 2);
112             FwdIter tmp = begin;
113             std::string::const_iterator istr = str.begin(), estr = str.end();
114             for(; istr != estr; ++tmp, istr += 2)
115                 if(tmp == end || (*tmp != *istr && *tmp != *(istr+1)))
116                     return false;
117             begin = tmp;
118             return true;
119         }
120 
in_range(char ch,char lo,char hi)121         inline bool in_range(char ch, char lo, char hi)
122         {
123             return ch >= lo && ch <= hi;
124         }
125 
in_irange(char ch,char lo,char hi)126         inline bool in_irange(char ch, char lo, char hi)
127         {
128             return in_range(ch, lo, hi)
129                 || in_range(std::tolower(ch), lo, hi)
130                 || in_range(std::toupper(ch), lo, hi);
131         }
132 
to_istr(char const * sz)133         inline std::string to_istr(char const *sz)
134         {
135             std::string res;
136             res.reserve(std::strlen(sz) * 2);
137             for(; *sz; ++sz)
138             {
139                 res.push_back(std::tolower(*sz));
140                 res.push_back(std::toupper(*sz));
141             }
142             return res;
143         }
144     } // namespace utility
145 
146     template<typename FwdIter, typename Skipper = never_p>
147     struct spirit_context
148       : std::pair<FwdIter, FwdIter>
149       , proto::callable_context<spirit_context<FwdIter, Skipper> >
150     {
151         typedef bool result_type;
152         typedef FwdIter iterator;
153 
spirit_contextboost::spirit2::spirit_context154         spirit_context(FwdIter first, FwdIter second, Skipper const &skip = Skipper())
155           : std::pair<FwdIter, FwdIter>(first, second)
156           , skip_(skip)
157           , in_skip_(false)
158         {}
159 
160         // parse function for anychar_p
operator ()boost::spirit2::spirit_context161         bool operator()(proto::tag::terminal, char_tag)
162         {
163             this->skip();
164             if(this->first == this->second)
165                 return false;
166             ++this->first;
167             return true;
168         }
169 
170         // parse function for char_('a')
171         template<typename Expr>
operator ()boost::spirit2::spirit_context172         bool operator()(proto::tag::function, anychar_p, Expr const &expr)
173         {
174             this->skip();
175             return proto::eval(expr, *this);
176         }
177 
178         // parse function for space_p
operator ()boost::spirit2::spirit_context179         bool operator()(proto::tag::terminal, space_tag)
180         {
181             this->skip();
182             if(this->first == this->second || !std::isspace(*this->first))
183                 return false;
184             ++this->first;
185             return true;
186         }
187 
188         // parse function for bare character literals
operator ()boost::spirit2::spirit_context189         bool operator()(proto::tag::terminal, char ch)
190         {
191             this->skip();
192             if(this->first == this->second || *this->first != ch)
193                 return false;
194             ++this->first;
195             return true;
196         }
197 
198         // case-insensitive character parser
199         template<typename Arg1, typename Arg2>
operator ()boost::spirit2::spirit_context200         bool operator()(proto::tag::function, ianychar_p, Arg1 const &arg1, Arg2 const &arg2)
201         {
202             this->skip();
203             if(this->first == this->second
204               || !utility::char_icmp(*this->first, proto::value(arg1), proto::value(arg2)))
205                 return false;
206             ++this->first;
207             return true;
208         }
209 
210         // parse function for NTBS literals
operator ()boost::spirit2::spirit_context211         bool operator()(proto::tag::terminal, char const *sz)
212         {
213             this->skip();
214             return utility::string_cmp(sz, this->first, this->second);
215         }
216 
217         // parse function for istr_("hello")
218         template<typename Expr>
operator ()boost::spirit2::spirit_context219         bool operator()(proto::tag::function, ianystr_p, Expr const &expr)
220         {
221             this->skip();
222             return utility::string_icmp(proto::value(expr), this->first, this->second);
223         }
224 
225         // parse function for char_('a','z')
226         template<typename Arg1, typename Arg2>
operator ()boost::spirit2::spirit_context227         bool operator()(proto::tag::function, anychar_p, Arg1 const &arg1, Arg2 const &arg2)
228         {
229             BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
230             this->skip();
231             if(this->first == this->second
232               || !utility::in_range(*this->first, proto::value(arg1), proto::value(arg2)))
233                 return false;
234             ++this->first;
235             return true;
236         }
237 
238         // parse function for ichar_range_('a','z')
239         template<typename Arg1, typename Arg2>
operator ()boost::spirit2::spirit_context240         bool operator()(proto::tag::function, ianychar_range_p, Arg1 const &arg1, Arg2 const &arg2)
241         {
242             BOOST_ASSERT(proto::value(arg1) <= proto::value(arg2));
243             this->skip();
244             if(this->first == this->second
245               || !utility::in_irange(*this->first, proto::value(arg1), proto::value(arg2)))
246                 return false;
247             ++this->first;
248             return true;
249         }
250 
251         // parse function for complemented thingies (where thingies are assumed
252         // to be 1 character wide).
253         template<typename Expr>
operator ()boost::spirit2::spirit_context254         bool operator()(proto::tag::complement, Expr const &expr)
255         {
256             this->skip();
257             iterator where = this->first;
258             if(proto::eval(expr, *this))
259                 return this->first = where, false;
260             this->first = ++where;
261             return true;
262         }
263 
264         // never_p parse function always returns false.
operator ()boost::spirit2::spirit_context265         bool operator()(proto::tag::terminal, never_tag)
266         {
267             return false;
268         }
269 
270         // for A >> B, succeeds if A and B matches.
271         template<typename Left, typename Right>
operator ()boost::spirit2::spirit_context272         bool operator()(proto::tag::shift_right, Left const &left, Right const &right)
273         {
274             return proto::eval(left, *this) && proto::eval(right, *this);
275         }
276 
277         // for A | B, succeeds if either A or B matches at this point.
278         template<typename Left, typename Right>
operator ()boost::spirit2::spirit_context279         bool operator()(proto::tag::bitwise_or, Left const &left, Right const &right)
280         {
281             iterator where = this->first;
282             return proto::eval(left, *this) || proto::eval(right, this->reset(where));
283         }
284 
285         // for *A, greedily match A as many times as possible.
286         template<typename Expr>
operator ()boost::spirit2::spirit_context287         bool operator()(proto::tag::dereference, Expr const &expr)
288         {
289             iterator where = this->first;
290             while(proto::eval(expr, *this))
291                 where = this->first;
292             // make sure that when we return true, the iterator is at the correct position!
293             this->first = where;
294             return true;
295         }
296 
297         // for +A, greedily match A one or more times.
298         template<typename Expr>
operator ()boost::spirit2::spirit_context299         bool operator()(proto::tag::unary_plus, Expr const &expr)
300         {
301             return proto::eval(expr, *this) && proto::eval(*expr, *this);
302         }
303 
304         // for !A, optionally match A.
305         template<typename Expr>
operator ()boost::spirit2::spirit_context306         bool operator()(proto::tag::logical_not, Expr const &expr)
307         {
308             iterator where = this->first;
309             if(!proto::eval(expr, *this))
310                 this->first = where;
311             return true;
312         }
313 
314         // for (A - B), matches when A but not B matches.
315         template<typename Left, typename Right>
operator ()boost::spirit2::spirit_context316         bool operator()(proto::tag::minus, Left const &left, Right const &right)
317         {
318             iterator where = this->first;
319             return !proto::eval(right, *this) && proto::eval(left, this->reset(where));
320         }
321     private:
resetboost::spirit2::spirit_context322         spirit_context &reset(iterator where)
323         {
324             this->first = where;
325             return *this;
326         }
327 
skipboost::spirit2::spirit_context328         void skip()
329         {
330             if(!this->in_skip_)
331             {
332                 this->in_skip_ = true;
333                 while(proto::eval(this->skip_, *this))
334                 {}
335                 this->in_skip_ = false;
336             }
337         }
338 
339         Skipper skip_;
340         bool in_skip_;
341     };
342 
343     struct as_ichar_parser : proto::callable
344     {
345         typedef proto::function<
346             ianychar_p
347           , proto::terminal<char>::type
348           , proto::terminal<char>::type
349         >::type result_type;
350 
351         template<typename Expr>
operator ()boost::spirit2::as_ichar_parser352         result_type operator()(Expr const &expr) const
353         {
354             char lo = std::tolower(proto::value(proto::child_c<1>(expr)));
355             char hi = std::toupper(proto::value(proto::child_c<1>(expr)));
356             result_type that = {ichar_, {lo}, {hi}};
357             return that;
358         }
359     };
360 
361     struct as_ichar_range_parser : proto::callable
362     {
363         typedef proto::function<
364             ianychar_range_p
365           , proto::terminal<char>::type
366           , proto::terminal<char>::type
367         >::type result_type;
368 
369         template<typename Expr>
operator ()boost::spirit2::as_ichar_range_parser370         result_type operator()(Expr const &expr) const
371         {
372             char lo = proto::value(proto::child_c<1>(expr));
373             char hi = proto::value(proto::child_c<2>(expr));
374             result_type that = {ichar_range_, {lo}, {hi}};
375             return that;
376         }
377     };
378 
379     struct as_ichar_literal : proto::callable
380     {
381         typedef proto::function<
382             ianychar_p
383           , proto::terminal<char>::type
384           , proto::terminal<char>::type
385         >::type result_type;
386 
387         template<typename Expr>
operator ()boost::spirit2::as_ichar_literal388         result_type operator()(Expr const &expr) const
389         {
390             char lo = std::tolower(proto::value(expr));
391             char hi = std::toupper(proto::value(expr));
392             result_type that = {ichar_, {lo}, {hi}};
393             return that;
394         }
395     };
396 
397     struct as_intbs_literal : proto::callable
398     {
399         typedef proto::function<
400             ianystr_p
401           , proto::terminal<std::string>::type
402         >::type result_type;
403 
404         template<typename Expr>
operator ()boost::spirit2::as_intbs_literal405         result_type operator()(Expr const &expr) const
406         {
407             result_type that = {istr_, {utility::to_istr(proto::value(expr))}};
408             return that;
409         }
410     };
411 
412     struct as_istdstring_literal : proto::callable
413     {
414         typedef proto::function<
415             ianystr_p
416           , proto::terminal<std::string>::type
417         >::type result_type;
418 
419         template<typename Expr>
operator ()boost::spirit2::as_istdstring_literal420         result_type operator()(Expr const &expr) const
421         {
422             result_type that = {istr_, {utility::to_istr(proto::value(expr).c_str())}};
423             return that;
424         }
425     };
426 
427     ///////////////////////////////////////////////////////////////////////////
428     // Transforms
429     ///////////////////////////////////////////////////////////////////////////
430 
431     struct skip_primitives : proto::transform<skip_primitives>
432     {
433         template<typename Expr, typename State, typename Data>
434         struct impl : proto::transform_impl<Expr, State, Data>
435         {
436             typedef
437                 typename proto::shift_right<
438                     typename proto::dereference<State>::type
439                   , Expr
440                 >::type
441             result_type;
442 
operator ()boost::spirit2::skip_primitives::impl443             result_type operator ()(
444                 typename impl::expr_param expr
445               , typename impl::state_param state
446               , typename impl::data_param data
447             ) const
448             {
449                 result_type that = {{state}, expr};
450                 return that;
451             }
452         };
453     };
454 
455     ///////////////////////////////////////////////////////////////////////////
456     // Grammar
457     ///////////////////////////////////////////////////////////////////////////
458     using proto::_;
459 
460     struct SpiritGrammar;
461 
462     struct SpiritCaseSensitivePrimitives
463       : proto::or_<
464             proto::when<CharParser, as_ichar_parser(_)>
465           , proto::when<CharLiteral, as_ichar_literal(_)>
466           , proto::when<NTBSLiteral, as_intbs_literal(_)>
467           , proto::when<CharRangeParser, as_ichar_range_parser(_)>
468           , proto::when<StdStringLiteral, as_istdstring_literal(_)>
469         >
470     {};
471 
472     struct SpiritCaseInsensitivePrimitives
473       : proto::or_<
474             anychar_p
475           , IStrParser
476           , ICharParser
477           , ICharRangeParser
478           , proto::complement<SpiritPrimitives>
479         >
480     {};
481 
482     struct SpiritPrimitives
483       : proto::or_<
484             SpiritCaseSensitivePrimitives
485           , SpiritCaseInsensitivePrimitives
486         >
487     {};
488 
489     template<typename Grammar>
490     struct SpiritComposites
491       : proto::or_<
492             proto::bitwise_or< Grammar, Grammar >
493           , proto::shift_right< Grammar, Grammar >
494           , proto::minus< Grammar, Grammar >
495           , proto::dereference< Grammar >
496           , proto::unary_plus< Grammar >
497           , proto::logical_not< Grammar >
498         >
499     {};
500 
501     // Regular Spirit grammar, has no-case transforms
502     struct SpiritGrammar
503       : proto::or_<
504             SpiritComposites<SpiritGrammar>
505           , SpiritPrimitives
506         >
507     {};
508 
509     // Spirit grammar with the skipper transform
510     struct SkipperGrammar
511       : proto::or_<
512             SpiritComposites<SkipperGrammar>
513           , proto::when<SpiritPrimitives, skip_primitives>
514         >
515     {};
516 
517     ///////////////////////////////////////////////////////////////////////////
518     // Directives
519     ///////////////////////////////////////////////////////////////////////////
520 
521     struct no_case_directive
522     {
523         template<typename Expr>
524         typename boost::result_of<SpiritGrammar(Expr const &)>::type const
operator []boost::spirit2::no_case_directive525         operator [](Expr const &expr) const
526         {
527             return SpiritGrammar()(expr);
528         }
529     };
530 
531     // no_case
532     no_case_directive const no_case = {};
533 
534     template<typename Skipper>
535     struct skip_directive
536     {
skip_directiveboost::spirit2::skip_directive537         skip_directive(Skipper const &skip)
538           : skip_(skip)
539         {}
540 
541         template<typename Expr>
542         typename boost::result_of<SkipperGrammar(Expr const &, Skipper const &)>::type const
operator []boost::spirit2::skip_directive543         operator [](Expr const &expr) const
544         {
545             return SkipperGrammar()(expr, this->skip_);
546         }
547     private:
548         Skipper skip_;
549     };
550 
551     // skip
552     template<typename Skipper>
skip(Skipper const & skip)553     skip_directive<Skipper> skip(Skipper const &skip)
554     {
555         return skip_directive<Skipper>(skip);
556     }
557 
558     ///////////////////////////////////////////////////////////////////////////
559     // parse
560     ///////////////////////////////////////////////////////////////////////////
561 
562     template<typename FwdIter, typename Rule>
parse(FwdIter begin,FwdIter end,Rule const & rule)563     bool parse(FwdIter begin, FwdIter end, Rule const &rule)
564     {
565         // make sure the rule corresponds to the Spirit grammar:
566         BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
567 
568         spirit_context<FwdIter> ctx(begin, end);
569         return proto::eval(rule, ctx);
570     }
571 
572     // parse with a skip parser can be implemented in one of two ways:
573     // Method 1)
574     //      The skip parser is passed to all the parsers which invoke it
575     //      before they invoke themselves. This is how Spirit-1 does it,
576     //      and it is the cause of the Scanner Business. However, it has
577     //      the advantage of not needing a parser transformation phase.
578     // Method 2)
579     //      Transform the expression template to insert the skip parser
580     //      in between all sequenced parsers. That is, transform (A >> B)
581     //      to (*skip >> A >> *skip >> B). This has the advantage of making
582     //      it unnecessary to pass the scanner to all the parsers, which
583     //      means its type doesn't show up in function signatures, avoiding
584     //      the Scanner Business.
585     // Recommendation:
586     //      Both methods should be supported. Method 1 should be preferred
587     //      when calling parse with parsers defined inline. Method 2 should
588     //      be preferred when a parser expression is assigned to a rule<>,
589     //      thereby making the type of the rule<> independent of the skip
590     //      parser used. I imagine a syntax like:
591     //          rule<> r = skip(space)[A >> B >> C]
592     template<typename FwdIter, typename Rule, typename Skipper>
parse(FwdIter begin,FwdIter end,Rule const & rule,Skipper const & skipper)593     bool parse(FwdIter begin, FwdIter end, Rule const &rule, Skipper const &skipper)
594     {
595         // make sure the rule corresponds to the Spirit grammar:
596         BOOST_MPL_ASSERT((proto::matches<Rule, SpiritGrammar>));
597 
598         //// Method 1: pass skip parser in the context structure.
599         //spirit_context<FwdIter, Skipper> ctx(begin, end, skipper);
600         //return proto::eval(rule, ctx);
601 
602         // Method 2: Embed skip parser via tree transformation.
603         spirit_context<FwdIter> ctx(begin, end);
604         return proto::eval(spirit2::skip(skipper)[rule], ctx);
605     }
606 
607 }}
608 
609 using namespace boost;
610 using namespace spirit2;
611 
test_toy_spirit()612 void test_toy_spirit()
613 {
614     std::string str("abcd");
615 
616     // This will fail:
617     BOOST_CHECK(!spirit2::parse(str.begin(), str.end()
618                    , char_ >> char_('a')));
619 
620     // This will succeed:
621     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
622                    , char_ >> char_('b') >> char_ >> 'd'));
623 
624     // This will succeed:
625     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
626                    , 'a' >> ('c' >> char_ | 'b' >> char_('d') | 'b' >> char_('c')) >> 'd'));
627 
628     // This will succeed:
629     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
630                    , *(char_ - 'd')));
631 
632     // This will succeed:
633     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
634                    , no_case[char_('A') >> 'B' >> "CD"]));
635 
636     // This will succeed:
637     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
638                    , no_case[*char_('A','Z')]));
639 
640     literal<char> a = lit('a');
641     literal<char const *> bcd = lit("bcd");
642 
643     // This will succeed:
644     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
645                    , +~~a >> no_case[bcd]));
646 
647     // Scanner Business: R.I.P. :-)
648     str = "a  b    cd";
649     BOOST_CHECK(spirit2::parse(str.begin(), str.end()
650                    , char_('a') >> 'b' >> 'c' >> 'd', space >> space));
651 
652 }
653 
654 using namespace boost::unit_test;
655 ///////////////////////////////////////////////////////////////////////////////
656 // init_unit_test_suite
657 //
init_unit_test_suite(int argc,char * argv[])658 test_suite* init_unit_test_suite( int argc, char* argv[] )
659 {
660     test_suite *test = BOOST_TEST_SUITE("test proto and and toy spirit-2");
661 
662     test->add(BOOST_TEST_CASE(&test_toy_spirit));
663 
664     return test;
665 }
666