• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//  Copyright (c) 2001-2009 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
//
//       %{
//       /* INITIAL is the default start state.  COMMENT is our new  */
//       /* state where we remove comments.                          */
//       %}
//
//       %s COMMENT
//       %%
//       <INITIAL>"//".*    ;
//       <INITIAL>"/*"      BEGIN COMMENT;
//       <INITIAL>.         ECHO;
//       <INITIAL>[\n]      ECHO;
//       <COMMENT>"*/"      BEGIN INITIAL;
//       <COMMENT>.         ;
//       <COMMENT>[\n]      ;
//       %%
//
//       main()
//       {
//         yylex();
//       }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally this example demonstrates the use of lexer states to structure
//  the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
55enum tokenids
56{
57    IDANY = lex::min_token_id + 10
58};
59
60template <typename Lexer>
61struct strip_comments_tokens : lexer<Lexer>
62{
63    strip_comments_tokens()
64    {
65        // define tokens and associate them with the lexer
66        cppcomment = "//[^\n]*";
67        ccomment = "/\\*";
68        endcomment = "\\*/";
69
70        // The following tokens are associated with the default lexer state
71        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
72        // strictly optional.
73        this->self.add
74            (cppcomment)    // no explicit token id is associated
75            (ccomment)
76            (".", IDANY)    // IDANY is the token id associated with this token
77                            // definition
78        ;
79
80        // The following tokens are associated with the lexer state "COMMENT".
81        // We switch lexer states from inside the parsing process using the
82        // in_state("COMMENT")[] parser component as shown below.
83        this->self("COMMENT").add
84            (endcomment)
85            (".", IDANY)
86        ;
87    }
88
89    token_def<> cppcomment, ccomment, endcomment;
90};
91
///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
95template <typename Iterator>
96struct strip_comments_grammar : grammar<Iterator>
97{
98    template <typename TokenDef>
99    strip_comments_grammar(TokenDef const& tok)
100      : strip_comments_grammar::base_type(start)
101    {
102        // The in_state("COMMENT")[...] parser component switches the lexer
103        // state to be 'COMMENT' during the matching of the embedded parser.
104        start =  *(   tok.ccomment
105                      >>  in_state("COMMENT")
106                          [
107                              // the lexer is in the 'COMMENT' state during
108                              // matching of the following parser components
109                              *token(IDANY) >> tok.endcomment
110                          ]
111                  |   tok.cppcomment
112                  |   token(IDANY)   [ std::cout << _1 ]
113                  )
114              ;
115    }
116
117    rule<Iterator> start;
118};
119
///////////////////////////////////////////////////////////////////////////////
121int main(int argc, char* argv[])
122{
123    // iterator type used to expose the underlying input stream
124    typedef std::string::iterator base_iterator_type;
125
126    // lexer type
127    typedef lexertl::lexer<lexertl::token<base_iterator_type> > lexer_type;
128
129    // iterator type exposed by the lexer
130    typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
131
132    // now we use the types defined above to create the lexer and grammar
133    // object instances needed to invoke the parsing process
134    strip_comments_tokens<lexer_type> strip_comments;           // Our lexer
135    strip_comments_grammar<iterator_type> g (strip_comments);   // Our grammar
136
137    // Parsing is done based on the token stream, not the character
138    // stream read from the input.
139    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
140    base_iterator_type first = str.begin();
141
142    bool r = tokenize_and_parse(first, str.end(), strip_comments, g);
143
144    if (r) {
145        std::cout << "-------------------------\n";
146        std::cout << "Parsing succeeded\n";
147        std::cout << "-------------------------\n";
148    }
149    else {
150        std::string rest(first, str.end());
151        std::cout << "-------------------------\n";
152        std::cout << "Parsing failed\n";
153        std::cout << "stopped at: \"" << rest << "\"\n";
154        std::cout << "-------------------------\n";
155    }
156
157    std::cout << "Bye... :-) \n\n";
158    return 0;
159}


