//  Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is equivalent to the following lex program:
//
//       %{
//       /* INITIAL is the default start state.  COMMENT is our new  */
//       /* state where we remove comments.                          */
//       %}
//
//       %s COMMENT
//       %%
//       <INITIAL>"//".*    ;
//       <INITIAL>"/*"      BEGIN COMMENT;
//       <INITIAL>.         ECHO;
//       <INITIAL>[\n]      ECHO;
//       <COMMENT>"*/"      BEGIN INITIAL;
//       <COMMENT>.         ;
//       <COMMENT>[\n]      ;
//       %%
//
//       main()
//       {
//         yylex();
//       }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally this example demonstrates the use of lexer states to structure
//  the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
35 
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
48 
///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//                    lexer type.
///////////////////////////////////////////////////////////////////////////////
53 enum tokenids
54 {
55     IDANY = lex::min_token_id + 10
56 };
57 
58 template <typename Lexer>
59 struct strip_comments_tokens : lex::lexer<Lexer>
60 {
strip_comments_tokensstrip_comments_tokens61     strip_comments_tokens()
62       : strip_comments_tokens::base_type(lex::match_flags::match_default)
63     {
64         // define tokens and associate them with the lexer
65         cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
66         ccomment = "\"/*\"";            // '/*'
67         endcomment = "\"*/\"";          // '*/'
68 
69         // The following tokens are associated with the default lexer state
70         // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
71         // strictly optional.
72         this->self.add
73             (cppcomment)    // no explicit token id is associated
74             (ccomment)
75             (".", IDANY)    // IDANY is the token id associated with this token
76                             // definition
77         ;
78 
79         // The following tokens are associated with the lexer state "COMMENT".
80         // We switch lexer states from inside the parsing process using the
81         // in_state("COMMENT")[] parser component as shown below.
82         this->self("COMMENT").add
83             (endcomment)
84             (".", IDANY)
85         ;
86     }
87 
88     lex::token_def<> cppcomment, ccomment, endcomment;
89 };
90 
///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
94 template <typename Iterator>
95 struct strip_comments_grammar : qi::grammar<Iterator>
96 {
97     template <typename TokenDef>
strip_comments_grammarstrip_comments_grammar98     strip_comments_grammar(TokenDef const& tok)
99       : strip_comments_grammar::base_type(start)
100     {
101         // The in_state("COMMENT")[...] parser component switches the lexer
102         // state to be 'COMMENT' during the matching of the embedded parser.
103         start =  *(   tok.ccomment
104                       >>  qi::in_state("COMMENT")
105                           [
106                               // the lexer is in the 'COMMENT' state during
107                               // matching of the following parser components
108                               *token(IDANY) >> tok.endcomment
109                           ]
110                   |   tok.cppcomment
111                   |   qi::token(IDANY)   [ std::cout << _1 ]
112                   )
113               ;
114     }
115 
116     qi::rule<Iterator> start;
117 };
118 
///////////////////////////////////////////////////////////////////////////////
main(int argc,char * argv[])120 int main(int argc, char* argv[])
121 {
122     // iterator type used to expose the underlying input stream
123     typedef std::string::iterator base_iterator_type;
124 
125     // lexer type
126     typedef
127         lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
128     lexer_type;
129 
130     // iterator type exposed by the lexer
131     typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
132 
133     // now we use the types defined above to create the lexer and grammar
134     // object instances needed to invoke the parsing process
135     strip_comments_tokens<lexer_type> strip_comments;           // Our lexer
136     strip_comments_grammar<iterator_type> g (strip_comments);   // Our parser
137 
138     // Parsing is done based on the token stream, not the character
139     // stream read from the input.
140     std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
141     base_iterator_type first = str.begin();
142 
143     bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);
144 
145     if (r) {
146         std::cout << "-------------------------\n";
147         std::cout << "Parsing succeeded\n";
148         std::cout << "-------------------------\n";
149     }
150     else {
151         std::string rest(first, str.end());
152         std::cout << "-------------------------\n";
153         std::cout << "Parsing failed\n";
154         std::cout << "stopped at: \"" << rest << "\"\n";
155         std::cout << "-------------------------\n";
156     }
157 
158     std::cout << "Bye... :-) \n\n";
159     return 0;
160 }
161 
162 
163 
164