1// Copyright (c) 2001-2009 Hartmut Kaiser 2// 3// Distributed under the Boost Software License, Version 1.0. (See accompanying 4// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6// This example is the equivalent to the following lex program: 7// 8// %{ 9// /* INITIAL is the default start state. COMMENT is our new */ 10// /* state where we remove comments. */ 11// %} 12// 13// %s COMMENT 14// %% 15// <INITIAL>"//".* ; 16// <INITIAL>"/*" BEGIN COMMENT; 17// <INITIAL>. ECHO; 18// <INITIAL>[\n] ECHO; 19// <COMMENT>"*/" BEGIN INITIAL; 20// <COMMENT>. ; 21// <COMMENT>[\n] ; 22// %% 23// 24// main() 25// { 26// yylex(); 27// } 28// 29// Its purpose is to strip comments out of C code. 30// 31// Additionally this example demonstrates the use of lexer states to structure 32// the lexer definition. 33 34// #define BOOST_SPIRIT_LEXERTL_DEBUG 35 36#include <boost/config/warning_disable.hpp> 37#include <boost/spirit/include/qi.hpp> 38#include <boost/spirit/include/lex_lexer_lexertl.hpp> 39#include <boost/spirit/include/phoenix_operator.hpp> 40#include <boost/spirit/include/phoenix_container.hpp> 41 42#include <iostream> 43#include <string> 44 45#include "example.hpp" 46 47using namespace boost::spirit; 48using namespace boost::spirit::qi; 49using namespace boost::spirit::lex; 50 51/////////////////////////////////////////////////////////////////////////////// 52// Token definition: We use the lexertl based lexer engine as the underlying 53// lexer type. 54/////////////////////////////////////////////////////////////////////////////// 55enum tokenids 56{ 57 IDANY = lex::min_token_id + 10 58}; 59 60template <typename Lexer> 61struct strip_comments_tokens : lexer<Lexer> 62{ 63 strip_comments_tokens() 64 { 65 // define tokens and associate them with the lexer 66 cppcomment = "//[^\n]*"; 67 ccomment = "/\\*"; 68 endcomment = "\\*/"; 69 70 // The following tokens are associated with the default lexer state 71 // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is 72 // strictly optional. 73 this->self.add 74 (cppcomment) // no explicit token id is associated 75 (ccomment) 76 (".", IDANY) // IDANY is the token id associated with this token 77 // definition 78 ; 79 80 // The following tokens are associated with the lexer state "COMMENT". 81 // We switch lexer states from inside the parsing process using the 82 // in_state("COMMENT")[] parser component as shown below. 83 this->self("COMMENT").add 84 (endcomment) 85 (".", IDANY) 86 ; 87 } 88 89 token_def<> cppcomment, ccomment, endcomment; 90}; 91 92/////////////////////////////////////////////////////////////////////////////// 93// Grammar definition 94/////////////////////////////////////////////////////////////////////////////// 95template <typename Iterator> 96struct strip_comments_grammar : grammar<Iterator> 97{ 98 template <typename TokenDef> 99 strip_comments_grammar(TokenDef const& tok) 100 : strip_comments_grammar::base_type(start) 101 { 102 // The in_state("COMMENT")[...] parser component switches the lexer 103 // state to be 'COMMENT' during the matching of the embedded parser. 104 start = *( tok.ccomment 105 >> in_state("COMMENT") 106 [ 107 // the lexer is in the 'COMMENT' state during 108 // matching of the following parser components 109 *token(IDANY) >> tok.endcomment 110 ] 111 | tok.cppcomment 112 | token(IDANY) [ std::cout << _1 ] 113 ) 114 ; 115 } 116 117 rule<Iterator> start; 118}; 119 120/////////////////////////////////////////////////////////////////////////////// 121int main(int argc, char* argv[]) 122{ 123 // iterator type used to expose the underlying input stream 124 typedef std::string::iterator base_iterator_type; 125 126 // lexer type 127 typedef lexertl::lexer<lexertl::token<base_iterator_type> > lexer_type; 128 129 // iterator type exposed by the lexer 130 typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type; 131 132 // now we use the types defined above to create the lexer and grammar 133 // object instances needed to invoke the parsing process 134 strip_comments_tokens<lexer_type> strip_comments; // Our lexer 135 strip_comments_grammar<iterator_type> g (strip_comments); // Our grammar 136 137 // Parsing is done based on the token stream, not the character 138 // stream read from the input. 139 std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1])); 140 base_iterator_type first = str.begin(); 141 142 bool r = tokenize_and_parse(first, str.end(), strip_comments, g); 143 144 if (r) { 145 std::cout << "-------------------------\n"; 146 std::cout << "Parsing succeeded\n"; 147 std::cout << "-------------------------\n"; 148 } 149 else { 150 std::string rest(first, str.end()); 151 std::cout << "-------------------------\n"; 152 std::cout << "Parsing failed\n"; 153 std::cout << "stopped at: \"" << rest << "\"\n"; 154 std::cout << "-------------------------\n"; 155 } 156 157 std::cout << "Bye... :-) \n\n"; 158 return 0; 159} 160 161 162 163