1 ///////////////////////////////////////////////////////////////////////////////
2 // perl2xpr.cpp
3 // A utility for translating a Perl regular expression into an
4 // xpressive static regular expression.
5 //
6 // Copyright 2007 Eric Niebler. Distributed under the Boost
7 // Software License, Version 1.0. (See accompanying file
8 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10 #include <stack>
11 #include <string>
12 #include <iostream>
13 #include <boost/xpressive/xpressive_static.hpp>
14 #include <boost/xpressive/regex_actions.hpp>
15
16 namespace x = boost::xpressive;
17 using namespace x;
18
main(int argc,char * argv[])19 int main(int argc, char *argv[])
20 {
21 int i = 1, j = 1;
22 bool nocase = false;
23 char const *dot = " ~_n ";
24 char const *bos = " bos ";
25 char const *eos = " eos ";
26
27 for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
28 {
29 switch(argv[i][j])
30 {
31 case 'i': // perl /i modifier
32 nocase = true;
33 break;
34 case 's': // perl /s modifier
35 dot = " _ ";
36 break;
37 case 'm': // perl /m modifier
38 bos = " bol ";
39 eos = " eol ";
40 break;
41 default:
42 std::cerr << "Unknown option : " << argv[i] << std::endl;
43 return -1;
44 }
45 }
46
47 if(i == argc)
48 {
49 std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n";
50 return -1;
51 }
52
53 // Local variables used by the semantic actions below
54 local<int> mark_nbr;
55 local<std::string> tmp;
56 local<std::stack<std::string> > strings;
57
58 // The rules in the dynamic regex grammar
59 cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
60
61 lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
62 ;
63
64 escape = as_xpr('b') [top(strings) += " _b "]
65 | as_xpr('B') [top(strings) += " ~_b "]
66 | as_xpr('d') [top(strings) += " _d "]
67 | as_xpr('D') [top(strings) += " ~_d "]
68 | as_xpr('s') [top(strings) += " _s "]
69 | as_xpr('S') [top(strings) += " ~_s "]
70 | as_xpr('w') [top(strings) += " _w "]
71 | as_xpr('W') [top(strings) += " ~_w "]
72 | _d [top(strings) += " s" + _ + " "]
73 | _ [top(strings) += " as_xpr('" + _ + "') "]
74 ;
75
76 group = (
77 as_xpr("?:") [top(strings) += " ( "]
78 | as_xpr("?i:") [top(strings) += " icase( "]
79 | as_xpr("?>") [top(strings) += " keep( "]
80 | as_xpr("?=") [top(strings) += " before( "]
81 | as_xpr("?!") [top(strings) += " ~before( "]
82 | as_xpr("?<=") [top(strings) += " after( "]
83 | as_xpr("?<!") [top(strings) += " ~after( "]
84 | nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
85 )
86 >> x::ref(regex)
87 >> as_xpr(')') [top(strings) += " ) "]
88 ;
89
90 setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "]
91 | "[:" >> !as_xpr('^') [top(strings) += "~"]
92 >> (+_w) [top(strings) += _ ]
93 >> ":]"
94 | (
95 (s1=~as_xpr(']'))
96 >> '-'
97 >> (s2=~as_xpr(']'))
98 ) [top(strings) += "range('" + s1 + "','" + s2 + "')"]
99 ;
100
101 charset = !as_xpr('^') [top(strings) += " ~ "]
102 >> nil [top(strings) += " set[ "]
103 >> (
104 setelem
105 | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]
106 )
107 >>*(
108 nil [top(strings) += " | "]
109 >> (
110 setelem
111 | (~as_xpr(']')) [top(strings) += "'" + _ + "'"]
112 )
113 )
114 >> as_xpr(']') [top(strings) += " ] "]
115 ;
116
117 atom = (
118 +(lit >> ~before((set='*','+','?','{')))
119 | lit
120 ) [top(strings) += " as_xpr(\"" + _ + "\") "]
121 | as_xpr('.') [top(strings) += dot]
122 | as_xpr('^') [top(strings) += bos]
123 | as_xpr('$') [top(strings) += eos]
124 | '\\' >> escape
125 | '(' >> group
126 | '[' >> charset
127 ;
128
129 repeat = as_xpr('{') [tmp = " repeat<"]
130 >> (+_d) [tmp += _]
131 >> !(
132 as_xpr(',') [tmp += ","]
133 >> (
134 (+_d) [tmp += _]
135 | nil [tmp += "inf"]
136 )
137 )
138 >> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "]
139 ;
140
141 quant = nil [push(strings, "")]
142 >> atom
143 >> !(
144 (
145 as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
146 | as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
147 | as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
148 | repeat
149 )
150 >> !as_xpr('?') [insert(top(strings), 0, " - ")]
151 )
152 >> nil [tmp = top(strings), pop(strings), top(strings) += tmp]
153 ;
154
155 seq = quant
156 >> *(
157 nil [top(strings) += " >> "]
158 >> quant
159 )
160 ;
161
162 alts = seq
163 >> *(
164 as_xpr('|') [top(strings) += " | "]
165 >> seq
166 )
167 ;
168
169 regex = alts
170 ;
171
172 strings.get().push("");
173 if(!regex_match(argv[i], regex))
174 {
175 std::cerr << "ERROR: unrecognized regular expression" << std::endl;
176 return -1;
177 }
178 else if(nocase)
179 {
180 std::cout << "icase( " << strings.get().top() << " )" << std::endl;
181 }
182 else
183 {
184 std::cout << strings.get().top() << std::endl;
185 }
186
187 return 0;
188 }
189