• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////////////
2 // perl2xpr.cpp
3 //      A utility for translating a Perl regular expression into an
4 //      xpressive static regular expression.
5 //
6 //  Copyright 2007 Eric Niebler. Distributed under the Boost
7 //  Software License, Version 1.0. (See accompanying file
8 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 #include <stack>
11 #include <string>
12 #include <iostream>
13 #include <boost/xpressive/xpressive_static.hpp>
14 #include <boost/xpressive/regex_actions.hpp>
15 
16 namespace x = boost::xpressive;
17 using namespace x;
18 
main(int argc,char * argv[])19 int main(int argc, char *argv[])
20 {
21     int i = 1, j = 1;
22     bool nocase = false;
23     char const *dot = " ~_n ";
24     char const *bos = " bos ";
25     char const *eos = " eos ";
26 
27     for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
28     {
29         switch(argv[i][j])
30         {
31         case 'i':           // perl /i modifier
32             nocase = true;
33             break;
34         case 's':           // perl /s modifier
35             dot = " _ ";
36             break;
37         case 'm':           // perl /m modifier
38             bos = " bol ";
39             eos = " eol ";
40             break;
41         default:
42             std::cerr << "Unknown option : " << argv[i] << std::endl;
43             return -1;
44         }
45     }
46 
47     if(i == argc)
48     {
49         std::cerr << "Usage:\n    perl2xpr [-i] [-s] [-m] 're'\n";
50         return -1;
51     }
52 
53     // Local variables used by the semantic actions below
54     local<int> mark_nbr;
55     local<std::string> tmp;
56     local<std::stack<std::string> > strings;
57 
58     // The rules in the dynamic regex grammar
59     cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
60 
61     lit     = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
62             ;
63 
64     escape  = as_xpr('b')               [top(strings) += " _b "]
65             | as_xpr('B')               [top(strings) += " ~_b "]
66             | as_xpr('d')               [top(strings) += " _d "]
67             | as_xpr('D')               [top(strings) += " ~_d "]
68             | as_xpr('s')               [top(strings) += " _s "]
69             | as_xpr('S')               [top(strings) += " ~_s "]
70             | as_xpr('w')               [top(strings) += " _w "]
71             | as_xpr('W')               [top(strings) += " ~_w "]
72             | _d                        [top(strings) += " s" + _ + " "]
73             | _                         [top(strings) += " as_xpr('" + _ + "') "]
74             ;
75 
76     group   = (
77                   as_xpr("?:")          [top(strings) += " ( "]
78                 | as_xpr("?i:")         [top(strings) += " icase( "]
79                 | as_xpr("?>")          [top(strings) += " keep( "]
80                 | as_xpr("?=")          [top(strings) += " before( "]
81                 | as_xpr("?!")          [top(strings) += " ~before( "]
82                 | as_xpr("?<=")         [top(strings) += " after( "]
83                 | as_xpr("?<!")         [top(strings) += " ~after( "]
84                 | nil                   [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
85               )
86             >> x::ref(regex)
87             >> as_xpr(')')              [top(strings) += " ) "]
88             ;
89 
90     setelem = as_xpr('\\') >> _         [top(strings) += " as_xpr('" + _ + "') "]
91             | "[:" >> !as_xpr('^')      [top(strings) += "~"]
92                 >> (+_w)                [top(strings) += _ ]
93                 >> ":]"
94             | (
95                    (s1=~as_xpr(']'))
96                 >> '-'
97                 >> (s2=~as_xpr(']'))
98               )                         [top(strings) += "range('" + s1 + "','" + s2 + "')"]
99             ;
100 
101     charset = !as_xpr('^')              [top(strings) += " ~ "]
102             >> nil                      [top(strings) += " set[ "]
103             >> (
104                     setelem
105                   | (~as_xpr(']'))      [top(strings) += " as_xpr('" + _ + "') "]
106                )
107             >>*(
108                     nil                 [top(strings) += " | "]
109                  >> (
110                         setelem
111                       | (~as_xpr(']'))  [top(strings) += "'" + _ + "'"]
112                     )
113                )
114             >> as_xpr(']')              [top(strings) += " ] "]
115             ;
116 
117     atom    = (
118                   +(lit >> ~before((set='*','+','?','{')))
119                 | lit
120               )                         [top(strings) += " as_xpr(\"" + _ + "\") "]
121             | as_xpr('.')               [top(strings) += dot]
122             | as_xpr('^')               [top(strings) += bos]
123             | as_xpr('$')               [top(strings) += eos]
124             | '\\' >> escape
125             | '(' >> group
126             | '[' >> charset
127             ;
128 
129     repeat  = as_xpr('{')               [tmp = " repeat<"]
130             >> (+_d)                    [tmp += _]
131             >> !(
132                     as_xpr(',')         [tmp += ","]
133                  >> (
134                         (+_d)           [tmp += _]
135                       | nil             [tmp += "inf"]
136                     )
137                 )
138             >> as_xpr('}')              [top(strings) = tmp + ">( " + top(strings) + " ) "]
139             ;
140 
141     quant   = nil                       [push(strings, "")]
142             >> atom
143             >> !(
144                     (
145                         as_xpr("*")     [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
146                       | as_xpr("+")     [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
147                       | as_xpr("?")     [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
148                       | repeat
149                     )
150                  >> !as_xpr('?')        [insert(top(strings), 0, " - ")]
151                 )
152             >> nil                      [tmp = top(strings), pop(strings), top(strings) += tmp]
153             ;
154 
155     seq     = quant
156             >> *(
157                     nil                 [top(strings) += " >> "]
158                  >> quant
159                 )
160             ;
161 
162     alts    = seq
163             >> *(
164                     as_xpr('|')         [top(strings) += " | "]
165                  >> seq
166                 )
167             ;
168 
169     regex   = alts
170             ;
171 
172     strings.get().push("");
173     if(!regex_match(argv[i], regex))
174     {
175         std::cerr << "ERROR: unrecognized regular expression" << std::endl;
176         return -1;
177     }
178     else if(nocase)
179     {
180         std::cout << "icase( " << strings.get().top() << " )" << std::endl;
181     }
182     else
183     {
184         std::cout << strings.get().top() << std::endl;
185     }
186 
187     return 0;
188 }
189