• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * Copyright (c) 1998-2002
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12 #ifdef _MSC_VER
13 #pragma warning(disable: 4996 4127)
14 #endif
15 
16 #include <boost/config.hpp>
17 #include <boost/regex.hpp>
18 #include <boost/cregex.hpp>
19 #include <boost/timer.hpp>
20 #include <boost/smart_ptr.hpp>
21 
22 #include <string>
23 #include <algorithm>
24 #include <deque>
25 #include <iterator>
26 
27 #ifdef BOOST_RE_OLD_IOSTREAM
28 #include <iostream.h>
29 #include <fstream.h>
30 #else
31 #include <iostream>
32 #include <fstream>
33 using std::cout;
34 using std::cin;
35 using std::cerr;
36 using std::istream;
37 using std::ostream;
38 using std::endl;
39 using std::ifstream;
40 using std::streambuf;
41 using std::getline;
42 #endif
43 
44 #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
45 #include <windows.h>
46 #endif
47 
48 #if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi)
49 // maybe no Koenig lookup, use using declaration instead:
50 using namespace boost;
51 #endif
52 
53 #ifndef BOOST_NO_WREGEX
//
// Write a wide string to a narrow stream: every wide character is
// truncated to char with static_cast and emitted with put().
// Returns the stream so that insertions can be chained.
//
ostream& operator << (ostream& os, const std::wstring& s)
{
   for(std::wstring::const_iterator pos = s.begin(), last = s.end(); pos != last; ++pos)
      os.put(static_cast<char>(*pos));
   return os;
}
66 #endif
67 
//
// Output iterator over a string-like container S: every value assigned
// through the iterator is appended to the container given at
// construction. Increment and dereference do nothing except return the
// iterator itself, so "*it++ = v" ends up calling operator=(v).
//
template <class S>
class string_out_iterator
{
   // the container being appended to (not owned)
   S* m_dest;

public:
   typedef std::output_iterator_tag iterator_category;
   typedef void value_type;
   typedef void difference_type;
   typedef void pointer;
   typedef void reference;

   string_out_iterator(S& s)
      : m_dest(&s)
   {
   }

   // appends one copy of v to the target container
   string_out_iterator& operator=(typename S::value_type v)
   {
      m_dest->append(1, v);
      return *this;
   }

   // the remaining iterator operations are no-ops
   string_out_iterator& operator*()
   {
      return *this;
   }
   string_out_iterator& operator++()
   {
      return *this;
   }
   string_out_iterator& operator++(int)
   {
      return *this;
   }
};
90 
namespace boost{
#if defined(BOOST_MSVC) || (defined(BOOST_BORLANDC) && (BOOST_BORLANDC == 0x550)) || defined(__SGI_STL_PORT)
//
// problem with std::getline under MSVC6sp3
// and C++ Builder 5.5, is this really that hard?
//
// Hand-rolled replacement: reads characters from is into s up to (and
// consuming, but not storing) the next '\n'. Unlike a loop that only
// stops on '\n', this one also stops when extraction fails, so reaching
// end-of-input cannot spin forever.
// Returns is; the stream tests false when nothing could be read.
//
istream& getline(istream& is, std::string& s)
{
   s.erase();
   char c;
   while(is.get(c) && (c != '\n'))
      s.append(1, c);
   // A final line without a trailing newline is still a successful read:
   // keep eofbit but drop the failbit set by the failed get(), which is
   // the state std::getline leaves behind in the same situation.
   if(is.eof() && !s.empty())
      is.clear(istream::eofbit);
   return is;
}
#else
//
// Reads one line from is into s with std::getline and strips a single
// trailing '\r', so that lines ending in "\r\n" come back without it.
// Returns is.
//
istream& getline(istream& is, std::string& s)
{
   std::getline(is, s);
   if(!s.empty() && (s[s.size() - 1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
#endif
}
118 
119 
main(int argc,char ** argv)120 int main(int argc, char**argv)
121 {
122    ifstream ifs;
123    std::istream* p_in = &std::cin;
124    if(argc == 2)
125    {
126       ifs.open(argv[1]);
127       ifs.peek();
128       if(!ifs.good())
129       {
130          cout << "Bad filename: " << argv[1] << endl;
131          return -1;
132       }
133       p_in = &ifs;
134    }
135 
136    boost::regex ex;
137    boost::match_results<std::string::const_iterator> sm;
138 #ifndef BOOST_NO_WREGEX
139    std::wstring ws1, ws2;
140    boost::wregex wex;
141    boost::match_results<std::wstring::const_iterator> wsm;
142 #endif
143    boost::match_results<std::deque<char>::iterator> dm;
144    std::string s1, s2, ts;
145    std::deque<char> ds;
146    boost::regex_tA r;
147    boost::scoped_array<boost::regmatch_t> matches;
148    std::size_t nsubs;
149    boost::timer t;
150    double tim;
151    int result = 0;
152    unsigned iters = 100;
153    double wait_time = (std::min)(t.elapsed_min() * 1000, 0.5);
154 
155    while(true)
156    {
157       cout << "Enter expression (or \"quit\" to exit): ";
158       boost::getline(*p_in, s1);
159       if(argc == 2)
160          cout << endl << s1 << endl;
161       if(s1 == "quit")
162          break;
163 #ifndef BOOST_NO_WREGEX
164       ws1.erase();
165       std::copy(s1.begin(), s1.end(), string_out_iterator<std::wstring>(ws1));
166 #endif
167       try{
168          ex.assign(s1);
169 #ifndef BOOST_NO_WREGEX
170          wex.assign(ws1);
171 #endif
172       }
173       catch(std::exception& e)
174       {
175          cout << "Error in expression: \"" << e.what() << "\"" << endl;
176          continue;
177       }
178       int code = regcompA(&r, s1.c_str(), boost::REG_PERL);
179       if(code != 0)
180       {
181          char buf[256];
182          regerrorA(code, &r, buf, 256);
183          cout << "regcompA error: \"" << buf << "\"" << endl;
184          continue;
185       }
186       nsubs = r.re_nsub + 1;
187       matches.reset(new boost::regmatch_t[nsubs]);
188 
189       while(true)
190       {
191          cout << "Enter string to search (or \"quit\" to exit): ";
192          boost::getline(*p_in, s2);
193          if(argc == 2)
194             cout << endl << s2 << endl;
195          if(s2 == "quit")
196             break;
197 
198 #ifndef BOOST_NO_WREGEX
199          ws2.erase();
200          std::copy(s2.begin(), s2.end(), string_out_iterator<std::wstring>(ws2));
201 #endif
202          ds.erase(ds.begin(), ds.end());
203          std::copy(s2.begin(), s2.end(), std::back_inserter(ds));
204 
205          unsigned i;
206          iters = 10;
207          tim = 1.1;
208 
209 #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
210          MSG msg;
211          PeekMessage(&msg, 0, 0, 0, 0);
212          Sleep(0);
213 #endif
214 
215          // cache load:
216          regex_search(s2, sm, ex);
217 
218          // measure time interval for basic_regex<char>
219          do{
220             iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
221             t.restart();
222             for(i =0; i < iters; ++i)
223             {
224                result = regex_search(s2, sm, ex);
225             }
226             tim = t.elapsed();
227          }while(tim < wait_time);
228 
229          cout << "regex time: " << (tim * 1000000 / iters) << "us" << endl;
230          if(result)
231          {
232             for(i = 0; i < sm.size(); ++i)
233             {
234                ts = sm[i];
235                cout << "\tmatch " << i << ": \"";
236                cout << ts;
237                cout << "\" (matched=" << sm[i].matched << ")" << endl;
238             }
239             cout << "\tmatch $`: \"";
240             cout << std::string(sm[-1]);
241             cout << "\" (matched=" << sm[-1].matched << ")" << endl;
242             cout << "\tmatch $': \"";
243             cout << std::string(sm[-2]);
244             cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
245          }
246 
247 #ifndef BOOST_NO_WREGEX
248          // measure time interval for boost::wregex
249          iters = 10;
250          tim = 1.1;
251          // cache load:
252          regex_search(ws2, wsm, wex);
253          do{
254             iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
255             t.restart();
256             for(i = 0; i < iters; ++i)
257             {
258                result = regex_search(ws2, wsm, wex);
259             }
260             tim = t.elapsed();
261          }while(tim < wait_time);
262          cout << "wregex time: " << (tim * 1000000 / iters) << "us" << endl;
263          if(result)
264          {
265             std::wstring tw;
266             for(i = 0; i < wsm.size(); ++i)
267             {
268                tw.erase();
269                std::copy(wsm[i].first, wsm[i].second, string_out_iterator<std::wstring>(tw));
270                cout << "\tmatch " << i << ": \"" << tw;
271                cout << "\" (matched=" << sm[i].matched << ")" << endl;
272             }
273             cout << "\tmatch $`: \"";
274             tw.erase();
275             std::copy(wsm[-1].first, wsm[-1].second, string_out_iterator<std::wstring>(tw));
276             cout << tw;
277             cout << "\" (matched=" << sm[-1].matched << ")" << endl;
278             cout << "\tmatch $': \"";
279             tw.erase();
280             std::copy(wsm[-2].first, wsm[-2].second, string_out_iterator<std::wstring>(tw));
281             cout << tw;
282             cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
283          }
284 #endif
285 
286          // measure time interval for basic_regex<char> using a deque
287          iters = 10;
288          tim = 1.1;
289          // cache load:
290          regex_search(ds.begin(), ds.end(), dm, ex);
291          do{
292             iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
293             t.restart();
294             for(i = 0; i < iters; ++i)
295             {
296                result = regex_search(ds.begin(), ds.end(), dm, ex);
297             }
298             tim = t.elapsed();
299          }while(tim < wait_time);
300          cout << "regex time (search over std::deque<char>): " << (tim * 1000000 / iters) << "us" << endl;
301 
302          if(result)
303          {
304             for(i = 0; i < dm.size(); ++i)
305             {
306                ts.erase();
307                std::copy(dm[i].first, dm[i].second, string_out_iterator<std::string>(ts));
308                cout << "\tmatch " << i << ": \"" << ts;
309                cout << "\" (matched=" << sm[i].matched << ")" << endl;
310             }
311             cout << "\tmatch $`: \"";
312             ts.erase();
313             std::copy(dm[-1].first, dm[-1].second, string_out_iterator<std::string>(ts));
314             cout << ts;
315             cout << "\" (matched=" << sm[-1].matched << ")" << endl;
316             cout << "\tmatch $': \"";
317             ts.erase();
318             std::copy(dm[-2].first, dm[-2].second, string_out_iterator<std::string>(ts));
319             cout << ts;
320             cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
321          }
322 
323          // measure time interval for POSIX matcher:
324          iters = 10;
325          tim = 1.1;
326          // cache load:
327          regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
328          do{
329             iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
330             t.restart();
331             for(i = 0; i < iters; ++i)
332             {
333                result = regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
334             }
335             tim = t.elapsed();
336          }while(tim < wait_time);
337          cout << "POSIX regexecA time: " << (tim * 1000000 / iters) << "us" << endl;
338 
339          if(result == 0)
340          {
341             for(i = 0; i < nsubs; ++i)
342             {
343                if(matches[i].rm_so >= 0)
344                {
345                   ts.assign(s2.begin() + matches[i].rm_so, s2.begin() + matches[i].rm_eo);
346                   cout << "\tmatch " << i << ": \"" << ts << "\" (matched=" << (matches[i].rm_so != -1) << ")"<< endl;
347                }
348                else
349                   cout << "\tmatch " << i << ": \"\" (matched=" << (matches[i].rm_so != -1) << ")" << endl;   // no match
350             }
351             cout << "\tmatch $`: \"";
352             ts.erase();
353             ts.assign(s2.begin(), s2.begin() + matches[0].rm_so);
354             cout << ts;
355             cout << "\" (matched=" << (matches[0].rm_so != 0) << ")" << endl;
356             cout << "\tmatch $': \"";
357             ts.erase();
358             ts.assign(s2.begin() + matches[0].rm_eo, s2.end());
359             cout << ts;
360             cout << "\" (matched=" << (matches[0].rm_eo != (int)s2.size()) << ")" << endl << endl;
361          }
362       }
363       regfreeA(&r);
364    }
365 
366    return 0;
367 }
368 
369 #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(UNDER_CE)
370 #if !defined(BOOST_EMBTC)
371 #pragma comment(lib, "user32.lib")
372 #else
373 #pragma comment(lib, "user32.a")
374 #endif
375 #endif
376