1 /*
2 *
3 * Copyright (c) 1998-2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE regex_split_example_2.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: regex_split example: spit out linked URL's.
17 */
18
19
20 #include <boost/regex.hpp>
21 #include <list>
22 #include <fstream>
23 #include <iostream>
24 #include <iterator>
25
26 boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
27 boost::regex::normal | boost::regbase::icase);
28
//
// load_file: replace the contents of s with every character that can
// still be read from is.
//
//   s   receives the data; its previous contents are discarded.
//   is  stream to drain; reading continues until get() fails, so the
//       stream is left in a failed (typically end-of-file) state.
//
// If the stream's badbit is already set, s is left empty and nothing
// is read.
//
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(is.bad()) return;
   //
   // attempt to grow string buffer to match file size,
   // this doesn't always work: in_avail() is only an estimate, and it
   // may be zero or negative (-1 means no characters are known to be
   // available; some implementations report that for a file that was
   // never opened). Converting a negative value to size_type would
   // request an enormous capacity and make reserve() throw
   // std::length_error, so only a positive estimate is used.
   //
   const std::streamsize avail = is.rdbuf()->in_avail();
   if(avail > 0)
      s.reserve(static_cast<std::string::size_type>(avail));
   char c;
   while(is.get(c))
   {
      // push_back is amortized constant time, so the string grows its
      // own buffer efficiently when the estimate above was too small.
      s.push_back(c);
   }
}
47
main(int argc,char ** argv)48 int main(int argc, char** argv)
49 {
50 std::string s;
51 std::list<std::string> l;
52 int i;
53 for(i = 1; i < argc; ++i)
54 {
55 std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
56 s.erase();
57 std::ifstream is(argv[i]);
58 load_file(s, is);
59 is.close();
60 boost::regex_split(std::back_inserter(l), s, e);
61 while(l.size())
62 {
63 s = *(l.begin());
64 l.pop_front();
65 std::cout << s << std::endl;
66 }
67 }
68 //
69 // alternative method:
70 // split one match at a time and output direct to
71 // cout via ostream_iterator<std::string>....
72 //
73 for(i = 1; i < argc; ++i)
74 {
75 std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
76 s.erase();
77 std::ifstream is(argv[i]);
78 load_file(s, is);
79 is.close();
80 while(boost::regex_split(std::ostream_iterator<std::string>(std::cout), s, e, boost::match_default, 1)) std::cout << std::endl;
81 }
82
83 return 0;
84 }
85
86
87