• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * Copyright (c) 1998-2002
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE         regex_split_example_2.cpp
15   *   VERSION      see <boost/version.hpp>
16   *   DESCRIPTION: regex_split example: spit out linked URL's.
17   */
18 
19 
20 #include <boost/regex.hpp>
21 #include <list>
22 #include <fstream>
23 #include <iostream>
24 #include <iterator>
25 
26 boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
27                boost::regex::normal | boost::regbase::icase);
28 
/*
 * Replace the contents of s with every character that can still be read
 * from is.
 *
 *   s   receives the data; whatever it held before is discarded.
 *   is  stream to drain; it is read one character at a time until a read
 *       fails (normally at end of file).
 *
 * If is is already in a failed state (for example an ifstream whose open
 * failed) s is simply left empty.
 */
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   // fail() covers both failbit and badbit: no character could be
   // extracted from such a stream, so do not touch its buffer at all.
   if(!is) return;
   std::streambuf* buf = is.rdbuf();
   if(!buf) return;   // defensive: nothing to read from
   //
   // Try to size the string up front. in_avail() is only a hint and may
   // be zero or negative (-1 means "nothing available"); casting a
   // negative value to size_type would request an enormous reservation
   // and make reserve() throw std::length_error, so only use it when
   // it is positive.
   const std::streamsize avail = buf->in_avail();
   if(avail > 0)
      s.reserve(static_cast<std::string::size_type>(avail));
   char c;
   while(is.get(c))
   {
      // push_back grows the string in amortized constant time, so the
      // loop stays efficient even when the hint above was unavailable.
      s.push_back(c);
   }
}
47 
main(int argc,char ** argv)48 int main(int argc, char** argv)
49 {
50    std::string s;
51    std::list<std::string> l;
52    int i;
53    for(i = 1; i < argc; ++i)
54    {
55       std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
56       s.erase();
57       std::ifstream is(argv[i]);
58       load_file(s, is);
59       is.close();
60       boost::regex_split(std::back_inserter(l), s, e);
61       while(l.size())
62       {
63          s = *(l.begin());
64          l.pop_front();
65          std::cout << s << std::endl;
66       }
67    }
68    //
69    // alternative method:
70    // split one match at a time and output direct to
71    // cout via ostream_iterator<std::string>....
72    //
73    for(i = 1; i < argc; ++i)
74    {
75       std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
76       s.erase();
77       std::ifstream is(argv[i]);
78       load_file(s, is);
79       is.close();
80       while(boost::regex_split(std::ostream_iterator<std::string>(std::cout), s, e, boost::match_default, 1)) std::cout << std::endl;
81    }
82 
83    return 0;
84 }
85 
86 
87