• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1[/
2  Copyright 2006-2007 John Maddock.
3  Distributed under the Boost Software License, Version 1.0.
4  (See accompanying file LICENSE_1_0.txt or copy at
5  http://www.boost.org/LICENSE_1_0.txt).
6]
7
8
9[section:regex_token_iterator regex_token_iterator]
10
11The template class [regex_token_iterator] is an iterator adapter; that is to
12say it represents a new view of an existing iterator sequence,
13by enumerating all the occurrences of a regular expression within that
14sequence, and presenting one or more character sequences for each match found.
15Each position enumerated by the iterator is a [sub_match] object that represents
16what matched a particular sub-expression within the regular expression.
17When class [regex_token_iterator] is used to enumerate a single sub-expression
18with index -1, then the iterator performs field splitting: that is
19to say it enumerates one character sequence for each section of the character
20container sequence that does not match the regular expression specified.
21
22   template <class BidirectionalIterator,
23            class charT = iterator_traits<BidirectionalIterator>::value_type,
24            class traits = regex_traits<charT> >
25   class regex_token_iterator
26   {
27   public:
28      typedef          basic_regex<charT, traits>                              regex_type;
29      typedef          sub_match<BidirectionalIterator>                        value_type;
30      typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
31      typedef          const value_type*                                       pointer;
32      typedef          const value_type&                                       reference;
33      typedef          std::forward_iterator_tag                               iterator_category;
34
35      ``[link boost_regex.regex_token_iterator.construct1 regex_token_iterator]``();
36      ``[link boost_regex.regex_token_iterator.construct2 regex_token_iterator]``(BidirectionalIterator a,
37                           BidirectionalIterator b,
38                           const regex_type& re,
39                           int submatch = 0,
40                           match_flag_type m = match_default);
41      ``[link boost_regex.regex_token_iterator.construct3 regex_token_iterator]``(BidirectionalIterator a,
42                           BidirectionalIterator b,
43                           const regex_type& re,
44                           const std::vector<int>& submatches,
45                           match_flag_type m = match_default);
46      template <std::size_t N>
47      ``[link boost_regex.regex_token_iterator.construct4 regex_token_iterator]``(BidirectionalIterator a,
48                           BidirectionalIterator b,
49                           const regex_type& re,
50                           const int (&submatches)[N],
51                           match_flag_type m = match_default);
52      ``[link boost_regex.regex_token_iterator.construct5 regex_token_iterator]``(const regex_token_iterator&);
53      regex_token_iterator& ``[link boost_regex.regex_token_iterator.assign operator=]``(const regex_token_iterator&);
54      bool ``[link boost_regex.regex_token_iterator.op_eq operator==]``(const regex_token_iterator&)const;
55      bool ``[link boost_regex.regex_token_iterator.op_ne operator!=]``(const regex_token_iterator&)const;
56      const value_type& ``[link boost_regex.regex_token_iterator.op_deref operator*]``()const;
57      const value_type* ``[link boost_regex.regex_token_iterator.op_arrow operator->]``()const;
58      regex_token_iterator& ``[link boost_regex.regex_token_iterator.op_inc1 operator++]``();
59      regex_token_iterator ``[link boost_regex.regex_token_iterator.op_inc2 operator++]``(int);
60   };
61
62   typedef regex_token_iterator<const char*>                   cregex_token_iterator;
63   typedef regex_token_iterator<std::string::const_iterator>   sregex_token_iterator;
64   #ifndef BOOST_NO_WREGEX
65   typedef regex_token_iterator<const wchar_t*>                wcregex_token_iterator;
66   typedef regex_token_iterator<std::wstring::const_iterator>  wsregex_token_iterator;
67   #endif
68
69   template <class charT, class traits>
70   regex_token_iterator<const charT*, charT, traits>
71      ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
72            const charT* p,
73            const basic_regex<charT, traits>& e,
74            int submatch = 0,
75            regex_constants::match_flag_type m = regex_constants::match_default);
76
77   template <class charT, class traits, class ST, class SA>
78   regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
79      ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
80            const std::basic_string<charT, ST, SA>& p,
81            const basic_regex<charT, traits>& e,
82            int submatch = 0,
83            regex_constants::match_flag_type m = regex_constants::match_default);
84
85   template <class charT, class traits, std::size_t N>
86   regex_token_iterator<const charT*, charT, traits>
87   ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
88            const charT* p,
89            const basic_regex<charT, traits>& e,
90            const int (&submatch)[N],
91            regex_constants::match_flag_type m = regex_constants::match_default);
92
93   template <class charT, class traits, class ST, class SA, std::size_t N>
94   regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
95      ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
96            const std::basic_string<charT, ST, SA>& p,
97            const basic_regex<charT, traits>& e,
98            const int (&submatch)[N],
99            regex_constants::match_flag_type m = regex_constants::match_default);
100
101   template <class charT, class traits>
102   regex_token_iterator<const charT*, charT, traits>
103      ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
104            const charT* p,
105            const basic_regex<charT, traits>& e,
106            const std::vector<int>& submatch,
107            regex_constants::match_flag_type m = regex_constants::match_default);
108
109   template <class charT, class traits, class ST, class SA>
110   regex_token_iterator<
111         typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
112      ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
113            const std::basic_string<charT, ST, SA>& p,
114            const basic_regex<charT, traits>& e,
115            const std::vector<int>& submatch,
116            regex_constants::match_flag_type m = regex_constants::match_default);
117
118[h4 Description]
119
120[#boost_regex.regex_token_iterator.construct1]
121
122   regex_token_iterator();
123
124[*Effects]: constructs an end of sequence iterator.
125
126[#boost_regex.regex_token_iterator.construct2]
127
128   regex_token_iterator(BidirectionalIterator a,
129                        BidirectionalIterator b,
130                        const regex_type& re,
131                        int submatch = 0,
132                        match_flag_type m = match_default);
133
134[*Preconditions]: `!re.empty()`.  Object /re/ shall exist for the lifetime of
135the iterator constructed from it.
136
137[*Effects]: constructs a [regex_token_iterator] that will enumerate one string for
138each regular expression match of the expression /re/ found within the sequence \[a,b),
139using match flags /m/ (see [match_flag_type]).  The string enumerated is the sub-expression /submatch/
140for each match found; if /submatch/ is -1, then enumerates all the text
141sequences that did not match the expression /re/ (that is to say, it performs field
142splitting).
143
144[*Throws]: `std::runtime_error` if the complexity of matching the expression against
145an N character string begins to exceed O(N[super 2]), or if the program runs
146out of stack space while matching the expression (if Boost.Regex is configured
147in recursive mode), or if the matcher exhausts its permitted memory
148allocation (if Boost.Regex is configured in non-recursive mode).
149
150[#boost_regex.regex_token_iterator.construct3]
151
152   regex_token_iterator(BidirectionalIterator a,
153                        BidirectionalIterator b,
154                        const regex_type& re,
155                        const std::vector<int>& submatches,
156                        match_flag_type m = match_default);
157
158[*Preconditions]: `submatches.size() && !re.empty()`.  Object /re/ shall
159exist for the lifetime of the iterator constructed from it.
160
161[*Effects]: constructs a [regex_token_iterator] that will enumerate
162`submatches.size()` strings for each regular expression match of
163the expression /re/ found within the sequence \[a,b), using match flags /m/
164(see [match_flag_type]).  For each match found one string will be enumerated
165for each sub-expression index contained within the /submatches/ vector; if
166`submatches[0]` is -1, then the first string enumerated for each match will be
167all of the text from the end of the last match to the start of the current match;
168in addition there will be one extra string enumerated when no more matches can
169be found: from the end of the last match found, to the end of the underlying sequence.
170
171[*Throws]: `std::runtime_error` if the complexity of matching the expression
172against an N character string begins to exceed O(N[super 2]), or if the
173program runs out of stack space while matching the expression (if Boost.Regex is
174configured in recursive mode), or if the matcher exhausts its permitted memory
175allocation (if Boost.Regex is configured in non-recursive mode).
176
177[#boost_regex.regex_token_iterator.construct4]
178
179   template <std::size_t R>
180   regex_token_iterator(BidirectionalIterator a,
181                        BidirectionalIterator b,
182                        const regex_type& re,
183                        const int (&submatches)[R],
184                        match_flag_type m = match_default);
185
186[*Preconditions]: `!re.empty()`.  Object /re/ shall exist for the lifetime of the iterator constructed from it.
187
188[*Effects]: constructs a [regex_token_iterator] that will enumerate /R/ strings
189for each regular expression match of the expression /re/ found within the sequence
190\[a,b), using match flags /m/ (see [match_flag_type]).  For each match found one
191string will be enumerated for each sub-expression index contained within the
192/submatches/ array; if `submatches[0]` is -1, then the first string enumerated for
193each match will be all of the text from the end of the last match to the start
194of the current match; in addition there will be one extra string enumerated when
195no more matches can be found: from the end of the last match found, to
196the end of the underlying sequence.
197
198[*Throws]: `std::runtime_error` if the complexity of matching the expression
199against an N character string begins to exceed O(N[super 2]), or if the
200program runs out of stack space while matching the expression (if Boost.Regex
201is configured in recursive mode), or if the matcher exhausts its
202permitted memory allocation (if Boost.Regex is configured in non-recursive mode).
203
204[#boost_regex.regex_token_iterator.construct5]
205
206   regex_token_iterator(const regex_token_iterator& that);
207
208[*Effects]: constructs a copy of `that`.
209
210[*Postconditions]: `*this == that`.
211
212[#boost_regex.regex_token_iterator.assign]
213
214   regex_token_iterator& operator=(const regex_token_iterator& that);
215
216[*Effects]: sets `*this` to be equal to `that`.
217
218[*Postconditions]: `*this == that`.
219
220[#boost_regex.regex_token_iterator.op_eq]
221
222   bool operator==(const regex_token_iterator& that)const;
223
224[*Effects]: returns true if `*this` is the same position as `that`.
225
226[#boost_regex.regex_token_iterator.op_ne]
227
228   bool operator!=(const regex_token_iterator& that)const;
229
230[*Effects]: returns `!(*this == that)`.
231
232[#boost_regex.regex_token_iterator.op_deref]
233
234   const value_type& operator*()const;
235
236[*Effects]: returns the current character sequence being enumerated.
237
238[#boost_regex.regex_token_iterator.op_arrow]
239
240   const value_type* operator->()const;
241
242[*Effects]: returns `&(**this)`.
243
244[#boost_regex.regex_token_iterator.op_inc1]
245
246   regex_token_iterator& operator++();
247
248[*Effects]: Moves on to the next character sequence to be enumerated.
249
250[*Throws]: `std::runtime_error` if the complexity of matching the expression
251against an N character string begins to exceed O(N[super 2]), or if the program
252runs out of stack space while matching the expression (if Boost.Regex is
253configured in recursive mode), or if the matcher exhausts its permitted
254memory allocation (if Boost.Regex is configured in non-recursive mode).
255
256[*Returns]: `*this`.
257
258[#boost_regex.regex_token_iterator.op_inc2]
259
260   regex_token_iterator operator++(int);
261
262[*Effects]: constructs a copy, /result/, of `*this`, then calls `++(*this)`.
263
264[*Returns]: result.
265
266[#boost_regex.regex_token_iterator.make]
267
268   template <class charT, class traits>
269   regex_token_iterator<const charT*, charT, traits>
270      make_regex_token_iterator(
271            const charT* p,
272            const basic_regex<charT, traits>& e,
273            int submatch = 0,
274            regex_constants::match_flag_type m = regex_constants::match_default);
275
276   template <class charT, class traits, class ST, class SA>
277   regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
278      make_regex_token_iterator(
279            const std::basic_string<charT, ST, SA>& p,
280            const basic_regex<charT, traits>& e,
281            int submatch = 0,
282            regex_constants::match_flag_type m = regex_constants::match_default);
283
284   template <class charT, class traits, std::size_t N>
285   regex_token_iterator<const charT*, charT, traits>
286   make_regex_token_iterator(
287            const charT* p,
288            const basic_regex<charT, traits>& e,
289            const int (&submatch)[N],
290            regex_constants::match_flag_type m = regex_constants::match_default);
291
292   template <class charT, class traits, class ST, class SA, std::size_t N>
293   regex_token_iterator<
294         typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
295      make_regex_token_iterator(
296            const std::basic_string<charT, ST, SA>& p,
297            const basic_regex<charT, traits>& e,
298            const int (&submatch)[N],
299            regex_constants::match_flag_type m = regex_constants::match_default);
300
301   template <class charT, class traits>
302   regex_token_iterator<const charT*, charT, traits>
303      make_regex_token_iterator(
304            const charT* p,
305            const basic_regex<charT, traits>& e,
306            const std::vector<int>& submatch,
307            regex_constants::match_flag_type m = regex_constants::match_default);
308
309   template <class charT, class traits, class ST, class SA>
310   regex_token_iterator<
311         typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
312      make_regex_token_iterator(
313            const std::basic_string<charT, ST, SA>& p,
314            const basic_regex<charT, traits>& e,
315            const std::vector<int>& submatch,
316            regex_constants::match_flag_type m = regex_constants::match_default);
317
318[*Effects]: returns a [regex_token_iterator] that enumerates one [sub_match]
319for each value in /submatch/ for each occurrence of regular expression /e/
320in string /p/, matched using [match_flag_type] /m/.
321
322[h4 Examples]
323
324The following example takes a string and splits it into a series of tokens:
325
326   #include <iostream>
327   #include <boost/regex.hpp>
328
329   using namespace std;
330
331   int main(int argc, char**)
332   {
333      string s;
334      do{
335         if(argc == 1)
336         {
337            cout << "Enter text to split (or \"quit\" to exit): ";
338            getline(cin, s);
339            if(s == "quit") break;
340         }
341         else
342            s = "This is a string of tokens";
343
344         boost::regex re("\\s+");
345         boost::sregex_token_iterator i(s.begin(), s.end(), re, -1);
346         boost::sregex_token_iterator j;
347
348         unsigned count = 0;
349         while(i != j)
350         {
351            cout << *i++ << endl;
352            count++;
353         }
354         cout << "There were " << count << " tokens found." << endl;
355
356      }while(argc == 1);
357      return 0;
358   }
359
360
361The following example takes an HTML file and outputs a list of all the linked files:
362
363   #include <fstream>
364   #include <iostream>
365   #include <iterator>
366   #include <boost/regex.hpp>
367
368   boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
369                  boost::regex::normal | boost::regbase::icase);
370
371   void load_file(std::string& s, std::istream& is)
372   {
373      s.erase();
374      //
375      // attempt to grow string buffer to match file size,
376      // this doesn't always work...
377      s.reserve(is.rdbuf()->in_avail());
378      char c;
379      while(is.get(c))
380      {
381         // use logarithmic growth strategy, in case
382         // in_avail (above) returned zero:
383         if(s.capacity() == s.size())
384            s.reserve(s.capacity() * 3);
385         s.append(1, c);
386      }
387   }
388
389   int main(int argc, char** argv)
390   {
391      std::string s;
392      int i;
393      for(i = 1; i < argc; ++i)
394      {
395         std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
396         s.erase();
397         std::ifstream is(argv[i]);
398         load_file(s, is);
399         boost::sregex_token_iterator i(s.begin(), s.end(), e, 1);
400         boost::sregex_token_iterator j;
401         while(i != j)
402         {
403            std::cout << *i++ << std::endl;
404         }
405      }
406      //
407      // alternative method:
408      // test the array-literal constructor, and split out the whole
409      // match as well as $1....
410      //
411      for(i = 1; i < argc; ++i)
412      {
413         std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
414         s.erase();
415         std::ifstream is(argv[i]);
416         load_file(s, is);
417         const int subs[] = {1, 0,};
418         boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
419         boost::sregex_token_iterator j;
420         while(i != j)
421         {
422            std::cout << *i++ << std::endl;
423         }
424      }
425
426      return 0;
427   }
428
429
430[endsect]
431
432