• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //  (C) Copyright Jeremy Siek 2004
2 //  Distributed under the Boost Software License, Version 1.0. (See
3 //  accompanying file LICENSE_1_0.txt or copy at
4 //  http://www.boost.org/LICENSE_1_0.txt)
5 
6 #ifndef BOOST_STRINGTOK_HPP
7 #define BOOST_STRINGTOK_HPP
8 
9 /*
10  * stringtok.hpp -- Breaks a string into tokens.  This is an example for lib3.
11  *
12  * Template function looks like this:
13  *
14  *    template <typename Container>
15  *    void stringtok (Container &l,
16  *                    string const &s,
17  *                    char const * const ws = " \t\n");
18  *
19  * A nondestructive version of strtok() that handles its own memory and can
20  * be broken up by any character(s).  Does all the work at once rather than
21  * in an invocation loop like strtok() requires.
22  *
23  * Container is any type that supports push_back(a_string), although using
24  * list<string> and deque<string> are indicated due to their O(1) push_back.
25  * (I prefer deque<> because op[]/at() is available as well.)  The first
26  * parameter references an existing Container.
27  *
28  * s is the string to be tokenized.  From the parameter declaration, it can
29  * be seen that s is not affected.  Since references-to-const may refer to
30  * temporaries, you could use stringtok(some_container, readline("")) when
31  * using the GNU readline library.
32  *
33  * The final parameter is an array of characters that serve as whitespace.
34  * Whitespace characters default to one or more of tab, space, and newline,
35  * in any combination.
36  *
37  * 'l' need not be empty on entry.  On return, 'l' will have the token
38  * strings appended.
39  *
40  *
41  * [Example:
42  *       list<string>       ls;
43  *       stringtok (ls, " this  \t is\t\n  a test  ");
44  *       for (list<string>::const_iterator i = ls.begin();
45  *            i != ls.end(); ++i)
46  *       {
47  *            cerr << ':' << (*i) << ":\n";
48  *       }
49  *
50  *  would print
51  *       :this:
52  *       :is:
53  *       :a:
54  *       :test:
55  * -end example]
56  *
57  * pedwards@jaj.com  May 1999
58  */
59 
60 #include <string>
61 #include <cstring> // for strchr
62 
63 /*****************************************************************
64  * This is the only part of the implementation that I don't like.
65  * It can probably be improved upon by the reader...
66  */
67 
/**
 * Tests whether a character belongs to a caller-supplied delimiter set.
 *
 * @param c     Character to classify.
 * @param wstr  NUL-terminated array of the characters that count as
 *              whitespace.  Must not be a null pointer.
 * @return      true if c is one of the characters listed in wstr,
 *              false otherwise.
 */
inline bool isws(char c, char const* const wstr)
{
    // strchr() treats the terminating NUL as part of the searched string, so
    // asking for '\0' would always "succeed".  A NUL is never a listed
    // delimiter, so reject it explicitly before searching.
    return (c != '\0') && (std::strchr(wstr, c) != NULL);
}
73 
74 namespace boost
75 {
76 
77 /*****************************************************************
78  * Simplistic and quite Standard, but a bit slow.  This should be
79  * templatized on basic_string instead, or on a more generic StringT
80  * that just happens to support ::size_type, .substr(), and so on.
81  * I had hoped that "whitespace" would be a trait, but it isn't, so
82  * the user must supply it.  Enh, this lets them break up strings on
83  * different things easier than traits would anyhow.
84  */
/**
 * Splits a string into tokens and appends them to a container.
 *
 * Tokens are the maximal runs of characters of s that contain none of the
 * delimiter characters listed in ws.  Leading, trailing and repeated
 * delimiters are skipped, so no empty tokens are ever produced.
 *
 * @tparam Container  Any type supporting push_back(std::string).
 * @param  l   Destination container.  Existing elements are kept; tokens are
 *             appended in the order they occur in s.
 * @param  s   String to tokenize.  It is not modified.
 * @param  ws  NUL-terminated array of delimiter characters.  Must not be a
 *             null pointer.  Defaults to space, tab and newline.
 */
template < typename Container >
void stringtok(
    Container& l, std::string const& s, char const* const ws = " \t\n")
{
    typedef std::string::size_type size_type;

    // The std::string searches only consider the characters actually listed
    // in ws, so an embedded '\0' in s is ordinary token content and not an
    // implicit delimiter.
    size_type first = s.find_first_not_of(ws); // start of the first token
    while (first != std::string::npos)
    {
        // one past the end of the current token, or npos if it runs to the
        // end of s
        const size_type last = s.find_first_of(ws, first);
        if (last == std::string::npos)
        {
            l.push_back(s.substr(first));
            return; // the final token reached the end of the string
        }
        l.push_back(s.substr(first, last - first));

        // skip the delimiter run that follows; npos means only delimiters
        // remain
        first = s.find_first_not_of(ws, last);
    }
}
113 
114 } // namespace boost
115 
116 #endif // BOOST_STRINGTOK_HPP
117