• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2015, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without modification,
6  * are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice, this
9  * list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation and/or
13  * other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its contributors
16  * may be used to endorse or promote products derived from this software without
17  * specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #pragma once
31 
32 #include <string>
33 #include <vector>
34 
/** Splits a string into tokens separated by single-character delimiters.
 *
 * Must be initialized with the string to be tokenized and, optionally, a
 * string of delimiters (@see Tokenizer::defaultDelimiters).
 *
 * A run of consecutive delimiters (even if they are different characters) is
 * treated as a single separator. As a result, there can't be empty tokens.
 */
class Tokenizer
{
public:
    /** Constructs a Tokenizer
     *
     * @param[in] input The string to be tokenized
     * @param[in] delimiters A string containing all the token delimiters
     *            (hence, each delimiter can only be a single character)
     */
    Tokenizer(const std::string &input, const std::string &delimiters = defaultDelimiters);

    /** Destructor; the class declares no resource needing explicit release. */
    ~Tokenizer() {}

    /** Returns the next token
     *
     * Multiple consecutive delimiters are considered as a single one - i.e.
     * "a     bc d   " will be tokenized as ("a", "bc", "d") if the delimiter
     * is ' '.
     *
     * @return the next token, or an empty string when no tokens remain
     */
    std::string next();

    /** Returns a vector of all tokens
     *
     * NOTE(review): whether this starts from the beginning of the input or
     * from the current position depends on the implementation, which is not
     * part of this header - confirm before mixing it with next().
     *
     * @return the tokens, in the order they appear in the input
     */
    std::vector<std::string> split();

    /** Default list of delimiters (" \n\r\t\v\f") */
    static const std::string defaultDelimiters;

private:
    const std::string _input;      ///< string to be tokenized
    const std::string _delimiters; ///< token delimiters

    std::string::size_type _position; ///< end of the last returned token
};
76