1 /*
2 * Copyright (c) 2015, Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation and/or
13 * other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its contributors
16 * may be used to endorse or promote products derived from this software without
17 * specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30 #include "Tokenizer.h"
31
32 using std::string;
33 using std::vector;
34
35 const string Tokenizer::defaultDelimiters = " \n\r\t\v\f";
36
Tokenizer(const string & input,const string & delimiters,bool mergeDelimiters)37 Tokenizer::Tokenizer(const string &input, const string &delimiters, bool mergeDelimiters)
38 : _input(input), _delimiters(delimiters), _mergeDelimiters(mergeDelimiters)
39 {
40 }
41
split()42 vector<string> Tokenizer::split()
43 {
44 vector<string> result;
45 string token;
46 bool leftover = false;
47
48 for (const auto character : _input) {
49 if (_delimiters.find(character) != string::npos) {
50 if (_mergeDelimiters) {
51 leftover = false;
52 if (token.empty()) {
53 // skip consecutive delimiters
54 continue;
55 }
56 } else {
57 // We've encountered a delimiter, which means that there is a
58 // left-hand token and a right-side token. We are going to add
59 // the left-hand one but must not forget that there is a
60 // right-hand one (possibly empty)
61 leftover = true;
62 }
63
64 result.push_back(token);
65 token.clear();
66 continue;
67 }
68 token += character;
69 leftover = true;
70 }
71
72 // push any leftover token:
73 if (leftover) {
74 result.push_back(token);
75 }
76
77 return result;
78 }
79