1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_split.h"
6
7 #include <stddef.h>
8
9 #include "base/logging.h"
10 #include "base/strings/string_util.h"
11 #include "base/third_party/icu/icu_utf.h"
12
13 namespace base {
14
15 namespace {
16
17 // Returns either the ASCII or UTF-16 whitespace.
18 template <typename char_type>
19 std::basic_string_view<char_type> WhitespaceForType();
20 template <>
WhitespaceForType()21 std::u16string_view WhitespaceForType<char16_t>() {
22 return kWhitespaceUTF16;
23 }
24 template <>
WhitespaceForType()25 std::string_view WhitespaceForType<char>() {
26 return kWhitespaceASCII;
27 }
28
// FindFirstOf() overloads used by the splitter below. A single-character
// delimiter is searched for with find() on the view instead of
// find_first_of(), since that is the common case and can be made faster. This
// could also have been expressed with template specialization, but plain
// overloads were judged clearer.
//
// No FindFirstNotOf counterpart exists because std::string_view already
// provides the differently-optimized variants of that search itself.
//
// 8-bit, single-character form: returns the index of the first |c| in |piece|
// at or after |pos|, or npos if there is none.
size_t FindFirstOf(std::string_view piece, char c, size_t pos) {
  const size_t match = piece.find(c, pos);
  return match;
}
// 16-bit, single-character form: returns the index of the first |c| in
// |piece| at or after |pos|, or npos if there is none.
size_t FindFirstOf(std::u16string_view piece, char16_t c, size_t pos) {
  const size_t match = piece.find(c, pos);
  return match;
}
// 8-bit, character-set form: returns the index of the first character of
// |piece| at or after |pos| that appears anywhere in |one_of|, or npos if no
// such character exists.
size_t FindFirstOf(std::string_view piece,
                   std::string_view one_of,
                   size_t pos) {
  const size_t match = piece.find_first_of(one_of, pos);
  return match;
}
// 16-bit, character-set form: returns the index of the first character of
// |piece| at or after |pos| that appears anywhere in |one_of|, or npos if no
// such character exists.
size_t FindFirstOf(std::u16string_view piece,
                   std::u16string_view one_of,
                   size_t pos) {
  const size_t match = piece.find_first_of(one_of, pos);
  return match;
}
51
52 // General string splitter template. Can take 8- or 16-bit input, can produce
53 // the corresponding string or std::string_view output, and can take single- or
54 // multiple-character delimiters.
55 //
56 // DelimiterType is either a character (Str::value_type) or a string piece of
57 // multiple characters (std::basic_string_view<char>). std::string_view has a
58 // version of find for both of these cases, and the single-character version is
59 // the most common and can be implemented faster, which is why this is a
60 // template.
61 template <typename char_type, typename OutputStringType, typename DelimiterType>
SplitStringT(std::basic_string_view<char_type> str,DelimiterType delimiter,WhitespaceHandling whitespace,SplitResult result_type)62 static std::vector<OutputStringType> SplitStringT(
63 std::basic_string_view<char_type> str,
64 DelimiterType delimiter,
65 WhitespaceHandling whitespace,
66 SplitResult result_type) {
67 std::vector<OutputStringType> result;
68 if (str.empty())
69 return result;
70
71 using ViewType = std::basic_string_view<char_type>;
72
73 size_t start = 0;
74 while (start != ViewType::npos) {
75 size_t end = FindFirstOf(str, delimiter, start);
76
77 ViewType piece;
78 if (end == ViewType::npos) {
79 piece = str.substr(start);
80 start = ViewType::npos;
81 } else {
82 piece = str.substr(start, end - start);
83 start = end + 1;
84 }
85
86 if (whitespace == TRIM_WHITESPACE)
87 piece = TrimString(piece, WhitespaceForType<char_type>(), TRIM_ALL);
88
89 if (result_type == SPLIT_WANT_ALL || !piece.empty())
90 result.emplace_back(piece);
91 }
92 return result;
93 }
94
AppendStringKeyValue(std::string_view input,char delimiter,StringPairs * result)95 bool AppendStringKeyValue(std::string_view input,
96 char delimiter,
97 StringPairs* result) {
98 // Always append a new item regardless of success (it might be empty). The
99 // below code will copy the strings directly into the result pair.
100 result->resize(result->size() + 1);
101 auto& result_pair = result->back();
102
103 // Find the delimiter.
104 size_t end_key_pos = input.find_first_of(delimiter);
105 if (end_key_pos == std::string::npos) {
106 return false; // No delimiter.
107 }
108 result_pair.first.assign(input.substr(0, end_key_pos));
109
110 // Find the value string.
111 std::string_view remains =
112 input.substr(end_key_pos, input.size() - end_key_pos);
113 size_t begin_value_pos = remains.find_first_not_of(delimiter);
114 if (begin_value_pos == std::string_view::npos) {
115 return false; // No value.
116 }
117 result_pair.second.assign(
118 remains.substr(begin_value_pos, remains.size() - begin_value_pos));
119
120 return true;
121 }
122
123 template <typename char_type, typename OutputStringType>
SplitStringUsingSubstrT(std::basic_string_view<char_type> input,std::basic_string_view<char_type> delimiter,WhitespaceHandling whitespace,SplitResult result_type,std::vector<OutputStringType> * result)124 void SplitStringUsingSubstrT(std::basic_string_view<char_type> input,
125 std::basic_string_view<char_type> delimiter,
126 WhitespaceHandling whitespace,
127 SplitResult result_type,
128 std::vector<OutputStringType>* result) {
129 using Piece = std::basic_string_view<char_type>;
130 using size_type = typename Piece::size_type;
131
132 result->clear();
133 for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
134 begin_index = end_index + delimiter.size()) {
135 end_index = input.find(delimiter, begin_index);
136 Piece term = end_index == Piece::npos
137 ? input.substr(begin_index)
138 : input.substr(begin_index, end_index - begin_index);
139
140 if (whitespace == TRIM_WHITESPACE)
141 term = TrimString(term, WhitespaceForType<char_type>(), TRIM_ALL);
142
143 if (result_type == SPLIT_WANT_ALL || !term.empty())
144 result->emplace_back(term);
145 }
146 }
147
148 } // namespace
149
SplitString(std::string_view input,std::string_view separators,WhitespaceHandling whitespace,SplitResult result_type)150 std::vector<std::string> SplitString(std::string_view input,
151 std::string_view separators,
152 WhitespaceHandling whitespace,
153 SplitResult result_type) {
154 if (separators.size() == 1) {
155 return SplitStringT<char, std::string, char>(input, separators[0],
156 whitespace, result_type);
157 }
158 return SplitStringT<char, std::string, std::string_view>(
159 input, separators, whitespace, result_type);
160 }
161
SplitString(std::u16string_view input,std::u16string_view separators,WhitespaceHandling whitespace,SplitResult result_type)162 std::vector<std::u16string> SplitString(std::u16string_view input,
163 std::u16string_view separators,
164 WhitespaceHandling whitespace,
165 SplitResult result_type) {
166 if (separators.size() == 1) {
167 return SplitStringT<char16_t, std::u16string, char16_t>(
168 input, separators[0], whitespace, result_type);
169 }
170 return SplitStringT<char16_t, std::u16string, std::u16string_view>(
171 input, separators, whitespace, result_type);
172 }
173
SplitStringPiece(std::string_view input,std::string_view separators,WhitespaceHandling whitespace,SplitResult result_type)174 std::vector<std::string_view> SplitStringPiece(std::string_view input,
175 std::string_view separators,
176 WhitespaceHandling whitespace,
177 SplitResult result_type) {
178 if (separators.size() == 1) {
179 return SplitStringT<char, std::string_view, char>(input, separators[0],
180 whitespace, result_type);
181 }
182 return SplitStringT<char, std::string_view, std::string_view>(
183 input, separators, whitespace, result_type);
184 }
185
SplitStringPiece(std::u16string_view input,std::u16string_view separators,WhitespaceHandling whitespace,SplitResult result_type)186 std::vector<std::u16string_view> SplitStringPiece(
187 std::u16string_view input,
188 std::u16string_view separators,
189 WhitespaceHandling whitespace,
190 SplitResult result_type) {
191 if (separators.size() == 1) {
192 return SplitStringT<char16_t, std::u16string_view, char16_t>(
193 input, separators[0], whitespace, result_type);
194 }
195 return SplitStringT<char16_t, std::u16string_view, std::u16string_view>(
196 input, separators, whitespace, result_type);
197 }
198
SplitStringIntoKeyValuePairs(std::string_view input,char key_value_delimiter,char key_value_pair_delimiter,StringPairs * key_value_pairs)199 bool SplitStringIntoKeyValuePairs(std::string_view input,
200 char key_value_delimiter,
201 char key_value_pair_delimiter,
202 StringPairs* key_value_pairs) {
203 key_value_pairs->clear();
204
205 std::vector<std::string_view> pairs =
206 SplitStringPiece(input, std::string(1, key_value_pair_delimiter),
207 TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
208 key_value_pairs->reserve(pairs.size());
209
210 bool success = true;
211 for (const std::string_view& pair : pairs) {
212 if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) {
213 // Don't return here, to allow for pairs without associated
214 // value or key; just record that the split failed.
215 success = false;
216 }
217 }
218 return success;
219 }
220
SplitStringUsingSubstr(std::u16string_view input,std::u16string_view delimiter,WhitespaceHandling whitespace,SplitResult result_type)221 std::vector<std::u16string> SplitStringUsingSubstr(
222 std::u16string_view input,
223 std::u16string_view delimiter,
224 WhitespaceHandling whitespace,
225 SplitResult result_type) {
226 std::vector<std::u16string> result;
227 SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
228 return result;
229 }
230
SplitStringUsingSubstr(std::string_view input,std::string_view delimiter,WhitespaceHandling whitespace,SplitResult result_type)231 std::vector<std::string> SplitStringUsingSubstr(std::string_view input,
232 std::string_view delimiter,
233 WhitespaceHandling whitespace,
234 SplitResult result_type) {
235 std::vector<std::string> result;
236 SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
237 return result;
238 }
239
SplitStringPieceUsingSubstr(std::u16string_view input,std::u16string_view delimiter,WhitespaceHandling whitespace,SplitResult result_type)240 std::vector<std::u16string_view> SplitStringPieceUsingSubstr(
241 std::u16string_view input,
242 std::u16string_view delimiter,
243 WhitespaceHandling whitespace,
244 SplitResult result_type) {
245 std::vector<std::u16string_view> result;
246 SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
247 return result;
248 }
249
SplitStringPieceUsingSubstr(std::string_view input,std::string_view delimiter,WhitespaceHandling whitespace,SplitResult result_type)250 std::vector<std::string_view> SplitStringPieceUsingSubstr(
251 std::string_view input,
252 std::string_view delimiter,
253 WhitespaceHandling whitespace,
254 SplitResult result_type) {
255 std::vector<std::string_view> result;
256 SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
257 return result;
258 }
259
260 } // namespace base
261