1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/string_split.h"
6
7 #include "base/logging.h"
8 #include "base/string_util.h"
9 #include "base/third_party/icu/icu_utf.h"
10 #include "base/utf_string_conversions.h"
11
12 namespace base {
13
14 template<typename STR>
SplitStringT(const STR & str,const typename STR::value_type s,bool trim_whitespace,std::vector<STR> * r)15 static void SplitStringT(const STR& str,
16 const typename STR::value_type s,
17 bool trim_whitespace,
18 std::vector<STR>* r) {
19 size_t last = 0;
20 size_t i;
21 size_t c = str.size();
22 for (i = 0; i <= c; ++i) {
23 if (i == c || str[i] == s) {
24 size_t len = i - last;
25 STR tmp = str.substr(last, len);
26 if (trim_whitespace) {
27 STR t_tmp;
28 TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
29 r->push_back(t_tmp);
30 } else {
31 r->push_back(tmp);
32 }
33 last = i + 1;
34 }
35 }
36 }
37
SplitString(const std::wstring & str,wchar_t c,std::vector<std::wstring> * r)38 void SplitString(const std::wstring& str,
39 wchar_t c,
40 std::vector<std::wstring>* r) {
41 SplitStringT(str, c, true, r);
42 }
43
44 #if !defined(WCHAR_T_IS_UTF16)
SplitString(const string16 & str,char16 c,std::vector<string16> * r)45 void SplitString(const string16& str,
46 char16 c,
47 std::vector<string16>* r) {
48 DCHECK(CBU16_IS_SINGLE(c));
49 SplitStringT(str, c, true, r);
50 }
51 #endif
52
SplitString(const std::string & str,char c,std::vector<std::string> * r)53 void SplitString(const std::string& str,
54 char c,
55 std::vector<std::string>* r) {
56 #if CHAR_MIN < 0
57 DCHECK(c >= 0);
58 #endif
59 DCHECK(c < 0x7F);
60 SplitStringT(str, c, true, r);
61 }
62
SplitStringIntoKeyValues(const std::string & line,char key_value_delimiter,std::string * key,std::vector<std::string> * values)63 bool SplitStringIntoKeyValues(
64 const std::string& line,
65 char key_value_delimiter,
66 std::string* key, std::vector<std::string>* values) {
67 key->clear();
68 values->clear();
69
70 // Find the key string.
71 size_t end_key_pos = line.find_first_of(key_value_delimiter);
72 if (end_key_pos == std::string::npos) {
73 DVLOG(1) << "cannot parse key from line: " << line;
74 return false; // no key
75 }
76 key->assign(line, 0, end_key_pos);
77
78 // Find the values string.
79 std::string remains(line, end_key_pos, line.size() - end_key_pos);
80 size_t begin_values_pos = remains.find_first_not_of(key_value_delimiter);
81 if (begin_values_pos == std::string::npos) {
82 DVLOG(1) << "cannot parse value from line: " << line;
83 return false; // no value
84 }
85 std::string values_string(remains, begin_values_pos,
86 remains.size() - begin_values_pos);
87
88 // Construct the values vector.
89 values->push_back(values_string);
90 return true;
91 }
92
SplitStringIntoKeyValuePairs(const std::string & line,char key_value_delimiter,char key_value_pair_delimiter,std::vector<std::pair<std::string,std::string>> * kv_pairs)93 bool SplitStringIntoKeyValuePairs(
94 const std::string& line,
95 char key_value_delimiter,
96 char key_value_pair_delimiter,
97 std::vector<std::pair<std::string, std::string> >* kv_pairs) {
98 kv_pairs->clear();
99
100 std::vector<std::string> pairs;
101 SplitString(line, key_value_pair_delimiter, &pairs);
102
103 bool success = true;
104 for (size_t i = 0; i < pairs.size(); ++i) {
105 // Empty pair. SplitStringIntoKeyValues is more strict about an empty pair
106 // line, so continue with the next pair.
107 if (pairs[i].empty())
108 continue;
109
110 std::string key;
111 std::vector<std::string> value;
112 if (!SplitStringIntoKeyValues(pairs[i],
113 key_value_delimiter,
114 &key, &value)) {
115 // Don't return here, to allow for keys without associated
116 // values; just record that our split failed.
117 success = false;
118 }
119 DCHECK_LE(value.size(), 1U);
120 kv_pairs->push_back(make_pair(key, value.empty()? "" : value[0]));
121 }
122 return success;
123 }
124
125 template <typename STR>
SplitStringUsingSubstrT(const STR & str,const STR & s,std::vector<STR> * r)126 static void SplitStringUsingSubstrT(const STR& str,
127 const STR& s,
128 std::vector<STR>* r) {
129 typename STR::size_type begin_index = 0;
130 while (true) {
131 const typename STR::size_type end_index = str.find(s, begin_index);
132 if (end_index == STR::npos) {
133 const STR term = str.substr(begin_index);
134 STR tmp;
135 TrimWhitespace(term, TRIM_ALL, &tmp);
136 r->push_back(tmp);
137 return;
138 }
139 const STR term = str.substr(begin_index, end_index - begin_index);
140 STR tmp;
141 TrimWhitespace(term, TRIM_ALL, &tmp);
142 r->push_back(tmp);
143 begin_index = end_index + s.size();
144 }
145 }
146
SplitStringUsingSubstr(const string16 & str,const string16 & s,std::vector<string16> * r)147 void SplitStringUsingSubstr(const string16& str,
148 const string16& s,
149 std::vector<string16>* r) {
150 SplitStringUsingSubstrT(str, s, r);
151 }
152
SplitStringUsingSubstr(const std::string & str,const std::string & s,std::vector<std::string> * r)153 void SplitStringUsingSubstr(const std::string& str,
154 const std::string& s,
155 std::vector<std::string>* r) {
156 SplitStringUsingSubstrT(str, s, r);
157 }
158
SplitStringDontTrim(const string16 & str,char16 c,std::vector<string16> * r)159 void SplitStringDontTrim(const string16& str,
160 char16 c,
161 std::vector<string16>* r) {
162 DCHECK(CBU16_IS_SINGLE(c));
163 SplitStringT(str, c, false, r);
164 }
165
SplitStringDontTrim(const std::string & str,char c,std::vector<std::string> * r)166 void SplitStringDontTrim(const std::string& str,
167 char c,
168 std::vector<std::string>* r) {
169 DCHECK(IsStringUTF8(str));
170 #if CHAR_MIN < 0
171 DCHECK(c >= 0);
172 #endif
173 DCHECK(c < 0x7F);
174 SplitStringT(str, c, false, r);
175 }
176
// Appends every maximal run of non-whitespace characters in |str| to
// |result|, in order. |result| is not cleared first, and no empty strings are
// ever appended: leading, trailing and repeated whitespace is skipped.
//
// Whitespace here means exactly the six characters listed in the switch
// below (the HTML 5 definition); nothing else is treated as a separator.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  const typename STR::size_type length = str.length();

  // Start index of the token currently being scanned, or npos while the scan
  // is positioned between tokens.
  typename STR::size_type token_start = STR::npos;

  for (typename STR::size_type pos = 0; pos < length; ++pos) {
    bool is_whitespace;
    switch (str[pos]) {
      // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
      case L' ':
      case L'\t':
      case L'\xA':
      case L'\xB':
      case L'\xC':
      case L'\xD':
        is_whitespace = true;
        break;
      default:
        is_whitespace = false;
        break;
    }

    if (is_whitespace) {
      // Whitespace closes the open token, if there is one.
      if (token_start != STR::npos) {
        result->push_back(str.substr(token_start, pos - token_start));
        token_start = STR::npos;
      }
    } else if (token_start == STR::npos) {
      // First character of a new token.
      token_start = pos;
    }
  }

  // A token that runs up to the end of the input is still open here.
  if (token_start != STR::npos)
    result->push_back(str.substr(token_start, length - token_start));
}
216
SplitStringAlongWhitespace(const std::wstring & str,std::vector<std::wstring> * result)217 void SplitStringAlongWhitespace(const std::wstring& str,
218 std::vector<std::wstring>* result) {
219 SplitStringAlongWhitespaceT(str, result);
220 }
221
222 #if !defined(WCHAR_T_IS_UTF16)
SplitStringAlongWhitespace(const string16 & str,std::vector<string16> * result)223 void SplitStringAlongWhitespace(const string16& str,
224 std::vector<string16>* result) {
225 SplitStringAlongWhitespaceT(str, result);
226 }
227 #endif
228
SplitStringAlongWhitespace(const std::string & str,std::vector<std::string> * result)229 void SplitStringAlongWhitespace(const std::string& str,
230 std::vector<std::string>* result) {
231 SplitStringAlongWhitespaceT(str, result);
232 }
233
234 } // namespace base
235