1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/str_format/parser.h"
16
17 #include <assert.h>
18 #include <string.h>
19 #include <wchar.h>
20 #include <cctype>
21 #include <cstdint>
22
23 #include <algorithm>
24 #include <initializer_list>
25 #include <limits>
26 #include <ostream>
27 #include <string>
28 #include <unordered_set>
29
30 namespace absl {
31 ABSL_NAMESPACE_BEGIN
32 namespace str_format_internal {
33
34 using CC = FormatConversionCharInternal;
35 using LM = LengthMod;
36
37 // Abbreviations to fit in the table below.
38 constexpr auto f_sign = Flags::kSignCol;
39 constexpr auto f_alt = Flags::kAlt;
40 constexpr auto f_pos = Flags::kShowPos;
41 constexpr auto f_left = Flags::kLeft;
42 constexpr auto f_zero = Flags::kZero;
43
44 ABSL_CONST_INIT const ConvTag kTags[256] = {
45 {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
46 {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
47 {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
48 {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
49 f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&'
50 {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./
51 f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567
52 {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
53 {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
54 {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
55 {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
56 CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
57 {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
58 LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
59 CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
60 CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
61 {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
62 {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
63 {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
64 {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
65 {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
66 {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
67 {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
68 {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
69 {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
70 {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
71 {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
72 {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
73 {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
74 {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
75 {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
76 {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
77 };
78
79 namespace {
80
CheckFastPathSetting(const UnboundConversion & conv)81 bool CheckFastPathSetting(const UnboundConversion& conv) {
82 bool width_precision_needed =
83 conv.width.value() >= 0 || conv.precision.value() >= 0;
84 if (width_precision_needed && conv.flags == Flags::kBasic) {
85 fprintf(stderr,
86 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
87 "width=%d precision=%d\n",
88 conv.flags == Flags::kBasic ? 1 : 0,
89 FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
90 FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
91 FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
92 FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
93 FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
94 conv.precision.value());
95 return false;
96 }
97 return true;
98 }
99
100 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)101 const char *ConsumeConversion(const char *pos, const char *const end,
102 UnboundConversion *conv, int *next_arg) {
103 const char* const original_pos = pos;
104 char c;
105 // Read the next char into `c` and update `pos`. Returns false if there are
106 // no more chars to read.
107 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
108 do { \
109 if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
110 c = *pos++; \
111 } while (0)
112
113 const auto parse_digits = [&] {
114 int digits = c - '0';
115 // We do not want to overflow `digits` so we consume at most digits10
116 // digits. If there are more digits the parsing will fail later on when the
117 // digit doesn't match the expected characters.
118 int num_digits = std::numeric_limits<int>::digits10;
119 for (;;) {
120 if (ABSL_PREDICT_FALSE(pos == end)) break;
121 c = *pos++;
122 if (!std::isdigit(c)) break;
123 --num_digits;
124 if (ABSL_PREDICT_FALSE(!num_digits)) break;
125 digits = 10 * digits + c - '0';
126 }
127 return digits;
128 };
129
130 if (is_positional) {
131 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
132 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
133 conv->arg_position = parse_digits();
134 assert(conv->arg_position > 0);
135 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
136 }
137
138 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
139
140 // We should start with the basic flag on.
141 assert(conv->flags == Flags::kBasic);
142
143 // Any non alpha character makes this conversion not basic.
144 // This includes flags (-+ #0), width (1-9, *) or precision (.).
145 // All conversion characters and length modifiers are alpha characters.
146 if (c < 'A') {
147 while (c <= '0') {
148 auto tag = GetTagForChar(c);
149 if (tag.is_flags()) {
150 conv->flags = conv->flags | tag.as_flags();
151 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
152 } else {
153 break;
154 }
155 }
156
157 if (c <= '9') {
158 if (c >= '0') {
159 int maybe_width = parse_digits();
160 if (!is_positional && c == '$') {
161 if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
162 // Positional conversion.
163 *next_arg = -1;
164 return ConsumeConversion<true>(original_pos, end, conv, next_arg);
165 }
166 conv->flags = conv->flags | Flags::kNonBasic;
167 conv->width.set_value(maybe_width);
168 } else if (c == '*') {
169 conv->flags = conv->flags | Flags::kNonBasic;
170 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
171 if (is_positional) {
172 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
173 conv->width.set_from_arg(parse_digits());
174 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
175 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
176 } else {
177 conv->width.set_from_arg(++*next_arg);
178 }
179 }
180 }
181
182 if (c == '.') {
183 conv->flags = conv->flags | Flags::kNonBasic;
184 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
185 if (std::isdigit(c)) {
186 conv->precision.set_value(parse_digits());
187 } else if (c == '*') {
188 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
189 if (is_positional) {
190 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
191 conv->precision.set_from_arg(parse_digits());
192 if (c != '$') return nullptr;
193 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
194 } else {
195 conv->precision.set_from_arg(++*next_arg);
196 }
197 } else {
198 conv->precision.set_value(0);
199 }
200 }
201 }
202
203 auto tag = GetTagForChar(c);
204
205 if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
206 if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
207
208 // It is a length modifier.
209 using str_format_internal::LengthMod;
210 LengthMod length_mod = tag.as_length();
211 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
212 if (c == 'h' && length_mod == LengthMod::h) {
213 conv->length_mod = LengthMod::hh;
214 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
215 } else if (c == 'l' && length_mod == LengthMod::l) {
216 conv->length_mod = LengthMod::ll;
217 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
218 } else {
219 conv->length_mod = length_mod;
220 }
221 tag = GetTagForChar(c);
222 if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
223 }
224
225 assert(CheckFastPathSetting(*conv));
226 (void)(&CheckFastPathSetting);
227
228 conv->conv = tag.as_conv();
229 if (!is_positional) conv->arg_position = ++*next_arg;
230 return pos;
231 }
232
233 } // namespace
234
LengthModToString(LengthMod v)235 std::string LengthModToString(LengthMod v) {
236 switch (v) {
237 case LengthMod::h:
238 return "h";
239 case LengthMod::hh:
240 return "hh";
241 case LengthMod::l:
242 return "l";
243 case LengthMod::ll:
244 return "ll";
245 case LengthMod::L:
246 return "L";
247 case LengthMod::j:
248 return "j";
249 case LengthMod::z:
250 return "z";
251 case LengthMod::t:
252 return "t";
253 case LengthMod::q:
254 return "q";
255 case LengthMod::none:
256 return "";
257 }
258 return "";
259 }
260
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)261 const char *ConsumeUnboundConversion(const char *p, const char *end,
262 UnboundConversion *conv, int *next_arg) {
263 if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
264 return ConsumeConversion<false>(p, end, conv, next_arg);
265 }
266
267 struct ParsedFormatBase::ParsedFormatConsumer {
ParsedFormatConsumerabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer268 explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
269 : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
270
Appendabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer271 bool Append(string_view s) {
272 if (s.empty()) return true;
273
274 size_t text_end = AppendText(s);
275
276 if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
277 // Let's extend the existing text run.
278 parsed->items_.back().text_end = text_end;
279 } else {
280 // Let's make a new text run.
281 parsed->items_.push_back({false, text_end, {}});
282 }
283 return true;
284 }
285
ConvertOneabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer286 bool ConvertOne(const UnboundConversion &conv, string_view s) {
287 size_t text_end = AppendText(s);
288 parsed->items_.push_back({true, text_end, conv});
289 return true;
290 }
291
AppendTextabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer292 size_t AppendText(string_view s) {
293 memcpy(data_pos, s.data(), s.size());
294 data_pos += s.size();
295 return static_cast<size_t>(data_pos - parsed->data_.get());
296 }
297
298 ParsedFormatBase *parsed;
299 char* data_pos;
300 };
301
ParsedFormatBase(string_view format,bool allow_ignored,std::initializer_list<FormatConversionCharSet> convs)302 ParsedFormatBase::ParsedFormatBase(
303 string_view format, bool allow_ignored,
304 std::initializer_list<FormatConversionCharSet> convs)
305 : data_(format.empty() ? nullptr : new char[format.size()]) {
306 has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
307 !MatchesConversions(allow_ignored, convs);
308 }
309
MatchesConversions(bool allow_ignored,std::initializer_list<FormatConversionCharSet> convs) const310 bool ParsedFormatBase::MatchesConversions(
311 bool allow_ignored,
312 std::initializer_list<FormatConversionCharSet> convs) const {
313 std::unordered_set<int> used;
314 auto add_if_valid_conv = [&](int pos, char c) {
315 if (static_cast<size_t>(pos) > convs.size() ||
316 !Contains(convs.begin()[pos - 1], c))
317 return false;
318 used.insert(pos);
319 return true;
320 };
321 for (const ConversionItem &item : items_) {
322 if (!item.is_conversion) continue;
323 auto &conv = item.conv;
324 if (conv.precision.is_from_arg() &&
325 !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
326 return false;
327 if (conv.width.is_from_arg() &&
328 !add_if_valid_conv(conv.width.get_from_arg(), '*'))
329 return false;
330 if (!add_if_valid_conv(conv.arg_position,
331 FormatConversionCharToChar(conv.conv)))
332 return false;
333 }
334 return used.size() == convs.size() || allow_ignored;
335 }
336
337 } // namespace str_format_internal
338 ABSL_NAMESPACE_END
339 } // namespace absl
340