// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
17
18 #include <cassert>
19 #include <cstdint>
20 #include <cstdio>
21 #include <limits>
22
23 #include "absl/base/config.h"
24 #include "absl/base/const_init.h"
25 #include "absl/base/optimization.h"
26 #include "absl/strings/internal/str_format/extension.h"
27
28 namespace absl {
29 ABSL_NAMESPACE_BEGIN
30 namespace str_format_internal {
31
32 // The analyzed properties of a single specified conversion.
33 struct UnboundConversion {
34 // This is a user defined default constructor on purpose to skip the
35 // initialization of parts of the object that are not necessary.
UnboundConversionUnboundConversion36 UnboundConversion() {} // NOLINT
37
38 // This constructor is provided for the static checker. We don't want to do
39 // the unnecessary initialization in the normal case.
UnboundConversionUnboundConversion40 explicit constexpr UnboundConversion(absl::ConstInitType)
41 : arg_position{}, width{}, precision{} {}
42
43 class InputValue {
44 public:
set_valueUnboundConversion45 constexpr void set_value(int value) {
46 assert(value >= 0);
47 value_ = value;
48 }
valueUnboundConversion49 constexpr int value() const { return value_; }
50
51 // Marks the value as "from arg". aka the '*' format.
52 // Requires `value >= 1`.
53 // When set, is_from_arg() return true and get_from_arg() returns the
54 // original value.
55 // `value()`'s return value is unspecified in this state.
set_from_argUnboundConversion56 constexpr void set_from_arg(int value) {
57 assert(value > 0);
58 value_ = -value - 1;
59 }
is_from_argUnboundConversion60 constexpr bool is_from_arg() const { return value_ < -1; }
get_from_argUnboundConversion61 constexpr int get_from_arg() const {
62 assert(is_from_arg());
63 return -value_ - 1;
64 }
65
66 private:
67 int value_ = -1;
68 };
69
70 // No need to initialize. It will always be set in the parser.
71 int arg_position;
72
73 InputValue width;
74 InputValue precision;
75
76 Flags flags = Flags::kBasic;
77 LengthMod length_mod = LengthMod::none;
78 FormatConversionChar conv = FormatConversionCharInternal::kNone;
79 };
80
81 // Helper tag class for the table below.
82 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
83 // conversions.
84 class ConvTag {
85 public:
ConvTag(FormatConversionChar conversion_char)86 constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
87 : tag_(static_cast<uint8_t>(conversion_char)) {}
ConvTag(LengthMod length_mod)88 constexpr ConvTag(LengthMod length_mod) // NOLINT
89 : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
ConvTag(Flags flags)90 constexpr ConvTag(Flags flags) // NOLINT
91 : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
ConvTag()92 constexpr ConvTag() : tag_(0xFF) {}
93
is_conv()94 constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
is_length()95 constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
is_flags()96 constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
97
as_conv()98 constexpr FormatConversionChar as_conv() const {
99 assert(is_conv());
100 assert(!is_length());
101 assert(!is_flags());
102 return static_cast<FormatConversionChar>(tag_);
103 }
as_length()104 constexpr LengthMod as_length() const {
105 assert(!is_conv());
106 assert(is_length());
107 assert(!is_flags());
108 return static_cast<LengthMod>(tag_ & 0x3F);
109 }
as_flags()110 constexpr Flags as_flags() const {
111 assert(!is_conv());
112 assert(!is_length());
113 assert(is_flags());
114 return static_cast<Flags>(tag_ & 0x1F);
115 }
116
117 private:
118 uint8_t tag_;
119 };
120
121 struct ConvTagHolder {
122 using CC = FormatConversionCharInternal;
123 using LM = LengthMod;
124
125 // Abbreviations to fit in the table below.
126 static constexpr auto kFSign = Flags::kSignCol;
127 static constexpr auto kFAlt = Flags::kAlt;
128 static constexpr auto kFPos = Flags::kShowPos;
129 static constexpr auto kFLeft = Flags::kLeft;
130 static constexpr auto kFZero = Flags::kZero;
131
132 static constexpr ConvTag value[256] = {
133 {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
134 {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
135 {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
136 {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
137 kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&'
138 {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./
139 kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567
140 {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
141 {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
142 {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
143 {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
144 CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
145 {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
146 LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
147 CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw
148 CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
149 {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
150 {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
151 {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
152 {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
153 {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
154 {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
155 {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
156 {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
157 {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
158 {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
159 {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
160 {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
161 {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
162 {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
163 {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
164 {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
165 };
166 };
167
168 // Keep a single table for all the conversion chars and length modifiers.
GetTagForChar(char c)169 constexpr ConvTag GetTagForChar(char c) {
170 return ConvTagHolder::value[static_cast<unsigned char>(c)];
171 }
172
CheckFastPathSetting(const UnboundConversion & conv)173 constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
174 bool width_precision_needed =
175 conv.width.value() >= 0 || conv.precision.value() >= 0;
176 if (width_precision_needed && conv.flags == Flags::kBasic) {
177 #if defined(__clang__)
178 // Some compilers complain about this in constexpr even when not executed,
179 // so only enable the error dump in clang.
180 fprintf(stderr,
181 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
182 "width=%d precision=%d\n",
183 conv.flags == Flags::kBasic ? 1 : 0,
184 FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
185 FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
186 FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
187 FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
188 FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
189 conv.precision.value());
190 #endif // defined(__clang__)
191 return false;
192 }
193 return true;
194 }
195
ParseDigits(char & c,const char * & pos,const char * const end)196 constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
197 int digits = c - '0';
198 // We do not want to overflow `digits` so we consume at most digits10
199 // digits. If there are more digits the parsing will fail later on when the
200 // digit doesn't match the expected characters.
201 int num_digits = std::numeric_limits<int>::digits10;
202 for (;;) {
203 if (ABSL_PREDICT_FALSE(pos == end)) break;
204 c = *pos++;
205 if ('0' > c || c > '9') break;
206 --num_digits;
207 if (ABSL_PREDICT_FALSE(!num_digits)) break;
208 digits = 10 * digits + c - '0';
209 }
210 return digits;
211 }
212
213 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)214 constexpr const char* ConsumeConversion(const char* pos, const char* const end,
215 UnboundConversion* conv,
216 int* next_arg) {
217 const char* const original_pos = pos;
218 char c = 0;
219 // Read the next char into `c` and update `pos`. Returns false if there are
220 // no more chars to read.
221 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
222 do { \
223 if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
224 c = *pos++; \
225 } while (0)
226
227 if (is_positional) {
228 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
229 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
230 conv->arg_position = ParseDigits(c, pos, end);
231 assert(conv->arg_position > 0);
232 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
233 }
234
235 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
236
237 // We should start with the basic flag on.
238 assert(conv->flags == Flags::kBasic);
239
240 // Any non alpha character makes this conversion not basic.
241 // This includes flags (-+ #0), width (1-9, *) or precision (.).
242 // All conversion characters and length modifiers are alpha characters.
243 if (c < 'A') {
244 while (c <= '0') {
245 auto tag = GetTagForChar(c);
246 if (tag.is_flags()) {
247 conv->flags = conv->flags | tag.as_flags();
248 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
249 } else {
250 break;
251 }
252 }
253
254 if (c <= '9') {
255 if (c >= '0') {
256 int maybe_width = ParseDigits(c, pos, end);
257 if (!is_positional && c == '$') {
258 if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
259 // Positional conversion.
260 *next_arg = -1;
261 return ConsumeConversion<true>(original_pos, end, conv, next_arg);
262 }
263 conv->flags = conv->flags | Flags::kNonBasic;
264 conv->width.set_value(maybe_width);
265 } else if (c == '*') {
266 conv->flags = conv->flags | Flags::kNonBasic;
267 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
268 if (is_positional) {
269 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
270 conv->width.set_from_arg(ParseDigits(c, pos, end));
271 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
272 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
273 } else {
274 conv->width.set_from_arg(++*next_arg);
275 }
276 }
277 }
278
279 if (c == '.') {
280 conv->flags = conv->flags | Flags::kNonBasic;
281 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
282 if ('0' <= c && c <= '9') {
283 conv->precision.set_value(ParseDigits(c, pos, end));
284 } else if (c == '*') {
285 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
286 if (is_positional) {
287 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
288 conv->precision.set_from_arg(ParseDigits(c, pos, end));
289 if (c != '$') return nullptr;
290 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
291 } else {
292 conv->precision.set_from_arg(++*next_arg);
293 }
294 } else {
295 conv->precision.set_value(0);
296 }
297 }
298 }
299
300 auto tag = GetTagForChar(c);
301
302 if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
303 return nullptr;
304 }
305
306 if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
307 if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
308
309 // It is a length modifier.
310 LengthMod length_mod = tag.as_length();
311 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
312 if (c == 'h' && length_mod == LengthMod::h) {
313 conv->length_mod = LengthMod::hh;
314 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
315 } else if (c == 'l' && length_mod == LengthMod::l) {
316 conv->length_mod = LengthMod::ll;
317 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
318 } else {
319 conv->length_mod = length_mod;
320 }
321 tag = GetTagForChar(c);
322
323 if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
324 if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
325
326 // `wchar_t` args are marked non-basic so `Bind()` will copy the length mod.
327 if (conv->length_mod == LengthMod::l && c == 'c') {
328 conv->flags = conv->flags | Flags::kNonBasic;
329 }
330 }
331 #undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
332
333 assert(CheckFastPathSetting(*conv));
334 (void)(&CheckFastPathSetting);
335
336 conv->conv = tag.as_conv();
337 if (!is_positional) conv->arg_position = ++*next_arg;
338 return pos;
339 }
340
341 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
342 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
343 // If valid, it returns the first character following the conversion spec,
344 // and the spec part is broken down and returned in 'conv'.
345 // If invalid, returns nullptr.
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)346 constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
347 UnboundConversion* conv,
348 int* next_arg) {
349 if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
350 return ConsumeConversion<false>(p, end, conv, next_arg);
351 }
352
353 } // namespace str_format_internal
354 ABSL_NAMESPACE_END
355 } // namespace absl
356
357 #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
358