// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
17
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <limits>

#include "absl/base/config.h"
#include "absl/base/const_init.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/str_format/extension.h"
24
25 namespace absl {
26 ABSL_NAMESPACE_BEGIN
27 namespace str_format_internal {
28
// Length modifier of a conversion specification (e.g. the `ll` in "%lld").
// `none` is the value used when no modifier was given.
//
// The enumerator values are stored in the low six bits of a ConvTag, so they
// must stay below 0x40.
enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
30
31 // The analyzed properties of a single specified conversion.
32 struct UnboundConversion {
33 // This is a user defined default constructor on purpose to skip the
34 // initialization of parts of the object that are not necessary.
UnboundConversionUnboundConversion35 UnboundConversion() {} // NOLINT
36
37 // This constructor is provided for the static checker. We don't want to do
38 // the unnecessary initialization in the normal case.
UnboundConversionUnboundConversion39 explicit constexpr UnboundConversion(absl::ConstInitType)
40 : arg_position{}, width{}, precision{} {}
41
42 class InputValue {
43 public:
set_valueUnboundConversion44 constexpr void set_value(int value) {
45 assert(value >= 0);
46 value_ = value;
47 }
valueUnboundConversion48 constexpr int value() const { return value_; }
49
50 // Marks the value as "from arg". aka the '*' format.
51 // Requires `value >= 1`.
52 // When set, is_from_arg() return true and get_from_arg() returns the
53 // original value.
54 // `value()`'s return value is unspecified in this state.
set_from_argUnboundConversion55 constexpr void set_from_arg(int value) {
56 assert(value > 0);
57 value_ = -value - 1;
58 }
is_from_argUnboundConversion59 constexpr bool is_from_arg() const { return value_ < -1; }
get_from_argUnboundConversion60 constexpr int get_from_arg() const {
61 assert(is_from_arg());
62 return -value_ - 1;
63 }
64
65 private:
66 int value_ = -1;
67 };
68
69 // No need to initialize. It will always be set in the parser.
70 int arg_position;
71
72 InputValue width;
73 InputValue precision;
74
75 Flags flags = Flags::kBasic;
76 LengthMod length_mod = LengthMod::none;
77 FormatConversionChar conv = FormatConversionCharInternal::kNone;
78 };
79
80 // Helper tag class for the table below.
81 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
82 // conversions.
83 class ConvTag {
84 public:
ConvTag(FormatConversionChar conversion_char)85 constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
86 : tag_(static_cast<uint8_t>(conversion_char)) {}
ConvTag(LengthMod length_mod)87 constexpr ConvTag(LengthMod length_mod) // NOLINT
88 : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
ConvTag(Flags flags)89 constexpr ConvTag(Flags flags) // NOLINT
90 : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
ConvTag()91 constexpr ConvTag() : tag_(0xFF) {}
92
is_conv()93 constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
is_length()94 constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
is_flags()95 constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
96
as_conv()97 constexpr FormatConversionChar as_conv() const {
98 assert(is_conv());
99 assert(!is_length());
100 assert(!is_flags());
101 return static_cast<FormatConversionChar>(tag_);
102 }
as_length()103 constexpr LengthMod as_length() const {
104 assert(!is_conv());
105 assert(is_length());
106 assert(!is_flags());
107 return static_cast<LengthMod>(tag_ & 0x3F);
108 }
as_flags()109 constexpr Flags as_flags() const {
110 assert(!is_conv());
111 assert(!is_length());
112 assert(is_flags());
113 return static_cast<Flags>(tag_ & 0x1F);
114 }
115
116 private:
117 uint8_t tag_;
118 };
119
120 struct ConvTagHolder {
121 using CC = FormatConversionCharInternal;
122 using LM = LengthMod;
123
124 // Abbreviations to fit in the table below.
125 static constexpr auto kFSign = Flags::kSignCol;
126 static constexpr auto kFAlt = Flags::kAlt;
127 static constexpr auto kFPos = Flags::kShowPos;
128 static constexpr auto kFLeft = Flags::kLeft;
129 static constexpr auto kFZero = Flags::kZero;
130
131 static constexpr ConvTag value[256] = {
132 {}, {}, {}, {}, {}, {}, {}, {}, // 00-07
133 {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
134 {}, {}, {}, {}, {}, {}, {}, {}, // 10-17
135 {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
136 kFSign, {}, {}, kFAlt, {}, {}, {}, {}, // !"#$%&'
137 {}, {}, {}, kFPos, {}, kFLeft, {}, {}, // ()*+,-./
138 kFZero, {}, {}, {}, {}, {}, {}, {}, // 01234567
139 {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>?
140 {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
141 {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
142 {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
143 CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
144 {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
145 LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
146 CC::p, LM::q, {}, CC::s, LM::t, CC::u, CC::v, {}, // pqrstuvw
147 CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
148 {}, {}, {}, {}, {}, {}, {}, {}, // 80-87
149 {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
150 {}, {}, {}, {}, {}, {}, {}, {}, // 90-97
151 {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
152 {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
153 {}, {}, {}, {}, {}, {}, {}, {}, // a8-af
154 {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
155 {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
156 {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
157 {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
158 {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
159 {}, {}, {}, {}, {}, {}, {}, {}, // d8-df
160 {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
161 {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
162 {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
163 {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
164 };
165 };
166
167 // Keep a single table for all the conversion chars and length modifiers.
GetTagForChar(char c)168 constexpr ConvTag GetTagForChar(char c) {
169 return ConvTagHolder::value[static_cast<unsigned char>(c)];
170 }
171
CheckFastPathSetting(const UnboundConversion & conv)172 constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
173 bool width_precision_needed =
174 conv.width.value() >= 0 || conv.precision.value() >= 0;
175 if (width_precision_needed && conv.flags == Flags::kBasic) {
176 #if defined(__clang__)
177 // Some compilers complain about this in constexpr even when not executed,
178 // so only enable the error dump in clang.
179 fprintf(stderr,
180 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
181 "width=%d precision=%d\n",
182 conv.flags == Flags::kBasic ? 1 : 0,
183 FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
184 FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
185 FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
186 FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
187 FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
188 conv.precision.value());
189 #endif // defined(__clang__)
190 return false;
191 }
192 return true;
193 }
194
ParseDigits(char & c,const char * & pos,const char * const end)195 constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
196 int digits = c - '0';
197 // We do not want to overflow `digits` so we consume at most digits10
198 // digits. If there are more digits the parsing will fail later on when the
199 // digit doesn't match the expected characters.
200 int num_digits = std::numeric_limits<int>::digits10;
201 for (;;) {
202 if (ABSL_PREDICT_FALSE(pos == end)) break;
203 c = *pos++;
204 if ('0' > c || c > '9') break;
205 --num_digits;
206 if (ABSL_PREDICT_FALSE(!num_digits)) break;
207 digits = 10 * digits + c - '0';
208 }
209 return digits;
210 }
211
212 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)213 constexpr const char* ConsumeConversion(const char* pos, const char* const end,
214 UnboundConversion* conv,
215 int* next_arg) {
216 const char* const original_pos = pos;
217 char c = 0;
218 // Read the next char into `c` and update `pos`. Returns false if there are
219 // no more chars to read.
220 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
221 do { \
222 if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
223 c = *pos++; \
224 } while (0)
225
226 if (is_positional) {
227 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
228 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
229 conv->arg_position = ParseDigits(c, pos, end);
230 assert(conv->arg_position > 0);
231 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
232 }
233
234 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
235
236 // We should start with the basic flag on.
237 assert(conv->flags == Flags::kBasic);
238
239 // Any non alpha character makes this conversion not basic.
240 // This includes flags (-+ #0), width (1-9, *) or precision (.).
241 // All conversion characters and length modifiers are alpha characters.
242 if (c < 'A') {
243 while (c <= '0') {
244 auto tag = GetTagForChar(c);
245 if (tag.is_flags()) {
246 conv->flags = conv->flags | tag.as_flags();
247 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
248 } else {
249 break;
250 }
251 }
252
253 if (c <= '9') {
254 if (c >= '0') {
255 int maybe_width = ParseDigits(c, pos, end);
256 if (!is_positional && c == '$') {
257 if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
258 // Positional conversion.
259 *next_arg = -1;
260 return ConsumeConversion<true>(original_pos, end, conv, next_arg);
261 }
262 conv->flags = conv->flags | Flags::kNonBasic;
263 conv->width.set_value(maybe_width);
264 } else if (c == '*') {
265 conv->flags = conv->flags | Flags::kNonBasic;
266 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
267 if (is_positional) {
268 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
269 conv->width.set_from_arg(ParseDigits(c, pos, end));
270 if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
271 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
272 } else {
273 conv->width.set_from_arg(++*next_arg);
274 }
275 }
276 }
277
278 if (c == '.') {
279 conv->flags = conv->flags | Flags::kNonBasic;
280 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
281 if ('0' <= c && c <= '9') {
282 conv->precision.set_value(ParseDigits(c, pos, end));
283 } else if (c == '*') {
284 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
285 if (is_positional) {
286 if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
287 conv->precision.set_from_arg(ParseDigits(c, pos, end));
288 if (c != '$') return nullptr;
289 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
290 } else {
291 conv->precision.set_from_arg(++*next_arg);
292 }
293 } else {
294 conv->precision.set_value(0);
295 }
296 }
297 }
298
299 auto tag = GetTagForChar(c);
300
301 if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
302 return nullptr;
303 }
304
305 if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
306 if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
307
308 // It is a length modifier.
309 using str_format_internal::LengthMod;
310 LengthMod length_mod = tag.as_length();
311 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
312 if (c == 'h' && length_mod == LengthMod::h) {
313 conv->length_mod = LengthMod::hh;
314 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
315 } else if (c == 'l' && length_mod == LengthMod::l) {
316 conv->length_mod = LengthMod::ll;
317 ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
318 } else {
319 conv->length_mod = length_mod;
320 }
321 tag = GetTagForChar(c);
322
323 if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
324 if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
325 }
326 #undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
327
328 assert(CheckFastPathSetting(*conv));
329 (void)(&CheckFastPathSetting);
330
331 conv->conv = tag.as_conv();
332 if (!is_positional) conv->arg_position = ++*next_arg;
333 return pos;
334 }
335
336 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
337 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
338 // If valid, it returns the first character following the conversion spec,
339 // and the spec part is broken down and returned in 'conv'.
340 // If invalid, returns nullptr.
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)341 constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
342 UnboundConversion* conv,
343 int* next_arg) {
344 if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
345 return ConsumeConversion<false>(p, end, conv, next_arg);
346 }
347
348 } // namespace str_format_internal
349 ABSL_NAMESPACE_END
350 } // namespace absl
351
352 #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
353