// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
17 
18 #include <cassert>
19 #include <cstdint>
20 #include <cstdio>
21 #include <limits>
22 
23 #include "absl/base/config.h"
24 #include "absl/base/const_init.h"
25 #include "absl/base/optimization.h"
26 #include "absl/strings/internal/str_format/extension.h"
27 
28 namespace absl {
29 ABSL_NAMESPACE_BEGIN
30 namespace str_format_internal {
31 
32 // The analyzed properties of a single specified conversion.
33 struct UnboundConversion {
34   // This is a user defined default constructor on purpose to skip the
35   // initialization of parts of the object that are not necessary.
UnboundConversionUnboundConversion36   UnboundConversion() {}  // NOLINT
37 
38   // This constructor is provided for the static checker. We don't want to do
39   // the unnecessary initialization in the normal case.
UnboundConversionUnboundConversion40   explicit constexpr UnboundConversion(absl::ConstInitType)
41       : arg_position{}, width{}, precision{} {}
42 
43   class InputValue {
44    public:
set_valueUnboundConversion45     constexpr void set_value(int value) {
46       assert(value >= 0);
47       value_ = value;
48     }
valueUnboundConversion49     constexpr int value() const { return value_; }
50 
51     // Marks the value as "from arg". aka the '*' format.
52     // Requires `value >= 1`.
53     // When set, is_from_arg() return true and get_from_arg() returns the
54     // original value.
55     // `value()`'s return value is unspecified in this state.
set_from_argUnboundConversion56     constexpr void set_from_arg(int value) {
57       assert(value > 0);
58       value_ = -value - 1;
59     }
is_from_argUnboundConversion60     constexpr bool is_from_arg() const { return value_ < -1; }
get_from_argUnboundConversion61     constexpr int get_from_arg() const {
62       assert(is_from_arg());
63       return -value_ - 1;
64     }
65 
66    private:
67     int value_ = -1;
68   };
69 
70   // No need to initialize. It will always be set in the parser.
71   int arg_position;
72 
73   InputValue width;
74   InputValue precision;
75 
76   Flags flags = Flags::kBasic;
77   LengthMod length_mod = LengthMod::none;
78   FormatConversionChar conv = FormatConversionCharInternal::kNone;
79 };
80 
81 // Helper tag class for the table below.
82 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
83 // conversions.
84 class ConvTag {
85  public:
ConvTag(FormatConversionChar conversion_char)86   constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
87       : tag_(static_cast<uint8_t>(conversion_char)) {}
ConvTag(LengthMod length_mod)88   constexpr ConvTag(LengthMod length_mod)  // NOLINT
89       : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
ConvTag(Flags flags)90   constexpr ConvTag(Flags flags)  // NOLINT
91       : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
ConvTag()92   constexpr ConvTag() : tag_(0xFF) {}
93 
is_conv()94   constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
is_length()95   constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
is_flags()96   constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
97 
as_conv()98   constexpr FormatConversionChar as_conv() const {
99     assert(is_conv());
100     assert(!is_length());
101     assert(!is_flags());
102     return static_cast<FormatConversionChar>(tag_);
103   }
as_length()104   constexpr LengthMod as_length() const {
105     assert(!is_conv());
106     assert(is_length());
107     assert(!is_flags());
108     return static_cast<LengthMod>(tag_ & 0x3F);
109   }
as_flags()110   constexpr Flags as_flags() const {
111     assert(!is_conv());
112     assert(!is_length());
113     assert(is_flags());
114     return static_cast<Flags>(tag_ & 0x1F);
115   }
116 
117  private:
118   uint8_t tag_;
119 };
120 
121 struct ConvTagHolder {
122   using CC = FormatConversionCharInternal;
123   using LM = LengthMod;
124 
125   // Abbreviations to fit in the table below.
126   static constexpr auto kFSign = Flags::kSignCol;
127   static constexpr auto kFAlt = Flags::kAlt;
128   static constexpr auto kFPos = Flags::kShowPos;
129   static constexpr auto kFLeft = Flags::kLeft;
130   static constexpr auto kFZero = Flags::kZero;
131 
132   static constexpr ConvTag value[256] = {
133       {},     {},    {},    {},    {},    {},     {},    {},     // 00-07
134       {},     {},    {},    {},    {},    {},     {},    {},     // 08-0f
135       {},     {},    {},    {},    {},    {},     {},    {},     // 10-17
136       {},     {},    {},    {},    {},    {},     {},    {},     // 18-1f
137       kFSign, {},    {},    kFAlt, {},    {},     {},    {},     //  !"#$%&'
138       {},     {},    {},    kFPos, {},    kFLeft, {},    {},     // ()*+,-./
139       kFZero, {},    {},    {},    {},    {},     {},    {},     // 01234567
140       {},     {},    {},    {},    {},    {},     {},    {},     // 89:;<=>?
141       {},     CC::A, {},    {},    {},    CC::E,  CC::F, CC::G,  // @ABCDEFG
142       {},     {},    {},    {},    LM::L, {},     {},    {},     // HIJKLMNO
143       {},     {},    {},    {},    {},    {},     {},    {},     // PQRSTUVW
144       CC::X,  {},    {},    {},    {},    {},     {},    {},     // XYZ[\]^_
145       {},     CC::a, {},    CC::c, CC::d, CC::e,  CC::f, CC::g,  // `abcdefg
146       LM::h,  CC::i, LM::j, {},    LM::l, {},     CC::n, CC::o,  // hijklmno
147       CC::p,  LM::q, {},    CC::s, LM::t, CC::u,  CC::v, {},     // pqrstuvw
148       CC::x,  {},    LM::z, {},    {},    {},     {},    {},     // xyz{|}!
149       {},     {},    {},    {},    {},    {},     {},    {},     // 80-87
150       {},     {},    {},    {},    {},    {},     {},    {},     // 88-8f
151       {},     {},    {},    {},    {},    {},     {},    {},     // 90-97
152       {},     {},    {},    {},    {},    {},     {},    {},     // 98-9f
153       {},     {},    {},    {},    {},    {},     {},    {},     // a0-a7
154       {},     {},    {},    {},    {},    {},     {},    {},     // a8-af
155       {},     {},    {},    {},    {},    {},     {},    {},     // b0-b7
156       {},     {},    {},    {},    {},    {},     {},    {},     // b8-bf
157       {},     {},    {},    {},    {},    {},     {},    {},     // c0-c7
158       {},     {},    {},    {},    {},    {},     {},    {},     // c8-cf
159       {},     {},    {},    {},    {},    {},     {},    {},     // d0-d7
160       {},     {},    {},    {},    {},    {},     {},    {},     // d8-df
161       {},     {},    {},    {},    {},    {},     {},    {},     // e0-e7
162       {},     {},    {},    {},    {},    {},     {},    {},     // e8-ef
163       {},     {},    {},    {},    {},    {},     {},    {},     // f0-f7
164       {},     {},    {},    {},    {},    {},     {},    {},     // f8-ff
165   };
166 };
167 
168 // Keep a single table for all the conversion chars and length modifiers.
GetTagForChar(char c)169 constexpr ConvTag GetTagForChar(char c) {
170   return ConvTagHolder::value[static_cast<unsigned char>(c)];
171 }
172 
CheckFastPathSetting(const UnboundConversion & conv)173 constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
174   bool width_precision_needed =
175       conv.width.value() >= 0 || conv.precision.value() >= 0;
176   if (width_precision_needed && conv.flags == Flags::kBasic) {
177 #if defined(__clang__)
178     // Some compilers complain about this in constexpr even when not executed,
179     // so only enable the error dump in clang.
180     fprintf(stderr,
181             "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
182             "width=%d precision=%d\n",
183             conv.flags == Flags::kBasic ? 1 : 0,
184             FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
185             FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
186             FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
187             FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
188             FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
189             conv.precision.value());
190 #endif  // defined(__clang__)
191     return false;
192   }
193   return true;
194 }
195 
ParseDigits(char & c,const char * & pos,const char * const end)196 constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
197   int digits = c - '0';
198   // We do not want to overflow `digits` so we consume at most digits10
199   // digits. If there are more digits the parsing will fail later on when the
200   // digit doesn't match the expected characters.
201   int num_digits = std::numeric_limits<int>::digits10;
202   for (;;) {
203     if (ABSL_PREDICT_FALSE(pos == end)) break;
204     c = *pos++;
205     if ('0' > c || c > '9') break;
206     --num_digits;
207     if (ABSL_PREDICT_FALSE(!num_digits)) break;
208     digits = 10 * digits + c - '0';
209   }
210   return digits;
211 }
212 
213 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)214 constexpr const char* ConsumeConversion(const char* pos, const char* const end,
215                                         UnboundConversion* conv,
216                                         int* next_arg) {
217   const char* const original_pos = pos;
218   char c = 0;
219   // Read the next char into `c` and update `pos`. Returns false if there are
220   // no more chars to read.
221 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
222   do {                                                  \
223     if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
224     c = *pos++;                                         \
225   } while (0)
226 
227   if (is_positional) {
228     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
229     if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
230     conv->arg_position = ParseDigits(c, pos, end);
231     assert(conv->arg_position > 0);
232     if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
233   }
234 
235   ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
236 
237   // We should start with the basic flag on.
238   assert(conv->flags == Flags::kBasic);
239 
240   // Any non alpha character makes this conversion not basic.
241   // This includes flags (-+ #0), width (1-9, *) or precision (.).
242   // All conversion characters and length modifiers are alpha characters.
243   if (c < 'A') {
244     while (c <= '0') {
245       auto tag = GetTagForChar(c);
246       if (tag.is_flags()) {
247         conv->flags = conv->flags | tag.as_flags();
248         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
249       } else {
250         break;
251       }
252     }
253 
254     if (c <= '9') {
255       if (c >= '0') {
256         int maybe_width = ParseDigits(c, pos, end);
257         if (!is_positional && c == '$') {
258           if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
259           // Positional conversion.
260           *next_arg = -1;
261           return ConsumeConversion<true>(original_pos, end, conv, next_arg);
262         }
263         conv->flags = conv->flags | Flags::kNonBasic;
264         conv->width.set_value(maybe_width);
265       } else if (c == '*') {
266         conv->flags = conv->flags | Flags::kNonBasic;
267         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
268         if (is_positional) {
269           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
270           conv->width.set_from_arg(ParseDigits(c, pos, end));
271           if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
272           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
273         } else {
274           conv->width.set_from_arg(++*next_arg);
275         }
276       }
277     }
278 
279     if (c == '.') {
280       conv->flags = conv->flags | Flags::kNonBasic;
281       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
282       if ('0' <= c && c <= '9') {
283         conv->precision.set_value(ParseDigits(c, pos, end));
284       } else if (c == '*') {
285         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
286         if (is_positional) {
287           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
288           conv->precision.set_from_arg(ParseDigits(c, pos, end));
289           if (c != '$') return nullptr;
290           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
291         } else {
292           conv->precision.set_from_arg(++*next_arg);
293         }
294       } else {
295         conv->precision.set_value(0);
296       }
297     }
298   }
299 
300   auto tag = GetTagForChar(c);
301 
302   if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
303     return nullptr;
304   }
305 
306   if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
307     if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
308 
309     // It is a length modifier.
310     LengthMod length_mod = tag.as_length();
311     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
312     if (c == 'h' && length_mod == LengthMod::h) {
313       conv->length_mod = LengthMod::hh;
314       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
315     } else if (c == 'l' && length_mod == LengthMod::l) {
316       conv->length_mod = LengthMod::ll;
317       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
318     } else {
319       conv->length_mod = length_mod;
320     }
321     tag = GetTagForChar(c);
322 
323     if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
324     if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
325 
326     // `wchar_t` args are marked non-basic so `Bind()` will copy the length mod.
327     if (conv->length_mod == LengthMod::l && c == 'c') {
328       conv->flags = conv->flags | Flags::kNonBasic;
329     }
330   }
331 #undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
332 
333   assert(CheckFastPathSetting(*conv));
334   (void)(&CheckFastPathSetting);
335 
336   conv->conv = tag.as_conv();
337   if (!is_positional) conv->arg_position = ++*next_arg;
338   return pos;
339 }
340 
341 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
342 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
343 // If valid, it returns the first character following the conversion spec,
344 // and the spec part is broken down and returned in 'conv'.
345 // If invalid, returns nullptr.
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)346 constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
347                                                UnboundConversion* conv,
348                                                int* next_arg) {
349   if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
350   return ConsumeConversion<false>(p, end, conv, next_arg);
351 }
352 
353 }  // namespace str_format_internal
354 ABSL_NAMESPACE_END
355 }  // namespace absl
356 
357 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
358