• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The Abseil Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
17 
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <limits>

#include "absl/base/const_init.h"
#include "absl/strings/internal/str_format/extension.h"
24 
25 namespace absl {
26 ABSL_NAMESPACE_BEGIN
27 namespace str_format_internal {
28 
// Length modifiers that may precede a conversion character (for example the
// `ll` in "%lld"). `none` is the value used when no modifier is present.
enum class LengthMod : std::uint8_t {
  h,
  hh,
  l,
  ll,
  L,
  j,
  z,
  t,
  q,
  none,
};
30 
31 // The analyzed properties of a single specified conversion.
32 struct UnboundConversion {
33   // This is a user defined default constructor on purpose to skip the
34   // initialization of parts of the object that are not necessary.
UnboundConversionUnboundConversion35   UnboundConversion() {}  // NOLINT
36 
37   // This constructor is provided for the static checker. We don't want to do
38   // the unnecessary initialization in the normal case.
UnboundConversionUnboundConversion39   explicit constexpr UnboundConversion(absl::ConstInitType)
40       : arg_position{}, width{}, precision{} {}
41 
42   class InputValue {
43    public:
set_valueUnboundConversion44     constexpr void set_value(int value) {
45       assert(value >= 0);
46       value_ = value;
47     }
valueUnboundConversion48     constexpr int value() const { return value_; }
49 
50     // Marks the value as "from arg". aka the '*' format.
51     // Requires `value >= 1`.
52     // When set, is_from_arg() return true and get_from_arg() returns the
53     // original value.
54     // `value()`'s return value is unspecified in this state.
set_from_argUnboundConversion55     constexpr void set_from_arg(int value) {
56       assert(value > 0);
57       value_ = -value - 1;
58     }
is_from_argUnboundConversion59     constexpr bool is_from_arg() const { return value_ < -1; }
get_from_argUnboundConversion60     constexpr int get_from_arg() const {
61       assert(is_from_arg());
62       return -value_ - 1;
63     }
64 
65    private:
66     int value_ = -1;
67   };
68 
69   // No need to initialize. It will always be set in the parser.
70   int arg_position;
71 
72   InputValue width;
73   InputValue precision;
74 
75   Flags flags = Flags::kBasic;
76   LengthMod length_mod = LengthMod::none;
77   FormatConversionChar conv = FormatConversionCharInternal::kNone;
78 };
79 
80 // Helper tag class for the table below.
81 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
82 // conversions.
83 class ConvTag {
84  public:
ConvTag(FormatConversionChar conversion_char)85   constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
86       : tag_(static_cast<uint8_t>(conversion_char)) {}
ConvTag(LengthMod length_mod)87   constexpr ConvTag(LengthMod length_mod)  // NOLINT
88       : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
ConvTag(Flags flags)89   constexpr ConvTag(Flags flags)  // NOLINT
90       : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
ConvTag()91   constexpr ConvTag() : tag_(0xFF) {}
92 
is_conv()93   constexpr bool is_conv() const { return (tag_ & 0x80) == 0; }
is_length()94   constexpr bool is_length() const { return (tag_ & 0xC0) == 0x80; }
is_flags()95   constexpr bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
96 
as_conv()97   constexpr FormatConversionChar as_conv() const {
98     assert(is_conv());
99     assert(!is_length());
100     assert(!is_flags());
101     return static_cast<FormatConversionChar>(tag_);
102   }
as_length()103   constexpr LengthMod as_length() const {
104     assert(!is_conv());
105     assert(is_length());
106     assert(!is_flags());
107     return static_cast<LengthMod>(tag_ & 0x3F);
108   }
as_flags()109   constexpr Flags as_flags() const {
110     assert(!is_conv());
111     assert(!is_length());
112     assert(is_flags());
113     return static_cast<Flags>(tag_ & 0x1F);
114   }
115 
116  private:
117   uint8_t tag_;
118 };
119 
120 struct ConvTagHolder {
121   using CC = FormatConversionCharInternal;
122   using LM = LengthMod;
123 
124   // Abbreviations to fit in the table below.
125   static constexpr auto kFSign = Flags::kSignCol;
126   static constexpr auto kFAlt = Flags::kAlt;
127   static constexpr auto kFPos = Flags::kShowPos;
128   static constexpr auto kFLeft = Flags::kLeft;
129   static constexpr auto kFZero = Flags::kZero;
130 
131   static constexpr ConvTag value[256] = {
132       {},     {},    {},    {},    {},    {},     {},    {},     // 00-07
133       {},     {},    {},    {},    {},    {},     {},    {},     // 08-0f
134       {},     {},    {},    {},    {},    {},     {},    {},     // 10-17
135       {},     {},    {},    {},    {},    {},     {},    {},     // 18-1f
136       kFSign, {},    {},    kFAlt, {},    {},     {},    {},     //  !"#$%&'
137       {},     {},    {},    kFPos, {},    kFLeft, {},    {},     // ()*+,-./
138       kFZero, {},    {},    {},    {},    {},     {},    {},     // 01234567
139       {},     {},    {},    {},    {},    {},     {},    {},     // 89:;<=>?
140       {},     CC::A, {},    {},    {},    CC::E,  CC::F, CC::G,  // @ABCDEFG
141       {},     {},    {},    {},    LM::L, {},     {},    {},     // HIJKLMNO
142       {},     {},    {},    {},    {},    {},     {},    {},     // PQRSTUVW
143       CC::X,  {},    {},    {},    {},    {},     {},    {},     // XYZ[\]^_
144       {},     CC::a, {},    CC::c, CC::d, CC::e,  CC::f, CC::g,  // `abcdefg
145       LM::h,  CC::i, LM::j, {},    LM::l, {},     CC::n, CC::o,  // hijklmno
146       CC::p,  LM::q, {},    CC::s, LM::t, CC::u,  CC::v, {},     // pqrstuvw
147       CC::x,  {},    LM::z, {},    {},    {},     {},    {},     // xyz{|}!
148       {},     {},    {},    {},    {},    {},     {},    {},     // 80-87
149       {},     {},    {},    {},    {},    {},     {},    {},     // 88-8f
150       {},     {},    {},    {},    {},    {},     {},    {},     // 90-97
151       {},     {},    {},    {},    {},    {},     {},    {},     // 98-9f
152       {},     {},    {},    {},    {},    {},     {},    {},     // a0-a7
153       {},     {},    {},    {},    {},    {},     {},    {},     // a8-af
154       {},     {},    {},    {},    {},    {},     {},    {},     // b0-b7
155       {},     {},    {},    {},    {},    {},     {},    {},     // b8-bf
156       {},     {},    {},    {},    {},    {},     {},    {},     // c0-c7
157       {},     {},    {},    {},    {},    {},     {},    {},     // c8-cf
158       {},     {},    {},    {},    {},    {},     {},    {},     // d0-d7
159       {},     {},    {},    {},    {},    {},     {},    {},     // d8-df
160       {},     {},    {},    {},    {},    {},     {},    {},     // e0-e7
161       {},     {},    {},    {},    {},    {},     {},    {},     // e8-ef
162       {},     {},    {},    {},    {},    {},     {},    {},     // f0-f7
163       {},     {},    {},    {},    {},    {},     {},    {},     // f8-ff
164   };
165 };
166 
167 // Keep a single table for all the conversion chars and length modifiers.
GetTagForChar(char c)168 constexpr ConvTag GetTagForChar(char c) {
169   return ConvTagHolder::value[static_cast<unsigned char>(c)];
170 }
171 
CheckFastPathSetting(const UnboundConversion & conv)172 constexpr bool CheckFastPathSetting(const UnboundConversion& conv) {
173   bool width_precision_needed =
174       conv.width.value() >= 0 || conv.precision.value() >= 0;
175   if (width_precision_needed && conv.flags == Flags::kBasic) {
176 #if defined(__clang__)
177     // Some compilers complain about this in constexpr even when not executed,
178     // so only enable the error dump in clang.
179     fprintf(stderr,
180             "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
181             "width=%d precision=%d\n",
182             conv.flags == Flags::kBasic ? 1 : 0,
183             FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
184             FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
185             FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
186             FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
187             FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
188             conv.precision.value());
189 #endif  // defined(__clang__)
190     return false;
191   }
192   return true;
193 }
194 
ParseDigits(char & c,const char * & pos,const char * const end)195 constexpr int ParseDigits(char& c, const char*& pos, const char* const end) {
196   int digits = c - '0';
197   // We do not want to overflow `digits` so we consume at most digits10
198   // digits. If there are more digits the parsing will fail later on when the
199   // digit doesn't match the expected characters.
200   int num_digits = std::numeric_limits<int>::digits10;
201   for (;;) {
202     if (ABSL_PREDICT_FALSE(pos == end)) break;
203     c = *pos++;
204     if ('0' > c || c > '9') break;
205     --num_digits;
206     if (ABSL_PREDICT_FALSE(!num_digits)) break;
207     digits = 10 * digits + c - '0';
208   }
209   return digits;
210 }
211 
212 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)213 constexpr const char* ConsumeConversion(const char* pos, const char* const end,
214                                         UnboundConversion* conv,
215                                         int* next_arg) {
216   const char* const original_pos = pos;
217   char c = 0;
218   // Read the next char into `c` and update `pos`. Returns false if there are
219   // no more chars to read.
220 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
221   do {                                                  \
222     if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
223     c = *pos++;                                         \
224   } while (0)
225 
226   if (is_positional) {
227     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
228     if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
229     conv->arg_position = ParseDigits(c, pos, end);
230     assert(conv->arg_position > 0);
231     if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
232   }
233 
234   ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
235 
236   // We should start with the basic flag on.
237   assert(conv->flags == Flags::kBasic);
238 
239   // Any non alpha character makes this conversion not basic.
240   // This includes flags (-+ #0), width (1-9, *) or precision (.).
241   // All conversion characters and length modifiers are alpha characters.
242   if (c < 'A') {
243     while (c <= '0') {
244       auto tag = GetTagForChar(c);
245       if (tag.is_flags()) {
246         conv->flags = conv->flags | tag.as_flags();
247         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
248       } else {
249         break;
250       }
251     }
252 
253     if (c <= '9') {
254       if (c >= '0') {
255         int maybe_width = ParseDigits(c, pos, end);
256         if (!is_positional && c == '$') {
257           if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
258           // Positional conversion.
259           *next_arg = -1;
260           return ConsumeConversion<true>(original_pos, end, conv, next_arg);
261         }
262         conv->flags = conv->flags | Flags::kNonBasic;
263         conv->width.set_value(maybe_width);
264       } else if (c == '*') {
265         conv->flags = conv->flags | Flags::kNonBasic;
266         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
267         if (is_positional) {
268           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
269           conv->width.set_from_arg(ParseDigits(c, pos, end));
270           if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
271           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
272         } else {
273           conv->width.set_from_arg(++*next_arg);
274         }
275       }
276     }
277 
278     if (c == '.') {
279       conv->flags = conv->flags | Flags::kNonBasic;
280       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
281       if ('0' <= c && c <= '9') {
282         conv->precision.set_value(ParseDigits(c, pos, end));
283       } else if (c == '*') {
284         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
285         if (is_positional) {
286           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
287           conv->precision.set_from_arg(ParseDigits(c, pos, end));
288           if (c != '$') return nullptr;
289           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
290         } else {
291           conv->precision.set_from_arg(++*next_arg);
292         }
293       } else {
294         conv->precision.set_value(0);
295       }
296     }
297   }
298 
299   auto tag = GetTagForChar(c);
300 
301   if (ABSL_PREDICT_FALSE(c == 'v' && conv->flags != Flags::kBasic)) {
302     return nullptr;
303   }
304 
305   if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
306     if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
307 
308     // It is a length modifier.
309     using str_format_internal::LengthMod;
310     LengthMod length_mod = tag.as_length();
311     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
312     if (c == 'h' && length_mod == LengthMod::h) {
313       conv->length_mod = LengthMod::hh;
314       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
315     } else if (c == 'l' && length_mod == LengthMod::l) {
316       conv->length_mod = LengthMod::ll;
317       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
318     } else {
319       conv->length_mod = length_mod;
320     }
321     tag = GetTagForChar(c);
322 
323     if (ABSL_PREDICT_FALSE(c == 'v')) return nullptr;
324     if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
325   }
326 #undef ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR
327 
328   assert(CheckFastPathSetting(*conv));
329   (void)(&CheckFastPathSetting);
330 
331   conv->conv = tag.as_conv();
332   if (!is_positional) conv->arg_position = ++*next_arg;
333   return pos;
334 }
335 
336 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
337 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
338 // If valid, it returns the first character following the conversion spec,
339 // and the spec part is broken down and returned in 'conv'.
340 // If invalid, returns nullptr.
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)341 constexpr const char* ConsumeUnboundConversion(const char* p, const char* end,
342                                                UnboundConversion* conv,
343                                                int* next_arg) {
344   if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
345   return ConsumeConversion<false>(p, end, conv, next_arg);
346 }
347 
348 }  // namespace str_format_internal
349 ABSL_NAMESPACE_END
350 }  // namespace absl
351 
352 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_CONSTEXPR_PARSER_H_
353