• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
2 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
3 
4 #include <limits.h>
5 #include <stddef.h>
6 #include <stdlib.h>
7 
8 #include <cassert>
9 #include <cstdint>
10 #include <initializer_list>
11 #include <iosfwd>
12 #include <iterator>
13 #include <memory>
14 #include <string>
15 #include <vector>
16 
17 #include "absl/strings/internal/str_format/checker.h"
18 #include "absl/strings/internal/str_format/extension.h"
19 
20 namespace absl {
21 ABSL_NAMESPACE_BEGIN
22 namespace str_format_internal {
23 
// Length modifiers that may appear in a conversion specification, plus `none`
// for a specification that carries no modifier. Enumerators are numbered
// consecutively from 0 and stored in a single byte.
enum class LengthMod : std::uint8_t {
  h,
  hh,
  l,
  ll,
  L,
  j,
  z,
  t,
  q,
  none
};

// Returns a string form of `v`. Only declared here; the definition lives
// elsewhere.
std::string LengthModToString(LengthMod v);
27 
28 // The analyzed properties of a single specified conversion.
29 struct UnboundConversion {
UnboundConversionUnboundConversion30   UnboundConversion()
31       : flags() /* This is required to zero all the fields of flags. */ {
32     flags.basic = true;
33   }
34 
35   class InputValue {
36    public:
set_valueUnboundConversion37     void set_value(int value) {
38       assert(value >= 0);
39       value_ = value;
40     }
valueUnboundConversion41     int value() const { return value_; }
42 
43     // Marks the value as "from arg". aka the '*' format.
44     // Requires `value >= 1`.
45     // When set, is_from_arg() return true and get_from_arg() returns the
46     // original value.
47     // `value()`'s return value is unspecfied in this state.
set_from_argUnboundConversion48     void set_from_arg(int value) {
49       assert(value > 0);
50       value_ = -value - 1;
51     }
is_from_argUnboundConversion52     bool is_from_arg() const { return value_ < -1; }
get_from_argUnboundConversion53     int get_from_arg() const {
54       assert(is_from_arg());
55       return -value_ - 1;
56     }
57 
58    private:
59     int value_ = -1;
60   };
61 
62   // No need to initialize. It will always be set in the parser.
63   int arg_position;
64 
65   InputValue width;
66   InputValue precision;
67 
68   Flags flags;
69   LengthMod length_mod = LengthMod::none;
70   FormatConversionChar conv = FormatConversionCharInternal::kNone;
71 };
72 
73 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
74 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
75 // If valid, it returns the first character following the conversion spec,
76 // and the spec part is broken down and returned in 'conv'.
77 // If invalid, returns nullptr.
78 const char* ConsumeUnboundConversion(const char* p, const char* end,
79                                      UnboundConversion* conv, int* next_arg);
80 
81 // Helper tag class for the table below.
82 // It allows fast `char -> ConversionChar/LengthMod` checking and
83 // conversions.
84 class ConvTag {
85  public:
ConvTag(FormatConversionChar conversion_char)86   constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
87       : tag_(static_cast<int8_t>(conversion_char)) {}
88   // We invert the length modifiers to make them negative so that we can easily
89   // test for them.
ConvTag(LengthMod length_mod)90   constexpr ConvTag(LengthMod length_mod)  // NOLINT
91       : tag_(~static_cast<std::int8_t>(length_mod)) {}
92   // Everything else is -128, which is negative to make is_conv() simpler.
ConvTag()93   constexpr ConvTag() : tag_(-128) {}
94 
is_conv()95   bool is_conv() const { return tag_ >= 0; }
is_length()96   bool is_length() const { return tag_ < 0 && tag_ != -128; }
as_conv()97   FormatConversionChar as_conv() const {
98     assert(is_conv());
99     return static_cast<FormatConversionChar>(tag_);
100   }
as_length()101   LengthMod as_length() const {
102     assert(is_length());
103     return static_cast<LengthMod>(~tag_);
104   }
105 
106  private:
107   std::int8_t tag_;
108 };
109 
110 extern const ConvTag kTags[256];
111 // Keep a single table for all the conversion chars and length modifiers.
GetTagForChar(char c)112 inline ConvTag GetTagForChar(char c) {
113   return kTags[static_cast<unsigned char>(c)];
114 }
115 
116 // Parse the format string provided in 'src' and pass the identified items into
117 // 'consumer'.
118 // Text runs will be passed by calling
119 //   Consumer::Append(string_view);
120 // ConversionItems will be passed by calling
121 //   Consumer::ConvertOne(UnboundConversion, string_view);
122 // In the case of ConvertOne, the string_view that is passed is the
123 // portion of the format string corresponding to the conversion, not including
124 // the leading %. On success, it returns true. On failure, it stops and returns
125 // false.
126 template <typename Consumer>
ParseFormatString(string_view src,Consumer consumer)127 bool ParseFormatString(string_view src, Consumer consumer) {
128   int next_arg = 0;
129   const char* p = src.data();
130   const char* const end = p + src.size();
131   while (p != end) {
132     const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
133     if (!percent) {
134       // We found the last substring.
135       return consumer.Append(string_view(p, end - p));
136     }
137     // We found a percent, so push the text run then process the percent.
138     if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
139       return false;
140     }
141     if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
142 
143     auto tag = GetTagForChar(percent[1]);
144     if (tag.is_conv()) {
145       if (ABSL_PREDICT_FALSE(next_arg < 0)) {
146         // This indicates an error in the format string.
147         // The only way to get `next_arg < 0` here is to have a positional
148         // argument first which sets next_arg to -1 and then a non-positional
149         // argument.
150         return false;
151       }
152       p = percent + 2;
153 
154       // Keep this case separate from the one below.
155       // ConvertOne is more efficient when the compiler can see that the `basic`
156       // flag is set.
157       UnboundConversion conv;
158       conv.conv = tag.as_conv();
159       conv.arg_position = ++next_arg;
160       if (ABSL_PREDICT_FALSE(
161               !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
162         return false;
163       }
164     } else if (percent[1] != '%') {
165       UnboundConversion conv;
166       p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
167       if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
168       if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
169           conv, string_view(percent + 1, p - (percent + 1))))) {
170         return false;
171       }
172     } else {
173       if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
174       p = percent + 2;
175       continue;
176     }
177   }
178   return true;
179 }
180 
// Evaluates to true for every input. Its purpose is its use in a constant
// expression: there it fails to compile unless `s` points to a constexpr char
// array, because the first character has to be read at compile time.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : s[0] == s[0];
}
186 
187 class ParsedFormatBase {
188  public:
189   explicit ParsedFormatBase(
190       string_view format, bool allow_ignored,
191       std::initializer_list<FormatConversionCharSet> convs);
192 
ParsedFormatBase(const ParsedFormatBase & other)193   ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
194 
ParsedFormatBase(ParsedFormatBase && other)195   ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
196 
197   ParsedFormatBase& operator=(const ParsedFormatBase& other) {
198     if (this == &other) return *this;
199     has_error_ = other.has_error_;
200     items_ = other.items_;
201     size_t text_size = items_.empty() ? 0 : items_.back().text_end;
202     data_.reset(new char[text_size]);
203     memcpy(data_.get(), other.data_.get(), text_size);
204     return *this;
205   }
206 
207   ParsedFormatBase& operator=(ParsedFormatBase&& other) {
208     if (this == &other) return *this;
209     has_error_ = other.has_error_;
210     data_ = std::move(other.data_);
211     items_ = std::move(other.items_);
212     // Reset the vector to make sure the invariants hold.
213     other.items_.clear();
214     return *this;
215   }
216 
217   template <typename Consumer>
ProcessFormat(Consumer consumer)218   bool ProcessFormat(Consumer consumer) const {
219     const char* const base = data_.get();
220     string_view text(base, 0);
221     for (const auto& item : items_) {
222       const char* const end = text.data() + text.size();
223       text = string_view(end, (base + item.text_end) - end);
224       if (item.is_conversion) {
225         if (!consumer.ConvertOne(item.conv, text)) return false;
226       } else {
227         if (!consumer.Append(text)) return false;
228       }
229     }
230     return !has_error_;
231   }
232 
has_error()233   bool has_error() const { return has_error_; }
234 
235  private:
236   // Returns whether the conversions match and if !allow_ignored it verifies
237   // that all conversions are used by the format.
238   bool MatchesConversions(
239       bool allow_ignored,
240       std::initializer_list<FormatConversionCharSet> convs) const;
241 
242   struct ParsedFormatConsumer;
243 
244   struct ConversionItem {
245     bool is_conversion;
246     // Points to the past-the-end location of this element in the data_ array.
247     size_t text_end;
248     UnboundConversion conv;
249   };
250 
251   bool has_error_;
252   std::unique_ptr<char[]> data_;
253   std::vector<ConversionItem> items_;
254 };
255 
256 
257 // A value type representing a preparsed format.  These can be created, copied
258 // around, and reused to speed up formatting loops.
259 // The user must specify through the template arguments the conversion
260 // characters used in the format. This will be checked at compile time.
261 //
262 // This class uses Conv enum values to specify each argument.
263 // This allows for more flexibility as you can specify multiple possible
264 // conversion characters for each argument.
265 // ParsedFormat<char...> is a simplified alias for when the user only
266 // needs to specify a single conversion character for each argument.
267 //
268 // Example:
269 //   // Extended format supports multiple characters per argument:
270 //   using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
271 //   MyFormat GetFormat(bool use_hex) {
272 //     if (use_hex) return MyFormat("foo %x bar");
273 //     return MyFormat("foo %d bar");
274 //   }
275 //   // 'format' can be used with any value that supports 'd' and 'x',
276 //   // like `int`.
277 //   auto format = GetFormat(use_hex);
278 //   value = StringF(format, i);
279 //
280 // This class also supports runtime format checking with the ::New() and
281 // ::NewAllowIgnored() factory functions.
282 // This is the only API that allows the user to pass a runtime specified format
283 // string. These factory functions will return NULL if the format does not match
284 // the conversions requested by the user.
285 template <FormatConversionCharSet... C>
286 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
287  public:
ExtendedParsedFormat(string_view format)288   explicit ExtendedParsedFormat(string_view format)
289 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
290       __attribute__((
291           enable_if(str_format_internal::EnsureConstexpr(format),
292                     "Format string is not constexpr."),
293           enable_if(str_format_internal::ValidFormatImpl<C...>(format),
294                     "Format specified does not match the template arguments.")))
295 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
296       : ExtendedParsedFormat(format, false) {
297   }
298 
299   // ExtendedParsedFormat factory function.
300   // The user still has to specify the conversion characters, but they will not
301   // be checked at compile time. Instead, it will be checked at runtime.
302   // This delays the checking to runtime, but allows the user to pass
303   // dynamically sourced formats.
304   // It returns NULL if the format does not match the conversion characters.
305   // The user is responsible for checking the return value before using it.
306   //
307   // The 'New' variant will check that all the specified arguments are being
308   // consumed by the format and return NULL if any argument is being ignored.
309   // The 'NewAllowIgnored' variant will not verify this and will allow formats
310   // that ignore arguments.
New(string_view format)311   static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
312     return New(format, false);
313   }
NewAllowIgnored(string_view format)314   static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
315       string_view format) {
316     return New(format, true);
317   }
318 
319  private:
New(string_view format,bool allow_ignored)320   static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
321                                                    bool allow_ignored) {
322     std::unique_ptr<ExtendedParsedFormat> conv(
323         new ExtendedParsedFormat(format, allow_ignored));
324     if (conv->has_error()) return nullptr;
325     return conv;
326   }
327 
ExtendedParsedFormat(string_view s,bool allow_ignored)328   ExtendedParsedFormat(string_view s, bool allow_ignored)
329       : ParsedFormatBase(s, allow_ignored, {C...}) {}
330 };
331 }  // namespace str_format_internal
332 ABSL_NAMESPACE_END
333 }  // namespace absl
334 
335 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
336