• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
17 
18 #include <limits.h>
19 #include <stddef.h>
20 #include <stdlib.h>
21 
22 #include <cassert>
23 #include <cstdint>
24 #include <initializer_list>
25 #include <iosfwd>
26 #include <iterator>
27 #include <memory>
28 #include <string>
29 #include <vector>
30 
31 #include "absl/strings/internal/str_format/checker.h"
32 #include "absl/strings/internal/str_format/extension.h"
33 
34 namespace absl {
35 ABSL_NAMESPACE_BEGIN
36 namespace str_format_internal {
37 
// Length modifier attached to a conversion specifier. `none` is the last
// enumerator and is what UnboundConversion::length_mod defaults to.
// The enumerators fit in a single byte so they can be packed into a ConvTag.
enum class LengthMod : std::uint8_t {
  h,
  hh,
  l,
  ll,
  L,
  j,
  z,
  t,
  q,
  none
};

// Returns a string representation of `v`. Defined out of line.
std::string LengthModToString(LengthMod v);
41 
42 // The analyzed properties of a single specified conversion.
43 struct UnboundConversion {
UnboundConversionUnboundConversion44   UnboundConversion() {}
45 
46   class InputValue {
47    public:
set_valueUnboundConversion48     void set_value(int value) {
49       assert(value >= 0);
50       value_ = value;
51     }
valueUnboundConversion52     int value() const { return value_; }
53 
54     // Marks the value as "from arg". aka the '*' format.
55     // Requires `value >= 1`.
56     // When set, is_from_arg() return true and get_from_arg() returns the
57     // original value.
58     // `value()`'s return value is unspecfied in this state.
set_from_argUnboundConversion59     void set_from_arg(int value) {
60       assert(value > 0);
61       value_ = -value - 1;
62     }
is_from_argUnboundConversion63     bool is_from_arg() const { return value_ < -1; }
get_from_argUnboundConversion64     int get_from_arg() const {
65       assert(is_from_arg());
66       return -value_ - 1;
67     }
68 
69    private:
70     int value_ = -1;
71   };
72 
73   // No need to initialize. It will always be set in the parser.
74   int arg_position;
75 
76   InputValue width;
77   InputValue precision;
78 
79   Flags flags = Flags::kBasic;
80   LengthMod length_mod = LengthMod::none;
81   FormatConversionChar conv = FormatConversionCharInternal::kNone;
82 };
83 
84 // Consume conversion spec prefix (not including '%') of [p, end) if valid.
85 // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
86 // If valid, it returns the first character following the conversion spec,
87 // and the spec part is broken down and returned in 'conv'.
88 // If invalid, returns nullptr.
89 const char* ConsumeUnboundConversion(const char* p, const char* end,
90                                      UnboundConversion* conv, int* next_arg);
91 
92 // Helper tag class for the table below.
93 // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
94 // conversions.
95 class ConvTag {
96  public:
ConvTag(FormatConversionChar conversion_char)97   constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
98       : tag_(static_cast<uint8_t>(conversion_char)) {}
ConvTag(LengthMod length_mod)99   constexpr ConvTag(LengthMod length_mod)  // NOLINT
100       : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
ConvTag(Flags flags)101   constexpr ConvTag(Flags flags)  // NOLINT
102       : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
ConvTag()103   constexpr ConvTag() : tag_(0xFF) {}
104 
is_conv()105   bool is_conv() const { return (tag_ & 0x80) == 0; }
is_length()106   bool is_length() const { return (tag_ & 0xC0) == 0x80; }
is_flags()107   bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
108 
as_conv()109   FormatConversionChar as_conv() const {
110     assert(is_conv());
111     assert(!is_length());
112     assert(!is_flags());
113     return static_cast<FormatConversionChar>(tag_);
114   }
as_length()115   LengthMod as_length() const {
116     assert(!is_conv());
117     assert(is_length());
118     assert(!is_flags());
119     return static_cast<LengthMod>(tag_ & 0x3F);
120   }
as_flags()121   Flags as_flags() const {
122     assert(!is_conv());
123     assert(!is_length());
124     assert(is_flags());
125     return static_cast<Flags>(tag_ & 0x1F);
126   }
127 
128  private:
129   uint8_t tag_;
130 };
131 
132 extern const ConvTag kTags[256];
133 // Keep a single table for all the conversion chars and length modifiers.
GetTagForChar(char c)134 inline ConvTag GetTagForChar(char c) {
135   return kTags[static_cast<unsigned char>(c)];
136 }
137 
138 // Parse the format string provided in 'src' and pass the identified items into
139 // 'consumer'.
140 // Text runs will be passed by calling
141 //   Consumer::Append(string_view);
142 // ConversionItems will be passed by calling
143 //   Consumer::ConvertOne(UnboundConversion, string_view);
144 // In the case of ConvertOne, the string_view that is passed is the
145 // portion of the format string corresponding to the conversion, not including
146 // the leading %. On success, it returns true. On failure, it stops and returns
147 // false.
148 template <typename Consumer>
ParseFormatString(string_view src,Consumer consumer)149 bool ParseFormatString(string_view src, Consumer consumer) {
150   int next_arg = 0;
151   const char* p = src.data();
152   const char* const end = p + src.size();
153   while (p != end) {
154     const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
155     if (!percent) {
156       // We found the last substring.
157       return consumer.Append(string_view(p, end - p));
158     }
159     // We found a percent, so push the text run then process the percent.
160     if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
161       return false;
162     }
163     if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
164 
165     auto tag = GetTagForChar(percent[1]);
166     if (tag.is_conv()) {
167       if (ABSL_PREDICT_FALSE(next_arg < 0)) {
168         // This indicates an error in the format string.
169         // The only way to get `next_arg < 0` here is to have a positional
170         // argument first which sets next_arg to -1 and then a non-positional
171         // argument.
172         return false;
173       }
174       p = percent + 2;
175 
176       // Keep this case separate from the one below.
177       // ConvertOne is more efficient when the compiler can see that the `basic`
178       // flag is set.
179       UnboundConversion conv;
180       conv.conv = tag.as_conv();
181       conv.arg_position = ++next_arg;
182       if (ABSL_PREDICT_FALSE(
183               !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
184         return false;
185       }
186     } else if (percent[1] != '%') {
187       UnboundConversion conv;
188       p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
189       if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
190       if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
191           conv, string_view(percent + 1, p - (percent + 1))))) {
192         return false;
193       }
194     } else {
195       if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
196       p = percent + 2;
197       continue;
198     }
199   }
200   return true;
201 }
202 
// Always evaluates to true. Its purpose is to read the first character of `s`
// so that, when used in a constexpr context, compilation fails unless `s`
// points to a constexpr char array.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : (s[0] == s[0]);
}
208 
209 class ParsedFormatBase {
210  public:
211   explicit ParsedFormatBase(
212       string_view format, bool allow_ignored,
213       std::initializer_list<FormatConversionCharSet> convs);
214 
ParsedFormatBase(const ParsedFormatBase & other)215   ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
216 
ParsedFormatBase(ParsedFormatBase && other)217   ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
218 
219   ParsedFormatBase& operator=(const ParsedFormatBase& other) {
220     if (this == &other) return *this;
221     has_error_ = other.has_error_;
222     items_ = other.items_;
223     size_t text_size = items_.empty() ? 0 : items_.back().text_end;
224     data_.reset(new char[text_size]);
225     memcpy(data_.get(), other.data_.get(), text_size);
226     return *this;
227   }
228 
229   ParsedFormatBase& operator=(ParsedFormatBase&& other) {
230     if (this == &other) return *this;
231     has_error_ = other.has_error_;
232     data_ = std::move(other.data_);
233     items_ = std::move(other.items_);
234     // Reset the vector to make sure the invariants hold.
235     other.items_.clear();
236     return *this;
237   }
238 
239   template <typename Consumer>
ProcessFormat(Consumer consumer)240   bool ProcessFormat(Consumer consumer) const {
241     const char* const base = data_.get();
242     string_view text(base, 0);
243     for (const auto& item : items_) {
244       const char* const end = text.data() + text.size();
245       text = string_view(end, (base + item.text_end) - end);
246       if (item.is_conversion) {
247         if (!consumer.ConvertOne(item.conv, text)) return false;
248       } else {
249         if (!consumer.Append(text)) return false;
250       }
251     }
252     return !has_error_;
253   }
254 
has_error()255   bool has_error() const { return has_error_; }
256 
257  private:
258   // Returns whether the conversions match and if !allow_ignored it verifies
259   // that all conversions are used by the format.
260   bool MatchesConversions(
261       bool allow_ignored,
262       std::initializer_list<FormatConversionCharSet> convs) const;
263 
264   struct ParsedFormatConsumer;
265 
266   struct ConversionItem {
267     bool is_conversion;
268     // Points to the past-the-end location of this element in the data_ array.
269     size_t text_end;
270     UnboundConversion conv;
271   };
272 
273   bool has_error_;
274   std::unique_ptr<char[]> data_;
275   std::vector<ConversionItem> items_;
276 };
277 
278 
279 // A value type representing a preparsed format.  These can be created, copied
280 // around, and reused to speed up formatting loops.
281 // The user must specify through the template arguments the conversion
282 // characters used in the format. This will be checked at compile time.
283 //
284 // This class uses Conv enum values to specify each argument.
285 // This allows for more flexibility as you can specify multiple possible
286 // conversion characters for each argument.
287 // ParsedFormat<char...> is a simplified alias for when the user only
288 // needs to specify a single conversion character for each argument.
289 //
290 // Example:
291 //   // Extended format supports multiple characters per argument:
292 //   using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
293 //   MyFormat GetFormat(bool use_hex) {
294 //     if (use_hex) return MyFormat("foo %x bar");
295 //     return MyFormat("foo %d bar");
296 //   }
297 //   // 'format' can be used with any value that supports 'd' and 'x',
298 //   // like `int`.
299 //   auto format = GetFormat(use_hex);
300 //   value = StringF(format, i);
301 //
302 // This class also supports runtime format checking with the ::New() and
303 // ::NewAllowIgnored() factory functions.
304 // This is the only API that allows the user to pass a runtime specified format
305 // string. These factory functions will return NULL if the format does not match
306 // the conversions requested by the user.
307 template <FormatConversionCharSet... C>
308 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
309  public:
ExtendedParsedFormat(string_view format)310   explicit ExtendedParsedFormat(string_view format)
311 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
312       __attribute__((
313           enable_if(str_format_internal::EnsureConstexpr(format),
314                     "Format string is not constexpr."),
315           enable_if(str_format_internal::ValidFormatImpl<C...>(format),
316                     "Format specified does not match the template arguments.")))
317 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
318       : ExtendedParsedFormat(format, false) {
319   }
320 
321   // ExtendedParsedFormat factory function.
322   // The user still has to specify the conversion characters, but they will not
323   // be checked at compile time. Instead, it will be checked at runtime.
324   // This delays the checking to runtime, but allows the user to pass
325   // dynamically sourced formats.
326   // It returns NULL if the format does not match the conversion characters.
327   // The user is responsible for checking the return value before using it.
328   //
329   // The 'New' variant will check that all the specified arguments are being
330   // consumed by the format and return NULL if any argument is being ignored.
331   // The 'NewAllowIgnored' variant will not verify this and will allow formats
332   // that ignore arguments.
New(string_view format)333   static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
334     return New(format, false);
335   }
NewAllowIgnored(string_view format)336   static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
337       string_view format) {
338     return New(format, true);
339   }
340 
341  private:
New(string_view format,bool allow_ignored)342   static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
343                                                    bool allow_ignored) {
344     std::unique_ptr<ExtendedParsedFormat> conv(
345         new ExtendedParsedFormat(format, allow_ignored));
346     if (conv->has_error()) return nullptr;
347     return conv;
348   }
349 
ExtendedParsedFormat(string_view s,bool allow_ignored)350   ExtendedParsedFormat(string_view s, bool allow_ignored)
351       : ParsedFormatBase(s, allow_ignored, {C...}) {}
352 };
353 }  // namespace str_format_internal
354 ABSL_NAMESPACE_END
355 }  // namespace absl
356 
357 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
358