• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/str_format/parser.h"
16 
17 #include <assert.h>
18 #include <string.h>
19 #include <wchar.h>
20 #include <cctype>
21 #include <cstdint>
22 
23 #include <algorithm>
24 #include <initializer_list>
25 #include <limits>
26 #include <ostream>
27 #include <string>
28 #include <unordered_set>
29 
30 namespace absl {
31 ABSL_NAMESPACE_BEGIN
32 namespace str_format_internal {
33 
// Short aliases so that the entries of the table below stay compact.
34 using CC = FormatConversionCharInternal;
35 using LM = LengthMod;
36 
37 // Abbreviations to fit in the table below.
38 constexpr auto f_sign = Flags::kSignCol;
39 constexpr auto f_alt = Flags::kAlt;
40 constexpr auto f_pos = Flags::kShowPos;
41 constexpr auto f_left = Flags::kLeft;
42 constexpr auto f_zero = Flags::kZero;
43 
// 256-entry classification table, written as rows of eight entries in
// ascending character-code order (the trailing comment on each row names the
// codes or characters that the row covers). An entry is built from a flag
// (f_*), a conversion character (CC::*) or a length modifier (LM::*); `{}`
// marks a character that is none of those.
// NOTE(review): presumably indexed by the unsigned value of a character via
// GetTagForChar(), which is not defined in this file -- confirm.
44 ABSL_CONST_INIT const ConvTag kTags[256] = {
45     {},     {},    {},    {},    {},    {},     {},    {},     // 00-07
46     {},     {},    {},    {},    {},    {},     {},    {},     // 08-0f
47     {},     {},    {},    {},    {},    {},     {},    {},     // 10-17
48     {},     {},    {},    {},    {},    {},     {},    {},     // 18-1f
49     f_sign, {},    {},    f_alt, {},    {},     {},    {},     //  !"#$%&'
50     {},     {},    {},    f_pos, {},    f_left, {},    {},     // ()*+,-./
51     f_zero, {},    {},    {},    {},    {},     {},    {},     // 01234567
52     {},     {},    {},    {},    {},    {},     {},    {},     // 89:;<=>?
53     {},     CC::A, {},    {},    {},    CC::E,  CC::F, CC::G,  // @ABCDEFG
54     {},     {},    {},    {},    LM::L, {},     {},    {},     // HIJKLMNO
55     {},     {},    {},    {},    {},    {},     {},    {},     // PQRSTUVW
56     CC::X,  {},    {},    {},    {},    {},     {},    {},     // XYZ[\]^_
57     {},     CC::a, {},    CC::c, CC::d, CC::e,  CC::f, CC::g,  // `abcdefg
58     LM::h,  CC::i, LM::j, {},    LM::l, {},     CC::n, CC::o,  // hijklmno
59     CC::p,  LM::q, {},    CC::s, LM::t, CC::u,  {},    {},     // pqrstuvw
60     CC::x,  {},    LM::z, {},    {},    {},     {},    {},     // xyz{|}~ DEL
61     {},     {},    {},    {},    {},    {},     {},    {},     // 80-87
62     {},     {},    {},    {},    {},    {},     {},    {},     // 88-8f
63     {},     {},    {},    {},    {},    {},     {},    {},     // 90-97
64     {},     {},    {},    {},    {},    {},     {},    {},     // 98-9f
65     {},     {},    {},    {},    {},    {},     {},    {},     // a0-a7
66     {},     {},    {},    {},    {},    {},     {},    {},     // a8-af
67     {},     {},    {},    {},    {},    {},     {},    {},     // b0-b7
68     {},     {},    {},    {},    {},    {},     {},    {},     // b8-bf
69     {},     {},    {},    {},    {},    {},     {},    {},     // c0-c7
70     {},     {},    {},    {},    {},    {},     {},    {},     // c8-cf
71     {},     {},    {},    {},    {},    {},     {},    {},     // d0-d7
72     {},     {},    {},    {},    {},    {},     {},    {},     // d8-df
73     {},     {},    {},    {},    {},    {},     {},    {},     // e0-e7
74     {},     {},    {},    {},    {},    {},     {},    {},     // e8-ef
75     {},     {},    {},    {},    {},    {},     {},    {},     // f0-f7
76     {},     {},    {},    {},    {},    {},     {},    {},     // f8-ff
77 };
78 
79 namespace {
80 
CheckFastPathSetting(const UnboundConversion & conv)81 bool CheckFastPathSetting(const UnboundConversion& conv) {
82   bool width_precision_needed =
83       conv.width.value() >= 0 || conv.precision.value() >= 0;
84   if (width_precision_needed && conv.flags == Flags::kBasic) {
85     fprintf(stderr,
86             "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
87             "width=%d precision=%d\n",
88             conv.flags == Flags::kBasic ? 1 : 0,
89             FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
90             FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
91             FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
92             FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
93             FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
94             conv.precision.value());
95     return false;
96   }
97   return true;
98 }
99 
100 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)101 const char *ConsumeConversion(const char *pos, const char *const end,
102                               UnboundConversion *conv, int *next_arg) {
103   const char* const original_pos = pos;
104   char c;
105   // Read the next char into `c` and update `pos`. Returns false if there are
106   // no more chars to read.
107 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
108   do {                                                  \
109     if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
110     c = *pos++;                                         \
111   } while (0)
112 
113   const auto parse_digits = [&] {
114     int digits = c - '0';
115     // We do not want to overflow `digits` so we consume at most digits10
116     // digits. If there are more digits the parsing will fail later on when the
117     // digit doesn't match the expected characters.
118     int num_digits = std::numeric_limits<int>::digits10;
119     for (;;) {
120       if (ABSL_PREDICT_FALSE(pos == end)) break;
121       c = *pos++;
122       if (!std::isdigit(c)) break;
123       --num_digits;
124       if (ABSL_PREDICT_FALSE(!num_digits)) break;
125       digits = 10 * digits + c - '0';
126     }
127     return digits;
128   };
129 
130   if (is_positional) {
131     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
132     if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
133     conv->arg_position = parse_digits();
134     assert(conv->arg_position > 0);
135     if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
136   }
137 
138   ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
139 
140   // We should start with the basic flag on.
141   assert(conv->flags == Flags::kBasic);
142 
143   // Any non alpha character makes this conversion not basic.
144   // This includes flags (-+ #0), width (1-9, *) or precision (.).
145   // All conversion characters and length modifiers are alpha characters.
146   if (c < 'A') {
147     while (c <= '0') {
148       auto tag = GetTagForChar(c);
149       if (tag.is_flags()) {
150         conv->flags = conv->flags | tag.as_flags();
151         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
152       } else {
153         break;
154       }
155     }
156 
157     if (c <= '9') {
158       if (c >= '0') {
159         int maybe_width = parse_digits();
160         if (!is_positional && c == '$') {
161           if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
162           // Positional conversion.
163           *next_arg = -1;
164           return ConsumeConversion<true>(original_pos, end, conv, next_arg);
165         }
166         conv->flags = conv->flags | Flags::kNonBasic;
167         conv->width.set_value(maybe_width);
168       } else if (c == '*') {
169         conv->flags = conv->flags | Flags::kNonBasic;
170         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
171         if (is_positional) {
172           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
173           conv->width.set_from_arg(parse_digits());
174           if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
175           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
176         } else {
177           conv->width.set_from_arg(++*next_arg);
178         }
179       }
180     }
181 
182     if (c == '.') {
183       conv->flags = conv->flags | Flags::kNonBasic;
184       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
185       if (std::isdigit(c)) {
186         conv->precision.set_value(parse_digits());
187       } else if (c == '*') {
188         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
189         if (is_positional) {
190           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
191           conv->precision.set_from_arg(parse_digits());
192           if (c != '$') return nullptr;
193           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
194         } else {
195           conv->precision.set_from_arg(++*next_arg);
196         }
197       } else {
198         conv->precision.set_value(0);
199       }
200     }
201   }
202 
203   auto tag = GetTagForChar(c);
204 
205   if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
206     if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
207 
208     // It is a length modifier.
209     using str_format_internal::LengthMod;
210     LengthMod length_mod = tag.as_length();
211     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
212     if (c == 'h' && length_mod == LengthMod::h) {
213       conv->length_mod = LengthMod::hh;
214       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
215     } else if (c == 'l' && length_mod == LengthMod::l) {
216       conv->length_mod = LengthMod::ll;
217       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
218     } else {
219       conv->length_mod = length_mod;
220     }
221     tag = GetTagForChar(c);
222     if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
223   }
224 
225   assert(CheckFastPathSetting(*conv));
226   (void)(&CheckFastPathSetting);
227 
228   conv->conv = tag.as_conv();
229   if (!is_positional) conv->arg_position = ++*next_arg;
230   return pos;
231 }
232 
233 }  // namespace
234 
LengthModToString(LengthMod v)235 std::string LengthModToString(LengthMod v) {
236   switch (v) {
237     case LengthMod::h:
238       return "h";
239     case LengthMod::hh:
240       return "hh";
241     case LengthMod::l:
242       return "l";
243     case LengthMod::ll:
244       return "ll";
245     case LengthMod::L:
246       return "L";
247     case LengthMod::j:
248       return "j";
249     case LengthMod::z:
250       return "z";
251     case LengthMod::t:
252       return "t";
253     case LengthMod::q:
254       return "q";
255     case LengthMod::none:
256       return "";
257   }
258   return "";
259 }
260 
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)261 const char *ConsumeUnboundConversion(const char *p, const char *end,
262                                      UnboundConversion *conv, int *next_arg) {
263   if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
264   return ConsumeConversion<false>(p, end, conv, next_arg);
265 }
266 
267 struct ParsedFormatBase::ParsedFormatConsumer {
ParsedFormatConsumerabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer268   explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
269       : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
270 
Appendabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer271   bool Append(string_view s) {
272     if (s.empty()) return true;
273 
274     size_t text_end = AppendText(s);
275 
276     if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
277       // Let's extend the existing text run.
278       parsed->items_.back().text_end = text_end;
279     } else {
280       // Let's make a new text run.
281       parsed->items_.push_back({false, text_end, {}});
282     }
283     return true;
284   }
285 
ConvertOneabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer286   bool ConvertOne(const UnboundConversion &conv, string_view s) {
287     size_t text_end = AppendText(s);
288     parsed->items_.push_back({true, text_end, conv});
289     return true;
290   }
291 
AppendTextabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer292   size_t AppendText(string_view s) {
293     memcpy(data_pos, s.data(), s.size());
294     data_pos += s.size();
295     return static_cast<size_t>(data_pos - parsed->data_.get());
296   }
297 
298   ParsedFormatBase *parsed;
299   char* data_pos;
300 };
301 
ParsedFormatBase(string_view format,bool allow_ignored,std::initializer_list<FormatConversionCharSet> convs)302 ParsedFormatBase::ParsedFormatBase(
303     string_view format, bool allow_ignored,
304     std::initializer_list<FormatConversionCharSet> convs)
305     : data_(format.empty() ? nullptr : new char[format.size()]) {
306   has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
307                !MatchesConversions(allow_ignored, convs);
308 }
309 
MatchesConversions(bool allow_ignored,std::initializer_list<FormatConversionCharSet> convs) const310 bool ParsedFormatBase::MatchesConversions(
311     bool allow_ignored,
312     std::initializer_list<FormatConversionCharSet> convs) const {
313   std::unordered_set<int> used;
314   auto add_if_valid_conv = [&](int pos, char c) {
315       if (static_cast<size_t>(pos) > convs.size() ||
316           !Contains(convs.begin()[pos - 1], c))
317         return false;
318       used.insert(pos);
319       return true;
320   };
321   for (const ConversionItem &item : items_) {
322     if (!item.is_conversion) continue;
323     auto &conv = item.conv;
324     if (conv.precision.is_from_arg() &&
325         !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
326       return false;
327     if (conv.width.is_from_arg() &&
328         !add_if_valid_conv(conv.width.get_from_arg(), '*'))
329       return false;
330     if (!add_if_valid_conv(conv.arg_position,
331                            FormatConversionCharToChar(conv.conv)))
332       return false;
333   }
334   return used.size() == convs.size() || allow_ignored;
335 }
336 
337 }  // namespace str_format_internal
338 ABSL_NAMESPACE_END
339 }  // namespace absl
340