• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "absl/strings/internal/str_format/parser.h"
2 
3 #include <assert.h>
4 #include <string.h>
5 #include <wchar.h>
6 #include <cctype>
7 #include <cstdint>
8 
9 #include <algorithm>
10 #include <initializer_list>
11 #include <limits>
12 #include <ostream>
13 #include <string>
14 #include <unordered_set>
15 
16 namespace absl {
17 ABSL_NAMESPACE_BEGIN
18 namespace str_format_internal {
19 
20 using CC = FormatConversionCharInternal;
21 using LM = LengthMod;
22 
23 ABSL_CONST_INIT const ConvTag kTags[256] = {
24     {},    {},    {},    {},    {},    {},    {},    {},     // 00-07
25     {},    {},    {},    {},    {},    {},    {},    {},     // 08-0f
26     {},    {},    {},    {},    {},    {},    {},    {},     // 10-17
27     {},    {},    {},    {},    {},    {},    {},    {},     // 18-1f
28     {},    {},    {},    {},    {},    {},    {},    {},     // 20-27
29     {},    {},    {},    {},    {},    {},    {},    {},     // 28-2f
30     {},    {},    {},    {},    {},    {},    {},    {},     // 30-37
31     {},    {},    {},    {},    {},    {},    {},    {},     // 38-3f
32     {},    CC::A, {},    {},    {},    CC::E, CC::F, CC::G,  // @ABCDEFG
33     {},    {},    {},    {},    LM::L, {},    {},    {},     // HIJKLMNO
34     {},    {},    {},    {},    {},    {},    {},    {},     // PQRSTUVW
35     CC::X, {},    {},    {},    {},    {},    {},    {},     // XYZ[\]^_
36     {},    CC::a, {},    CC::c, CC::d, CC::e, CC::f, CC::g,  // `abcdefg
37     LM::h, CC::i, LM::j, {},    LM::l, {},    CC::n, CC::o,  // hijklmno
38     CC::p, LM::q, {},    CC::s, LM::t, CC::u, {},    {},     // pqrstuvw
39     CC::x, {},    LM::z, {},    {},    {},    {},    {},     // xyz{|}!
40     {},    {},    {},    {},    {},    {},    {},    {},     // 80-87
41     {},    {},    {},    {},    {},    {},    {},    {},     // 88-8f
42     {},    {},    {},    {},    {},    {},    {},    {},     // 90-97
43     {},    {},    {},    {},    {},    {},    {},    {},     // 98-9f
44     {},    {},    {},    {},    {},    {},    {},    {},     // a0-a7
45     {},    {},    {},    {},    {},    {},    {},    {},     // a8-af
46     {},    {},    {},    {},    {},    {},    {},    {},     // b0-b7
47     {},    {},    {},    {},    {},    {},    {},    {},     // b8-bf
48     {},    {},    {},    {},    {},    {},    {},    {},     // c0-c7
49     {},    {},    {},    {},    {},    {},    {},    {},     // c8-cf
50     {},    {},    {},    {},    {},    {},    {},    {},     // d0-d7
51     {},    {},    {},    {},    {},    {},    {},    {},     // d8-df
52     {},    {},    {},    {},    {},    {},    {},    {},     // e0-e7
53     {},    {},    {},    {},    {},    {},    {},    {},     // e8-ef
54     {},    {},    {},    {},    {},    {},    {},    {},     // f0-f7
55     {},    {},    {},    {},    {},    {},    {},    {},     // f8-ff
56 };
57 
58 namespace {
59 
CheckFastPathSetting(const UnboundConversion & conv)60 bool CheckFastPathSetting(const UnboundConversion& conv) {
61   bool should_be_basic = !conv.flags.left &&      //
62                          !conv.flags.show_pos &&  //
63                          !conv.flags.sign_col &&  //
64                          !conv.flags.alt &&       //
65                          !conv.flags.zero &&      //
66                          (conv.width.value() == -1) &&
67                          (conv.precision.value() == -1);
68   if (should_be_basic != conv.flags.basic) {
69     fprintf(stderr,
70             "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
71             "width=%d precision=%d\n",
72             conv.flags.basic, conv.flags.left, conv.flags.show_pos,
73             conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
74             conv.width.value(), conv.precision.value());
75   }
76   return should_be_basic == conv.flags.basic;
77 }
78 
79 template <bool is_positional>
ConsumeConversion(const char * pos,const char * const end,UnboundConversion * conv,int * next_arg)80 const char *ConsumeConversion(const char *pos, const char *const end,
81                               UnboundConversion *conv, int *next_arg) {
82   const char* const original_pos = pos;
83   char c;
84   // Read the next char into `c` and update `pos`. Returns false if there are
85   // no more chars to read.
86 #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
87   do {                                                  \
88     if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
89     c = *pos++;                                         \
90   } while (0)
91 
92   const auto parse_digits = [&] {
93     int digits = c - '0';
94     // We do not want to overflow `digits` so we consume at most digits10
95     // digits. If there are more digits the parsing will fail later on when the
96     // digit doesn't match the expected characters.
97     int num_digits = std::numeric_limits<int>::digits10;
98     for (;;) {
99       if (ABSL_PREDICT_FALSE(pos == end)) break;
100       c = *pos++;
101       if (!std::isdigit(c)) break;
102       --num_digits;
103       if (ABSL_PREDICT_FALSE(!num_digits)) break;
104       digits = 10 * digits + c - '0';
105     }
106     return digits;
107   };
108 
109   if (is_positional) {
110     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
111     if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
112     conv->arg_position = parse_digits();
113     assert(conv->arg_position > 0);
114     if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
115   }
116 
117   ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
118 
119   // We should start with the basic flag on.
120   assert(conv->flags.basic);
121 
122   // Any non alpha character makes this conversion not basic.
123   // This includes flags (-+ #0), width (1-9, *) or precision (.).
124   // All conversion characters and length modifiers are alpha characters.
125   if (c < 'A') {
126     conv->flags.basic = false;
127 
128     for (; c <= '0';) {
129       // FIXME: We might be able to speed this up reusing the lookup table from
130       // above. It might require changing Flags to be a plain integer where we
131       // can |= a value.
132       switch (c) {
133         case '-':
134           conv->flags.left = true;
135           break;
136         case '+':
137           conv->flags.show_pos = true;
138           break;
139         case ' ':
140           conv->flags.sign_col = true;
141           break;
142         case '#':
143           conv->flags.alt = true;
144           break;
145         case '0':
146           conv->flags.zero = true;
147           break;
148         default:
149           goto flags_done;
150       }
151       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
152     }
153 flags_done:
154 
155     if (c <= '9') {
156       if (c >= '0') {
157         int maybe_width = parse_digits();
158         if (!is_positional && c == '$') {
159           if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
160           // Positional conversion.
161           *next_arg = -1;
162           conv->flags = Flags();
163           conv->flags.basic = true;
164           return ConsumeConversion<true>(original_pos, end, conv, next_arg);
165         }
166         conv->width.set_value(maybe_width);
167       } else if (c == '*') {
168         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
169         if (is_positional) {
170           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
171           conv->width.set_from_arg(parse_digits());
172           if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
173           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
174         } else {
175           conv->width.set_from_arg(++*next_arg);
176         }
177       }
178     }
179 
180     if (c == '.') {
181       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
182       if (std::isdigit(c)) {
183         conv->precision.set_value(parse_digits());
184       } else if (c == '*') {
185         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
186         if (is_positional) {
187           if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
188           conv->precision.set_from_arg(parse_digits());
189           if (c != '$') return nullptr;
190           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
191         } else {
192           conv->precision.set_from_arg(++*next_arg);
193         }
194       } else {
195         conv->precision.set_value(0);
196       }
197     }
198   }
199 
200   auto tag = GetTagForChar(c);
201 
202   if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
203     if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
204 
205     // It is a length modifier.
206     using str_format_internal::LengthMod;
207     LengthMod length_mod = tag.as_length();
208     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
209     if (c == 'h' && length_mod == LengthMod::h) {
210       conv->length_mod = LengthMod::hh;
211       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
212     } else if (c == 'l' && length_mod == LengthMod::l) {
213       conv->length_mod = LengthMod::ll;
214       ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
215     } else {
216       conv->length_mod = length_mod;
217     }
218     tag = GetTagForChar(c);
219     if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
220   }
221 
222   assert(CheckFastPathSetting(*conv));
223   (void)(&CheckFastPathSetting);
224 
225   conv->conv = tag.as_conv();
226   if (!is_positional) conv->arg_position = ++*next_arg;
227   return pos;
228 }
229 
230 }  // namespace
231 
LengthModToString(LengthMod v)232 std::string LengthModToString(LengthMod v) {
233   switch (v) {
234     case LengthMod::h:
235       return "h";
236     case LengthMod::hh:
237       return "hh";
238     case LengthMod::l:
239       return "l";
240     case LengthMod::ll:
241       return "ll";
242     case LengthMod::L:
243       return "L";
244     case LengthMod::j:
245       return "j";
246     case LengthMod::z:
247       return "z";
248     case LengthMod::t:
249       return "t";
250     case LengthMod::q:
251       return "q";
252     case LengthMod::none:
253       return "";
254   }
255   return "";
256 }
257 
ConsumeUnboundConversion(const char * p,const char * end,UnboundConversion * conv,int * next_arg)258 const char *ConsumeUnboundConversion(const char *p, const char *end,
259                                      UnboundConversion *conv, int *next_arg) {
260   if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
261   return ConsumeConversion<false>(p, end, conv, next_arg);
262 }
263 
264 struct ParsedFormatBase::ParsedFormatConsumer {
ParsedFormatConsumerabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer265   explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
266       : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
267 
Appendabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer268   bool Append(string_view s) {
269     if (s.empty()) return true;
270 
271     size_t text_end = AppendText(s);
272 
273     if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
274       // Let's extend the existing text run.
275       parsed->items_.back().text_end = text_end;
276     } else {
277       // Let's make a new text run.
278       parsed->items_.push_back({false, text_end, {}});
279     }
280     return true;
281   }
282 
ConvertOneabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer283   bool ConvertOne(const UnboundConversion &conv, string_view s) {
284     size_t text_end = AppendText(s);
285     parsed->items_.push_back({true, text_end, conv});
286     return true;
287   }
288 
AppendTextabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer289   size_t AppendText(string_view s) {
290     memcpy(data_pos, s.data(), s.size());
291     data_pos += s.size();
292     return static_cast<size_t>(data_pos - parsed->data_.get());
293   }
294 
295   ParsedFormatBase *parsed;
296   char* data_pos;
297 };
298 
ParsedFormatBase(string_view format,bool allow_ignored,std::initializer_list<FormatConversionCharSet> convs)299 ParsedFormatBase::ParsedFormatBase(
300     string_view format, bool allow_ignored,
301     std::initializer_list<FormatConversionCharSet> convs)
302     : data_(format.empty() ? nullptr : new char[format.size()]) {
303   has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
304                !MatchesConversions(allow_ignored, convs);
305 }
306 
MatchesConversions(bool allow_ignored,std::initializer_list<FormatConversionCharSet> convs) const307 bool ParsedFormatBase::MatchesConversions(
308     bool allow_ignored,
309     std::initializer_list<FormatConversionCharSet> convs) const {
310   std::unordered_set<int> used;
311   auto add_if_valid_conv = [&](int pos, char c) {
312       if (static_cast<size_t>(pos) > convs.size() ||
313           !Contains(convs.begin()[pos - 1], c))
314         return false;
315       used.insert(pos);
316       return true;
317   };
318   for (const ConversionItem &item : items_) {
319     if (!item.is_conversion) continue;
320     auto &conv = item.conv;
321     if (conv.precision.is_from_arg() &&
322         !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
323       return false;
324     if (conv.width.is_from_arg() &&
325         !add_if_valid_conv(conv.width.get_from_arg(), '*'))
326       return false;
327     if (!add_if_valid_conv(conv.arg_position,
328                            FormatConversionCharToChar(conv.conv)))
329       return false;
330   }
331   return used.size() == convs.size() || allow_ignored;
332 }
333 
334 }  // namespace str_format_internal
335 ABSL_NAMESPACE_END
336 }  // namespace absl
337