• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12 
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18 
19 #include <__algorithm/find_if.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__concepts/arithmetic.h>
23 #include <__concepts/same_as.h>
24 #include <__config>
25 #include <__debug>
26 #include <__format/format_arg.h>
27 #include <__format/format_error.h>
28 #include <__format/format_parse_context.h>
29 #include <__format/format_string.h>
30 #include <__format/unicode.h>
31 #include <__iterator/concepts.h>
32 #include <__iterator/readable_traits.h> // iter_value_t
33 #include <__type_traits/common_type.h>
34 #include <__type_traits/is_trivially_copyable.h>
35 #include <__variant/monostate.h>
36 #include <cstdint>
37 #include <string_view>
38 
39 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
40 #  pragma GCC system_header
41 #endif
42 
43 _LIBCPP_PUSH_MACROS
44 #include <__undef_macros>
45 
46 _LIBCPP_BEGIN_NAMESPACE_STD
47 
48 #if _LIBCPP_STD_VER >= 20
49 
50 namespace __format_spec {
51 
52 template <contiguous_iterator _Iterator>
53 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator>
__parse_arg_id(_Iterator __begin,_Iterator __end,auto & __parse_ctx)54 __parse_arg_id(_Iterator __begin, _Iterator __end, auto& __parse_ctx) {
55   using _CharT = iter_value_t<_Iterator>;
56   // This function is a wrapper to call the real parser. But it does the
57   // validation for the pre-conditions and post-conditions.
58   if (__begin == __end)
59     std::__throw_format_error("End of input while parsing format-spec arg-id");
60 
61   __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __parse_ctx);
62 
63   if (__r.__last == __end || *__r.__last != _CharT('}'))
64     std::__throw_format_error("Invalid arg-id");
65 
66   ++__r.__last;
67   return __r;
68 }
69 
70 template <class _Context>
71 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
__substitute_arg_id(basic_format_arg<_Context> __format_arg)72 __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
73   // [format.string.std]/8
74   //   If the corresponding formatting argument is not of integral type...
75   // This wording allows char and bool too. LWG-3720 changes the wording to
76   //    If the corresponding formatting argument is not of standard signed or
77   //    unsigned integer type,
78   // This means the 128-bit will not be valid anymore.
79   // TODO FMT Verify this resolution is accepted and add a test to verify
80   //          128-bit integrals fail and switch to visit_format_arg.
81   return _VSTD::__visit_format_arg(
82       [](auto __arg) -> uint32_t {
83         using _Type = decltype(__arg);
84         if constexpr (same_as<_Type, monostate>)
85           std::__throw_format_error("Argument index out of bounds");
86 
87         // [format.string.std]/8
88         // If { arg-idopt } is used in a width or precision, the value of the
89         // corresponding formatting argument is used in its place. If the
90         // corresponding formatting argument is not of standard signed or unsigned
91         // integer type, or its value is negative for precision or non-positive for
92         // width, an exception of type format_error is thrown.
93         //
94         // When an integral is used in a format function, it is stored as one of
95         // the types checked below. Other integral types are promoted. For example,
96         // a signed char is stored as an int.
97         if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || //
98                       same_as<_Type, long long> || same_as<_Type, unsigned long long>) {
99           if constexpr (signed_integral<_Type>) {
100             if (__arg < 0)
101               std::__throw_format_error("A format-spec arg-id replacement shouldn't have a negative value");
102           }
103 
104           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
105           if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max))
106             std::__throw_format_error("A format-spec arg-id replacement exceeds the maximum supported value");
107 
108           return __arg;
109         } else
110           std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
111       },
112       __format_arg);
113 }
114 
/// Selects which optional elements of the format-spec the parser attempts to
/// parse.
///
/// Every flag defaults to false, so a newly added flag has to be opted in
/// explicitly by each user of the parser.
// TODO FMT Use an ABI tag for this struct.
struct __fields {
  /// Parse the sign field.
  uint8_t __sign_ : 1 = false;
  /// Parse the alternate form field ('#').
  uint8_t __alternate_form_ : 1 = false;
  /// Parse the zero-padding field ('0').
  uint8_t __zero_padding_ : 1 = false;
  /// Parse the precision field.
  uint8_t __precision_ : 1 = false;
  /// Parse the locale-specific form field ('L').
  uint8_t __locale_specific_form_ : 1 = false;
  /// Parse the type field.
  uint8_t __type_ : 1 = false;
  /// Determines the valid values for fill.
  ///
  /// Originally the fill could be any character except { and }. Range-based
  /// formatters use the colon to mark the beginning of the
  /// underlying-format-spec. To avoid parsing ambiguities these formatter
  /// specializations prohibit the use of the colon as a fill character.
  uint8_t __use_range_fill_ : 1 = false;
};
135 
136 // By not placing this constant in the formatter class it's not duplicated for
137 // char and wchar_t.
138 inline constexpr __fields __fields_integral{
139     .__sign_                 = true,
140     .__alternate_form_       = true,
141     .__zero_padding_         = true,
142     .__locale_specific_form_ = true,
143     .__type_                 = true};
144 inline constexpr __fields __fields_floating_point{
145     .__sign_                 = true,
146     .__alternate_form_       = true,
147     .__zero_padding_         = true,
148     .__precision_            = true,
149     .__locale_specific_form_ = true,
150     .__type_                 = true};
151 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
152 inline constexpr __fields __fields_pointer{.__type_ = true};
153 
154 #  if _LIBCPP_STD_VER >= 23
155 inline constexpr __fields __fields_tuple{.__use_range_fill_ = true};
156 inline constexpr __fields __fields_range{.__use_range_fill_ = true};
157 inline constexpr __fields __fields_fill_align_width{};
158 #  endif
159 
160 enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
161   /// No alignment is set in the format string.
162   __default,
163   __left,
164   __center,
165   __right,
166   __zero_padding
167 };
168 
169 enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
170   /// No sign is set in the format string.
171   ///
172   /// The sign isn't allowed for certain format-types. By using this value
173   /// it's possible to detect whether or not the user explicitly set the sign
174   /// flag. For formatting purposes it behaves the same as \ref __minus.
175   __default,
176   __minus,
177   __plus,
178   __space
179 };
180 
181 enum class _LIBCPP_ENUM_VIS __type : uint8_t {
182   __default,
183   __string,
184   __binary_lower_case,
185   __binary_upper_case,
186   __octal,
187   __decimal,
188   __hexadecimal_lower_case,
189   __hexadecimal_upper_case,
190   __pointer,
191   __char,
192   __hexfloat_lower_case,
193   __hexfloat_upper_case,
194   __scientific_lower_case,
195   __scientific_upper_case,
196   __fixed_lower_case,
197   __fixed_upper_case,
198   __general_lower_case,
199   __general_upper_case,
200   __debug
201 };
202 
203 struct __std {
204   __alignment __alignment_ : 3;
205   __sign __sign_ : 2;
206   bool __alternate_form_ : 1;
207   bool __locale_specific_form_ : 1;
208   __type __type_;
209 };
210 
211 struct __chrono {
212   __alignment __alignment_ : 3;
213   bool __locale_specific_form_ : 1;
214   bool __hour_                 : 1;
215   bool __weekday_name_ : 1;
216   bool __weekday_              : 1;
217   bool __day_of_year_          : 1;
218   bool __week_of_year_         : 1;
219   bool __month_name_ : 1;
220 };
221 
222 /// Contains the parsed formatting specifications.
223 ///
224 /// This contains information for both the std-format-spec and the
225 /// chrono-format-spec. This results in some unused members for both
226 /// specifications. However these unused members don't increase the size
227 /// of the structure.
228 ///
229 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be
230 /// kept stable.
231 template <class _CharT>
232 struct __parsed_specifications {
233   union {
234     // The field __alignment_ is the first element in __std_ and __chrono_.
235     // This allows the code to always inspect this value regards which member
236     // of the union is the active member [class.union.general]/2.
237     //
238     // This is needed since the generic output routines handle the alignment of
239     // the output.
240     __alignment __alignment_ : 3;
241     __std __std_;
242     __chrono __chrono_;
243   };
244 
245   /// The requested width.
246   ///
247   /// When the format-spec used an arg-id for this field it has already been
248   /// replaced with the value of that arg-id.
249   int32_t __width_;
250 
251   /// The requested precision.
252   ///
253   /// When the format-spec used an arg-id for this field it has already been
254   /// replaced with the value of that arg-id.
255   int32_t __precision_;
256 
257   _CharT __fill_;
258 
__has_width__parsed_specifications259   _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }
260 
__has_precision__parsed_specifications261   _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
262 };
263 
264 // Validate the struct is small and cheap to copy since the struct is passed by
265 // value in formatting functions.
266 static_assert(sizeof(__parsed_specifications<char>) == 16);
267 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
268 #  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
269 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
270 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
271 #  endif
272 
273 /// The parser for the std-format-spec.
274 ///
275 /// Note this class is a member of std::formatter specializations. It's
276 /// expected developers will create their own formatter specializations that
277 /// inherit from the std::formatter specializations. This means this class
278 /// must be ABI stable. To aid the stability the unused bits in the class are
279 /// set to zero. That way they can be repurposed if a future revision of the
280 /// Standards adds new fields to std-format-spec.
281 template <class _CharT>
282 class _LIBCPP_TEMPLATE_VIS __parser {
283 public:
284   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
285       -> decltype(__parse_ctx.begin()) {
286 
287     auto __begin = __parse_ctx.begin();
288     auto __end = __parse_ctx.end();
289     if (__begin == __end)
290       return __begin;
291 
292     if (__parse_fill_align(__begin, __end, __fields.__use_range_fill_) && __begin == __end)
293       return __begin;
294 
295     if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
296       return __begin;
297 
298     if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
299       return __begin;
300 
301     if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
302       return __begin;
303 
304     if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
305       return __begin;
306 
307     if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
308       return __begin;
309 
310     if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
311       return __begin;
312 
313     if (__fields.__type_) {
314       __parse_type(__begin);
315 
316       // When __type_ is false the calling parser is expected to do additional
317       // parsing. In that case that parser should do the end of format string
318       // validation.
319       if (__begin != __end && *__begin != _CharT('}'))
320         std::__throw_format_error("The format-spec should consume the input or end with a '}'");
321     }
322 
323     return __begin;
324   }
325 
326   /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
327   _LIBCPP_HIDE_FROM_ABI
__get_parsed_std_specifications(auto & __ctx)328   __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
329     return __parsed_specifications<_CharT>{
330         .__std_ = __std{.__alignment_            = __alignment_,
331                         .__sign_                 = __sign_,
332                         .__alternate_form_       = __alternate_form_,
333                         .__locale_specific_form_ = __locale_specific_form_,
334                         .__type_                 = __type_},
335         .__width_{__get_width(__ctx)},
336         .__precision_{__get_precision(__ctx)},
337         .__fill_{__fill_}};
338   }
339 
__get_parsed_chrono_specifications(auto & __ctx)340   _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
341     return __parsed_specifications<_CharT>{
342         .__chrono_ =
343             __chrono{.__alignment_            = __alignment_,
344                      .__locale_specific_form_ = __locale_specific_form_,
345                      .__hour_                 = __hour_,
346                      .__weekday_name_         = __weekday_name_,
347                      .__weekday_              = __weekday_,
348                      .__day_of_year_          = __day_of_year_,
349                      .__week_of_year_         = __week_of_year_,
350                      .__month_name_           = __month_name_},
351         .__width_{__get_width(__ctx)},
352         .__precision_{__get_precision(__ctx)},
353         .__fill_{__fill_}};
354   }
355 
356   __alignment __alignment_ : 3 {__alignment::__default};
357   __sign __sign_ : 2 {__sign::__default};
358   bool __alternate_form_ : 1 {false};
359   bool __locale_specific_form_ : 1 {false};
360   bool __reserved_0_ : 1 {false};
361   __type __type_{__type::__default};
362 
363   // These flags are only used for formatting chrono. Since the struct has
364   // padding space left it's added to this structure.
365   bool __hour_ : 1 {false};
366 
367   bool __weekday_name_ : 1 {false};
368   bool __weekday_      : 1 {false};
369 
370   bool __day_of_year_  : 1 {false};
371   bool __week_of_year_ : 1 {false};
372 
373   bool __month_name_ : 1 {false};
374 
375   uint8_t __reserved_1_ : 2 {0};
376   uint8_t __reserved_2_ : 6 {0};
377   // These two flags are only used internally and not part of the
378   // __parsed_specifications. Therefore put them at the end.
379   bool __width_as_arg_ : 1 {false};
380   bool __precision_as_arg_ : 1 {false};
381 
382   /// The requested width, either the value or the arg-id.
383   int32_t __width_{0};
384 
385   /// The requested precision, either the value or the arg-id.
386   int32_t __precision_{-1};
387 
388   // LWG 3576 will probably change this to always accept a Unicode code point
389   // To avoid changing the size with that change align the field so when it
390   // becomes 32-bit its alignment will remain the same. That also means the
391   // size will remain the same. (D2572 addresses the solution for LWG 3576.)
392   _CharT __fill_{_CharT(' ')};
393 
394 private:
__parse_alignment(_CharT __c)395   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
396     switch (__c) {
397     case _CharT('<'):
398       __alignment_ = __alignment::__left;
399       return true;
400 
401     case _CharT('^'):
402       __alignment_ = __alignment::__center;
403       return true;
404 
405     case _CharT('>'):
406       __alignment_ = __alignment::__right;
407       return true;
408     }
409     return false;
410   }
411 
412   // range-fill and tuple-fill are identical
413   template <contiguous_iterator _Iterator>
__parse_fill_align(_Iterator & __begin,_Iterator __end,bool __use_range_fill)414   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
415     _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
416                                      "undefined behavior by evaluating data not in the input");
417     if (__begin + 1 != __end) {
418       if (__parse_alignment(*(__begin + 1))) {
419         if (__use_range_fill && (*__begin == _CharT('{') || *__begin == _CharT('}') || *__begin == _CharT(':')))
420           std::__throw_format_error("The format-spec range-fill field contains an invalid character");
421         else if (*__begin == _CharT('{') || *__begin == _CharT('}'))
422           std::__throw_format_error("The format-spec fill field contains an invalid character");
423 
424         __fill_ = *__begin;
425         __begin += 2;
426         return true;
427       }
428     }
429 
430     if (!__parse_alignment(*__begin))
431       return false;
432 
433     ++__begin;
434     return true;
435   }
436 
437   template <contiguous_iterator _Iterator>
__parse_sign(_Iterator & __begin)438   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
439     switch (*__begin) {
440     case _CharT('-'):
441       __sign_ = __sign::__minus;
442       break;
443     case _CharT('+'):
444       __sign_ = __sign::__plus;
445       break;
446     case _CharT(' '):
447       __sign_ = __sign::__space;
448       break;
449     default:
450       return false;
451     }
452     ++__begin;
453     return true;
454   }
455 
456   template <contiguous_iterator _Iterator>
__parse_alternate_form(_Iterator & __begin)457   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) {
458     if (*__begin != _CharT('#'))
459       return false;
460 
461     __alternate_form_ = true;
462     ++__begin;
463     return true;
464   }
465 
466   template <contiguous_iterator _Iterator>
__parse_zero_padding(_Iterator & __begin)467   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) {
468     if (*__begin != _CharT('0'))
469       return false;
470 
471     if (__alignment_ == __alignment::__default)
472       __alignment_ = __alignment::__zero_padding;
473     ++__begin;
474     return true;
475   }
476 
477   template <contiguous_iterator _Iterator>
__parse_width(_Iterator & __begin,_Iterator __end,auto & __parse_ctx)478   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __parse_ctx) {
479     if (*__begin == _CharT('0'))
480       std::__throw_format_error("A format-spec width field shouldn't have a leading zero");
481 
482     if (*__begin == _CharT('{')) {
483       __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
484       __width_as_arg_ = true;
485       __width_ = __r.__value;
486       __begin = __r.__last;
487       return true;
488     }
489 
490     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
491       return false;
492 
493     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
494     __width_ = __r.__value;
495     _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
496                                   "due to validations in this function");
497     __begin = __r.__last;
498     return true;
499   }
500 
501   template <contiguous_iterator _Iterator>
__parse_precision(_Iterator & __begin,_Iterator __end,auto & __parse_ctx)502   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __parse_ctx) {
503     if (*__begin != _CharT('.'))
504       return false;
505 
506     ++__begin;
507     if (__begin == __end)
508       std::__throw_format_error("End of input while parsing format-spec precision");
509 
510     if (*__begin == _CharT('{')) {
511       __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
512       __precision_as_arg_ = true;
513       __precision_ = __arg_id.__value;
514       __begin = __arg_id.__last;
515       return true;
516     }
517 
518     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
519       std::__throw_format_error("The format-spec precision field doesn't contain a value or arg-id");
520 
521     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
522     __precision_ = __r.__value;
523     __precision_as_arg_ = false;
524     __begin = __r.__last;
525     return true;
526   }
527 
528   template <contiguous_iterator _Iterator>
__parse_locale_specific_form(_Iterator & __begin)529   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) {
530     if (*__begin != _CharT('L'))
531       return false;
532 
533     __locale_specific_form_ = true;
534     ++__begin;
535     return true;
536   }
537 
538   template <contiguous_iterator _Iterator>
__parse_type(_Iterator & __begin)539   _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) {
540     // Determines the type. It does not validate whether the selected type is
541     // valid. Most formatters have optional fields that are only allowed for
542     // certain types. These parsers need to do validation after the type has
543     // been parsed. So its easier to implement the validation for all types in
544     // the specific parse function.
545     switch (*__begin) {
546     case 'A':
547       __type_ = __type::__hexfloat_upper_case;
548       break;
549     case 'B':
550       __type_ = __type::__binary_upper_case;
551       break;
552     case 'E':
553       __type_ = __type::__scientific_upper_case;
554       break;
555     case 'F':
556       __type_ = __type::__fixed_upper_case;
557       break;
558     case 'G':
559       __type_ = __type::__general_upper_case;
560       break;
561     case 'X':
562       __type_ = __type::__hexadecimal_upper_case;
563       break;
564     case 'a':
565       __type_ = __type::__hexfloat_lower_case;
566       break;
567     case 'b':
568       __type_ = __type::__binary_lower_case;
569       break;
570     case 'c':
571       __type_ = __type::__char;
572       break;
573     case 'd':
574       __type_ = __type::__decimal;
575       break;
576     case 'e':
577       __type_ = __type::__scientific_lower_case;
578       break;
579     case 'f':
580       __type_ = __type::__fixed_lower_case;
581       break;
582     case 'g':
583       __type_ = __type::__general_lower_case;
584       break;
585     case 'o':
586       __type_ = __type::__octal;
587       break;
588     case 'p':
589       __type_ = __type::__pointer;
590       break;
591     case 's':
592       __type_ = __type::__string;
593       break;
594     case 'x':
595       __type_ = __type::__hexadecimal_lower_case;
596       break;
597 #  if _LIBCPP_STD_VER >= 23
598     case '?':
599       __type_ = __type::__debug;
600       break;
601 #  endif
602     default:
603       return;
604     }
605     ++__begin;
606   }
607 
608   _LIBCPP_HIDE_FROM_ABI
__get_width(auto & __ctx)609   int32_t __get_width(auto& __ctx) const {
610     if (!__width_as_arg_)
611       return __width_;
612 
613     return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
614   }
615 
616   _LIBCPP_HIDE_FROM_ABI
__get_precision(auto & __ctx)617   int32_t __get_precision(auto& __ctx) const {
618     if (!__precision_as_arg_)
619       return __precision_;
620 
621     return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
622   }
623 };
624 
625 // Validates whether the reserved bitfields don't change the size.
626 static_assert(sizeof(__parser<char>) == 16);
627 #  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
628 static_assert(sizeof(__parser<wchar_t>) == 16);
629 #  endif
630 
__process_display_type_string(__format_spec::__type __type)631 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
632   switch (__type) {
633   case __format_spec::__type::__default:
634   case __format_spec::__type::__string:
635   case __format_spec::__type::__debug:
636     break;
637 
638   default:
639     std::__throw_format_error("The format-spec type has a type not supported for a string argument");
640   }
641 }
642 
643 template <class _CharT>
__process_display_type_bool_string(__parser<_CharT> & __parser)644 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) {
645   if (__parser.__sign_ != __sign::__default)
646     std::__throw_format_error("A sign field isn't allowed in this format-spec");
647 
648   if (__parser.__alternate_form_)
649     std::__throw_format_error("An alternate form field isn't allowed in this format-spec");
650 
651   if (__parser.__alignment_ == __alignment::__zero_padding)
652     std::__throw_format_error("A zero-padding field isn't allowed in this format-spec");
653 
654   if (__parser.__alignment_ == __alignment::__default)
655     __parser.__alignment_ = __alignment::__left;
656 }
657 
658 template <class _CharT>
__process_display_type_char(__parser<_CharT> & __parser)659 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) {
660   __format_spec::__process_display_type_bool_string(__parser);
661 }
662 
663 template <class _CharT>
__process_parsed_bool(__parser<_CharT> & __parser)664 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
665   switch (__parser.__type_) {
666   case __format_spec::__type::__default:
667   case __format_spec::__type::__string:
668     __format_spec::__process_display_type_bool_string(__parser);
669     break;
670 
671   case __format_spec::__type::__binary_lower_case:
672   case __format_spec::__type::__binary_upper_case:
673   case __format_spec::__type::__octal:
674   case __format_spec::__type::__decimal:
675   case __format_spec::__type::__hexadecimal_lower_case:
676   case __format_spec::__type::__hexadecimal_upper_case:
677     break;
678 
679   default:
680     std::__throw_format_error("The format-spec type has a type not supported for a bool argument");
681   }
682 }
683 
684 template <class _CharT>
__process_parsed_char(__parser<_CharT> & __parser)685 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) {
686   switch (__parser.__type_) {
687   case __format_spec::__type::__default:
688   case __format_spec::__type::__char:
689   case __format_spec::__type::__debug:
690     __format_spec::__process_display_type_char(__parser);
691     break;
692 
693   case __format_spec::__type::__binary_lower_case:
694   case __format_spec::__type::__binary_upper_case:
695   case __format_spec::__type::__octal:
696   case __format_spec::__type::__decimal:
697   case __format_spec::__type::__hexadecimal_lower_case:
698   case __format_spec::__type::__hexadecimal_upper_case:
699     break;
700 
701   default:
702     std::__throw_format_error("The format-spec type has a type not supported for a char argument");
703   }
704 }
705 
706 template <class _CharT>
__process_parsed_integer(__parser<_CharT> & __parser)707 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) {
708   switch (__parser.__type_) {
709   case __format_spec::__type::__default:
710   case __format_spec::__type::__binary_lower_case:
711   case __format_spec::__type::__binary_upper_case:
712   case __format_spec::__type::__octal:
713   case __format_spec::__type::__decimal:
714   case __format_spec::__type::__hexadecimal_lower_case:
715   case __format_spec::__type::__hexadecimal_upper_case:
716     break;
717 
718   case __format_spec::__type::__char:
719     __format_spec::__process_display_type_char(__parser);
720     break;
721 
722   default:
723     std::__throw_format_error("The format-spec type has a type not supported for an integer argument");
724   }
725 }
726 
727 template <class _CharT>
__process_parsed_floating_point(__parser<_CharT> & __parser)728 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) {
729   switch (__parser.__type_) {
730   case __format_spec::__type::__default:
731   case __format_spec::__type::__hexfloat_lower_case:
732   case __format_spec::__type::__hexfloat_upper_case:
733     // Precision specific behavior will be handled later.
734     break;
735   case __format_spec::__type::__scientific_lower_case:
736   case __format_spec::__type::__scientific_upper_case:
737   case __format_spec::__type::__fixed_lower_case:
738   case __format_spec::__type::__fixed_upper_case:
739   case __format_spec::__type::__general_lower_case:
740   case __format_spec::__type::__general_upper_case:
741     if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
742       // Set the default precision for the call to to_chars.
743       __parser.__precision_ = 6;
744     break;
745 
746   default:
747     std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument");
748   }
749 }
750 
__process_display_type_pointer(__format_spec::__type __type)751 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) {
752   switch (__type) {
753   case __format_spec::__type::__default:
754   case __format_spec::__type::__pointer:
755     break;
756 
757   default:
758     std::__throw_format_error("The format-spec type has a type not supported for a pointer argument");
759   }
760 }
761 
762 template <contiguous_iterator _Iterator>
763 struct __column_width_result {
764   /// The number of output columns.
765   size_t __width_;
766   /// One beyond the last code unit used in the estimation.
767   ///
768   /// This limits the original output to fit in the wanted number of columns.
769   _Iterator __last_;
770 };
771 
772 template <contiguous_iterator _Iterator>
773 __column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>;
774 
/// The rounding policy for a column width estimation.
///
/// Since a column width can be two it's possible that the requested column
/// width can't be achieved. Depending on the intended usage the policy can be
/// selected.
/// - When used as precision the maximum width may not be exceeded and the
///   result should be "rounded down" to the previous boundary.
/// - When used as a width we're done once the minimum is reached, but
///   exceeding is not an issue. Rounding down is an issue since that will
///   result in writing fill characters. Therefore the result needs to be
///   "rounded up".
enum class __column_width_rounding {
  __down = 0,
  __up   = 1
};
785 
786 #  ifndef _LIBCPP_HAS_NO_UNICODE
787 
788 namespace __detail {
789 
790 /// Converts a code point to the column width.
791 ///
792 /// The estimations are conforming to [format.string.general]/11
793 ///
794 /// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
795 /// character.
__column_width_3(uint32_t __c)796 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept {
797   _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values");
798 
799   // clang-format off
800   return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
801              (__c >= 0x2329 && (__c <= 0x232a ||
802              (__c >= 0x2e80 && (__c <= 0x303e ||
803              (__c >= 0x3040 && (__c <= 0xa4cf ||
804              (__c >= 0xac00 && (__c <= 0xd7a3 ||
805              (__c >= 0xf900 && (__c <= 0xfaff ||
806              (__c >= 0xfe10 && (__c <= 0xfe19 ||
807              (__c >= 0xfe30 && (__c <= 0xfe6f ||
808              (__c >= 0xff00 && (__c <= 0xff60 ||
809              (__c >= 0xffe0 && (__c <= 0xffe6
810              ))))))))))))))))))));
811   // clang-format on
812 }
813 
814 /// @overload
815 ///
816 /// This version expects a value greater than or equal to 0x1'0000, which is a
817 /// 4-byte UTF-8 character.
__column_width_4(uint32_t __c)818 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept {
819   _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values");
820 
821   // clang-format off
822   return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
823              (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
824              (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
825              (__c >= 0x3'0000 && (__c <= 0x3'fffd
826              ))))))));
827   // clang-format on
828 }
829 
830 /// @overload
831 ///
832 /// The general case, accepting all values.
__column_width(uint32_t __c)833 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept {
834   if (__c < 0x10000)
835     return __detail::__column_width_3(__c);
836 
837   return __detail::__column_width_4(__c);
838 }
839 
840 template <contiguous_iterator _Iterator>
__estimate_column_width_grapheme_clustering(_Iterator __first,_Iterator __last,size_t __maximum,__column_width_rounding __rounding)841 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering(
842     _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
843   using _CharT = iter_value_t<_Iterator>;
844   __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
845 
846   __column_width_result<_Iterator> __result{0, __first};
847   while (__result.__last_ != __last && __result.__width_ <= __maximum) {
848     typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
849     int __width = __detail::__column_width(__cluster.__code_point_);
850 
851     // When the next entry would exceed the maximum width the previous width
852     // might be returned. For example when a width of 100 is requested the
853     // returned width might be 99, since the next code point has an estimated
854     // column width of 2. This depends on the rounding flag.
855     // When the maximum is exceeded the loop will abort the next iteration.
856     if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
857       return __result;
858 
859     __result.__width_ += __width;
860     __result.__last_ = __cluster.__last_;
861   }
862 
863   return __result;
864 }
865 
866 } // namespace __detail
867 
868 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
869 // Depending on format the relation between the number of code units stored and
870 // the number of output columns differs. The first relation is the number of
871 // code units forming a code point. (The text assumes the code units are
872 // unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is a high surrogate, in the range [0xd800,0xdbff], it means the
//   code point uses two code units.
879 // - UTF-32: The number of code units is always one.
880 //
881 // The code point to the number of columns is specified in
882 // [format.string.std]/11. This list might change in the future.
883 //
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in
// the output. For example:
887 // - an ASCII character with a combined diacritical mark
888 // - an emoji with a skin tone modifier
889 // - a group of combined people emoji to create a family
890 // - a combination of flag emoji
891 //
892 // See also:
893 // - [format.string.general]/11
894 // - https://en.wikipedia.org/wiki/UTF-8#Encoding
895 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
896 
__is_ascii(char32_t __c)897 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
898 
899 /// Determines the number of output columns needed to render the input.
900 ///
901 /// \note When the scanner encounters malformed Unicode it acts as-if every
902 /// code unit is a one column code point. Typically a terminal uses the same
903 /// strategy and replaces every malformed code unit with a one column
904 /// replacement character.
905 ///
906 /// \param __first    Points to the first element of the input range.
907 /// \param __last     Points beyond the last element of the input range.
908 /// \param __maximum  The maximum number of output columns. The returned number
909 ///                   of estimated output columns will not exceed this value.
910 /// \param __rounding Selects the rounding method.
911 ///                   \c __down result.__width_ <= __maximum
912 ///                   \c __up result.__width_ <= __maximum + 1
913 template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator>
__estimate_column_width(basic_string_view<_CharT> __str,size_t __maximum,__column_width_rounding __rounding)914 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width(
915     basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
916   // The width estimation is done in two steps:
917   // - Quickly process for the ASCII part. ASCII has the following properties
918   //   - One code unit is one code point
919   //   - Every code point has an estimated width of one
920   // - When needed it will a Unicode Grapheme clustering algorithm to find
921   //   the proper place for truncation.
922 
923   if (__str.empty() || __maximum == 0)
924     return {0, __str.begin()};
925 
926   // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
927   // character they might be part of an extended grapheme cluster. For example:
928   //   an ASCII letter and a COMBINING ACUTE ACCENT
929   // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
930   // need to scan one code unit beyond the requested precision. When this code
931   // unit is non-ASCII we omit the current code unit and let the Grapheme
932   // clustering algorithm do its work.
933   auto __it = __str.begin();
934   if (__format_spec::__is_ascii(*__it)) {
935     do {
936       --__maximum;
937       ++__it;
938       if (__it == __str.end())
939         return {__str.size(), __str.end()};
940 
941       if (__maximum == 0) {
942         if (__format_spec::__is_ascii(*__it))
943           return {static_cast<size_t>(__it - __str.begin()), __it};
944 
945         break;
946       }
947     } while (__format_spec::__is_ascii(*__it));
948     --__it;
949     ++__maximum;
950   }
951 
952   ptrdiff_t __ascii_size = __it - __str.begin();
953   __column_width_result __result =
954       __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
955 
956   __result.__width_ += __ascii_size;
957   return __result;
958 }
959 #  else // !defined(_LIBCPP_HAS_NO_UNICODE)
960 template <class _CharT>
961 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator>
__estimate_column_width(basic_string_view<_CharT> __str,size_t __maximum,__column_width_rounding)962 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
963   // When Unicode isn't supported assume ASCII and every code unit is one code
964   // point. In ASCII the estimated column width is always one. Thus there's no
965   // need for rounding.
966   size_t __width_ = _VSTD::min(__str.size(), __maximum);
967   return {__width_, __str.begin() + __width_};
968 }
969 
970 #  endif // !defined(_LIBCPP_HAS_NO_UNICODE)
971 
972 } // namespace __format_spec
973 
974 #endif //_LIBCPP_STD_VER >= 20
975 
976 _LIBCPP_END_NAMESPACE_STD
977 
978 _LIBCPP_POP_MACROS
979 
980 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
981