• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Formatting library for C++ - scanning API proof of concept
2 //
3 // Copyright (c) 2019 - present, Victor Zverovich
4 // All rights reserved.
5 //
6 // For the license information refer to format.h.
7 
8 #include <array>
9 #include <cassert>
10 #include <climits>
11 
12 #include "fmt/format.h"
13 
14 FMT_BEGIN_NAMESPACE
15 namespace detail {
16 
// Returns true if c is one of the two characters the scanner treats as
// whitespace: a space or a line feed.
inline auto is_whitespace(char c) -> bool {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
18 
// Converts a hexadecimal digit character to its numeric value (0-15).
// Both lowercase and uppercase letters are accepted. Returns -1 if c is not a
// hex digit.
inline auto to_hex_digit(char c) -> int {
  const bool is_decimal = '0' <= c && c <= '9';
  if (is_decimal) return c - '0';
  const bool is_lower = 'a' <= c && c <= 'f';
  if (is_lower) return 10 + (c - 'a');
  const bool is_upper = 'A' <= c && c <= 'F';
  if (is_upper) return 10 + (c - 'A');
  return -1;
}
26 
// A [begin, end) pair of pointers describing contiguous input. A null `begin`
// means that no contiguous range is available.
struct maybe_contiguous_range {
  const char* begin;
  const char* end;

  // True when a range is present, i.e. `begin` is not null.
  explicit operator bool() const { return !(begin == nullptr); }
};
33 
34 class scan_buffer {
35  private:
36   const char* ptr_;
37   const char* end_;
38   bool contiguous_;
39 
40  protected:
scan_buffer(const char * ptr,const char * end,bool contiguous)41   scan_buffer(const char* ptr, const char* end, bool contiguous)
42       : ptr_(ptr), end_(end), contiguous_(contiguous) {}
43   ~scan_buffer() = default;
44 
set(string_view buf)45   void set(string_view buf) {
46     ptr_ = buf.begin();
47     end_ = buf.end();
48   }
49 
50   auto ptr() const -> const char* { return ptr_; }
51 
52  public:
53   scan_buffer(const scan_buffer&) = delete;
54   void operator=(const scan_buffer&) = delete;
55 
56   // Fills the buffer with more input if available.
57   virtual void consume() = 0;
58 
59   class sentinel {};
60 
61   class iterator {
62    private:
63     const char** ptr_;
64     scan_buffer* buf_;  // This could be merged with ptr_.
65     char value_;
66 
67     static auto get_sentinel() -> const char** {
68       static const char* ptr = nullptr;
69       return &ptr;
70     }
71 
72     friend class scan_buffer;
73 
74     friend auto operator==(iterator lhs, sentinel) -> bool {
75       return *lhs.ptr_ == nullptr;
76     }
77     friend auto operator!=(iterator lhs, sentinel) -> bool {
78       return *lhs.ptr_ != nullptr;
79     }
80 
iterator(scan_buffer * buf)81     iterator(scan_buffer* buf) : buf_(buf) {
82       if (buf->ptr_ == buf->end_) {
83         ptr_ = get_sentinel();
84         return;
85       }
86       ptr_ = &buf->ptr_;
87       value_ = *buf->ptr_;
88     }
89 
get_buffer(iterator it)90     friend scan_buffer& get_buffer(iterator it) { return *it.buf_; }
91 
92    public:
iterator()93     iterator() : ptr_(get_sentinel()), buf_(nullptr) {}
94 
95     auto operator++() -> iterator& {
96       if (!buf_->try_consume()) ptr_ = get_sentinel();
97       value_ = *buf_->ptr_;
98       return *this;
99     }
100     auto operator++(int) -> iterator {
101       iterator copy = *this;
102       ++*this;
103       return copy;
104     }
105     auto operator*() const -> char { return value_; }
106 
107     auto base() const -> const char* { return buf_->ptr_; }
108 
109     friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
110     friend auto advance(iterator it, size_t n) -> iterator;
111   };
112 
113   friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
114     if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
115     return {nullptr, nullptr};
116   }
117   friend auto advance(iterator it, size_t n) -> iterator {
118     FMT_ASSERT(it.buf_->is_contiguous(), "");
119     const char*& ptr = it.buf_->ptr_;
120     ptr += n;
121     it.value_ = *ptr;
122     if (ptr == it.buf_->end_) it.ptr_ = iterator::get_sentinel();
123     return it;
124   }
125 
126   auto begin() -> iterator { return this; }
127   auto end() -> sentinel { return {}; }
128 
129   auto is_contiguous() const -> bool { return contiguous_; }
130 
131   // Tries consuming a single code unit. Returns true iff there is more input.
132   auto try_consume() -> bool {
133     FMT_ASSERT(ptr_ != end_, "");
134     ++ptr_;
135     if (ptr_ != end_) return true;
136     consume();
137     return ptr_ != end_;
138   }
139 };
140 
141 using scan_iterator = scan_buffer::iterator;
142 using scan_sentinel = scan_buffer::sentinel;
143 
144 class string_scan_buffer : public scan_buffer {
145  private:
consume()146   void consume() override {}
147 
148  public:
string_scan_buffer(string_view s)149   explicit string_scan_buffer(string_view s)
150       : scan_buffer(s.begin(), s.end(), true) {}
151 };
152 
#ifdef _WIN32
// Windows equivalents of the POSIX stdio locking functions used below.
// Declared inline because they are defined in a header: without it, including
// this file from more than one translation unit produces duplicate
// definitions.
inline void flockfile(FILE* f) { _lock_file(f); }
inline void funlockfile(FILE* f) { _unlock_file(f); }
inline int getc_unlocked(FILE* f) { return _fgetc_nolock(f); }
#endif
158 
159 // A FILE wrapper. F is FILE defined as a template parameter to make
160 // system-specific API detection work.
161 template <typename F> class file_base {
162  protected:
163   F* file_;
164 
165  public:
file_base(F * file)166   file_base(F* file) : file_(file) {}
167   operator F*() const { return file_; }
168 
169   // Reads a code unit from the stream.
170   auto get() -> int {
171     int result = getc_unlocked(file_);
172     if (result == EOF && ferror(file_) != 0)
173       FMT_THROW(system_error(errno, FMT_STRING("getc failed")));
174     return result;
175   }
176 
177   // Puts the code unit back into the stream buffer.
unget(char c)178   void unget(char c) {
179     if (ungetc(c, file_) == EOF)
180       FMT_THROW(system_error(errno, FMT_STRING("ungetc failed")));
181   }
182 };
183 
184 // A FILE wrapper for glibc.
185 template <typename F> class glibc_file : public file_base<F> {
186  public:
187   using file_base<F>::file_base;
188 
189   // Returns the file's read buffer as a string_view.
190   auto buffer() const -> string_view {
191     return {this->file_->_IO_read_ptr,
192             to_unsigned(this->file_->_IO_read_end - this->file_->_IO_read_ptr)};
193   }
194 };
195 
196 // A FILE wrapper for Apple's libc.
197 template <typename F> class apple_file : public file_base<F> {
198  public:
199   using file_base<F>::file_base;
200 
201   auto buffer() const -> string_view {
202     return {reinterpret_cast<char*>(this->file_->_p),
203             to_unsigned(this->file_->_r)};
204   }
205 };
206 
207 // A fallback FILE wrapper.
208 template <typename F> class fallback_file : public file_base<F> {
209  private:
210   char next_;  // The next unconsumed character in the buffer.
211   bool has_next_ = false;
212 
213  public:
214   using file_base<F>::file_base;
215 
216   auto buffer() const -> string_view { return {&next_, has_next_ ? 1u : 0u}; }
217 
218   auto get() -> int {
219     has_next_ = false;
220     return file_base<F>::get();
221   }
222 
unget(char c)223   void unget(char c) {
224     file_base<F>::unget(c);
225     next_ = c;
226     has_next_ = true;
227   }
228 };
229 
230 class file_scan_buffer : public scan_buffer {
231  private:
232   template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0)>
233   static auto get_file(F* f, int) -> glibc_file<F> {
234     return f;
235   }
236   template <typename F, FMT_ENABLE_IF(sizeof(F::_p) != 0)>
237   static auto get_file(F* f, int) -> apple_file<F> {
238     return f;
239   }
240   static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
241 
242   decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
243 
244   // Fills the buffer if it is empty.
fill()245   void fill() {
246     string_view buf = file_.buffer();
247     if (buf.size() == 0) {
248       int c = file_.get();
249       // Put the character back since we are only filling the buffer.
250       if (c != EOF) file_.unget(static_cast<char>(c));
251       buf = file_.buffer();
252     }
253     set(buf);
254   }
255 
consume()256   void consume() override {
257     // Consume the current buffer content.
258     size_t n = to_unsigned(ptr() - file_.buffer().begin());
259     for (size_t i = 0; i != n; ++i) file_.get();
260     fill();
261   }
262 
263  public:
file_scan_buffer(FILE * f)264   explicit file_scan_buffer(FILE* f)
265       : scan_buffer(nullptr, nullptr, false), file_(f) {
266     flockfile(f);
267     fill();
268   }
~file_scan_buffer()269   ~file_scan_buffer() { funlockfile(file_); }
270 };
271 }  // namespace detail
272 
// The primary scanner template. Scanning support for a type T is provided by
// specializing it; the primary template is not default-constructible.
template <typename T, typename Char = char> struct scanner {
  // A deleted default constructor indicates a disabled scanner.
  scanner() = delete;
};
277 
278 class scan_parse_context {
279  private:
280   string_view format_;
281 
282  public:
283   using iterator = string_view::iterator;
284 
scan_parse_context(string_view format)285   explicit FMT_CONSTEXPR scan_parse_context(string_view format)
286       : format_(format) {}
287 
288   FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
289   FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }
290 
advance_to(iterator it)291   void advance_to(iterator it) {
292     format_.remove_prefix(detail::to_unsigned(it - begin()));
293   }
294 };
295 
296 namespace detail {
// Identifies the kind of object a scan argument refers to.
enum class scan_type {
  none_type,         // No argument.
  int_type,          // int
  uint_type,         // unsigned
  long_long_type,    // long long
  ulong_long_type,   // unsigned long long
  string_type,       // std::string
  string_view_type,  // string_view
  custom_type        // A type scanned through a scanner specialization.
};
307 
308 template <typename Context> struct custom_scan_arg {
309   void* value;
310   void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx);
311 };
312 }  // namespace detail
313 
314 // A scan argument. Context is a template parameter for the compiled API where
315 // output can be unbuffered.
316 template <typename Context> class basic_scan_arg {
317  private:
318   using scan_type = detail::scan_type;
319   scan_type type_;
320   union {
321     int* int_value_;
322     unsigned* uint_value_;
323     long long* long_long_value_;
324     unsigned long long* ulong_long_value_;
325     std::string* string_;
326     string_view* string_view_;
327     detail::custom_scan_arg<Context> custom_;
328     // TODO: more types
329   };
330 
331   template <typename T>
scan_custom_arg(void * arg,scan_parse_context & parse_ctx,Context & ctx)332   static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
333                               Context& ctx) {
334     auto s = scanner<T>();
335     parse_ctx.advance_to(s.parse(parse_ctx));
336     ctx.advance_to(s.scan(*static_cast<T*>(arg), ctx));
337   }
338 
339  public:
basic_scan_arg()340   FMT_CONSTEXPR basic_scan_arg()
341       : type_(scan_type::none_type), int_value_(nullptr) {}
basic_scan_arg(int & value)342   FMT_CONSTEXPR basic_scan_arg(int& value)
343       : type_(scan_type::int_type), int_value_(&value) {}
basic_scan_arg(unsigned & value)344   FMT_CONSTEXPR basic_scan_arg(unsigned& value)
345       : type_(scan_type::uint_type), uint_value_(&value) {}
basic_scan_arg(long long & value)346   FMT_CONSTEXPR basic_scan_arg(long long& value)
347       : type_(scan_type::long_long_type), long_long_value_(&value) {}
basic_scan_arg(unsigned long long & value)348   FMT_CONSTEXPR basic_scan_arg(unsigned long long& value)
349       : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {}
basic_scan_arg(std::string & value)350   FMT_CONSTEXPR basic_scan_arg(std::string& value)
351       : type_(scan_type::string_type), string_(&value) {}
basic_scan_arg(string_view & value)352   FMT_CONSTEXPR basic_scan_arg(string_view& value)
353       : type_(scan_type::string_view_type), string_view_(&value) {}
354   template <typename T>
basic_scan_arg(T & value)355   FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) {
356     custom_.value = &value;
357     custom_.scan = scan_custom_arg<T>;
358   }
359 
360   constexpr explicit operator bool() const noexcept {
361     return type_ != scan_type::none_type;
362   }
363 
364   auto type() const -> detail::scan_type { return type_; }
365 
366   template <typename Visitor>
367   auto visit(Visitor&& vis) -> decltype(vis(monostate())) {
368     switch (type_) {
369     case scan_type::none_type:
370       break;
371     case scan_type::int_type:
372       return vis(*int_value_);
373     case scan_type::uint_type:
374       return vis(*uint_value_);
375     case scan_type::long_long_type:
376       return vis(*long_long_value_);
377     case scan_type::ulong_long_type:
378       return vis(*ulong_long_value_);
379     case scan_type::string_type:
380       return vis(*string_);
381     case scan_type::string_view_type:
382       return vis(*string_view_);
383     case scan_type::custom_type:
384       break;
385     }
386     return vis(monostate());
387   }
388 
389   auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx,
390                    Context& ctx) const -> bool {
391     if (type_ != scan_type::custom_type) return false;
392     parse_ctx.advance_to(parse_begin);
393     custom_.scan(custom_.value, parse_ctx, ctx);
394     return true;
395   }
396 };
397 
398 class scan_context;
399 using scan_arg = basic_scan_arg<scan_context>;
400 
401 struct scan_args {
402   int size;
403   const scan_arg* data;
404 
405   template <size_t N>
scan_argsscan_args406   FMT_CONSTEXPR scan_args(const std::array<scan_arg, N>& store)
407       : size(N), data(store.data()) {
408     static_assert(N < INT_MAX, "too many arguments");
409   }
410 };
411 
412 class scan_context {
413  private:
414   detail::scan_buffer& buf_;
415   scan_args args_;
416 
417  public:
418   using iterator = detail::scan_iterator;
419   using sentinel = detail::scan_sentinel;
420 
scan_context(detail::scan_buffer & buf,scan_args args)421   explicit FMT_CONSTEXPR scan_context(detail::scan_buffer& buf, scan_args args)
422       : buf_(buf), args_(args) {}
423 
424   FMT_CONSTEXPR auto arg(int id) const -> scan_arg {
425     return id < args_.size ? args_.data[id] : scan_arg();
426   }
427 
428   auto begin() const -> iterator { return buf_.begin(); }
429   auto end() const -> sentinel { return {}; }
430 
advance_to(iterator)431   void advance_to(iterator) { buf_.consume(); }
432 };
433 
434 namespace detail {
435 
parse_scan_specs(const char * begin,const char * end,format_specs<> & specs,scan_type)436 const char* parse_scan_specs(const char* begin, const char* end,
437                              format_specs<>& specs, scan_type) {
438   while (begin != end) {
439     switch (to_ascii(*begin)) {
440     // TODO: parse more scan format specifiers
441     case 'x':
442       specs.type = presentation_type::hex_lower;
443       ++begin;
444       break;
445     case '}':
446       return begin;
447     }
448   }
449   return begin;
450 }
451 
452 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
453 auto read(scan_iterator it, T& value)
454     -> scan_iterator {
455   if (it == scan_sentinel()) return it;
456   char c = *it;
457   if (c < '0' || c > '9') throw_format_error("invalid input");
458 
459   int num_digits = 0;
460   T n = 0, prev = 0;
461   char prev_digit = c;
462   do {
463     prev = n;
464     n = n * 10 + static_cast<unsigned>(c - '0');
465     prev_digit = c;
466     c = *++it;
467     ++num_digits;
468     if (c < '0' || c > '9') break;
469   } while (it != scan_sentinel());
470 
471   // Check overflow.
472   if (num_digits <= std::numeric_limits<int>::digits10) {
473     value = n;
474     return it;
475   }
476   unsigned max = to_unsigned((std::numeric_limits<int>::max)());
477   if (num_digits == std::numeric_limits<int>::digits10 + 1 &&
478       prev * 10ull + unsigned(prev_digit - '0') <= max) {
479     value = n;
480   } else {
481     throw_format_error("number is too big");
482   }
483   return it;
484 }
485 
486 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
487 auto read_hex(scan_iterator it, T& value)
488     -> scan_iterator {
489   if (it == scan_sentinel()) return it;
490   int digit = to_hex_digit(*it);
491   if (digit < 0) throw_format_error("invalid input");
492 
493   int num_digits = 0;
494   T n = 0;
495   do {
496     n = (n << 4) + static_cast<unsigned>(digit);
497     ++num_digits;
498     digit = to_hex_digit(*++it);
499     if (digit < 0) break;
500   } while (it != scan_sentinel());
501 
502   // Check overflow.
503   if (num_digits <= (std::numeric_limits<T>::digits >> 2))
504     value = n;
505   else
506     throw_format_error("number is too big");
507   return it;
508 }
509 
510 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
511 auto read(scan_iterator it, T& value, const format_specs<>& specs)
512     -> scan_iterator {
513   if (specs.type == presentation_type::hex_lower)
514     return read_hex(it, value);
515   return read(it, value);
516 }
517 
518 template <typename T, FMT_ENABLE_IF(std::is_signed<T>::value)>
519 auto read(scan_iterator it, T& value, const format_specs<>& = {})
520     -> scan_iterator {
521   bool negative = it != scan_sentinel() && *it == '-';
522   if (negative) {
523     ++it;
524     if (it == scan_sentinel()) throw_format_error("invalid input");
525   }
526   using unsigned_type = typename std::make_unsigned<T>::type;
527   unsigned_type abs_value = 0;
528   it = read(it, abs_value);
529   auto n = static_cast<T>(abs_value);
530   value = negative ? -n : n;
531   return it;
532 }
533 
534 auto read(scan_iterator it, std::string& value, const format_specs<>& = {})
535     -> scan_iterator {
536   while (it != scan_sentinel() && *it != ' ') value.push_back(*it++);
537   return it;
538 }
539 
540 auto read(scan_iterator it, string_view& value, const format_specs<>& = {})
541     -> scan_iterator {
542   auto range = to_contiguous(it);
543   // This could also be checked at compile time in scan.
544   if (!range) throw_format_error("string_view requires contiguous input");
545   auto p = range.begin;
546   while (p != range.end && *p != ' ') ++p;
547   size_t size = to_unsigned(p - range.begin);
548   value = {range.begin, size};
549   return advance(it, size);
550 }
551 
552 auto read(scan_iterator it, monostate, const format_specs<>& = {})
553     -> scan_iterator {
554   return it;
555 }
556 
557 // An argument scanner that uses the default format, e.g. decimal for integers.
558 struct default_arg_scanner {
559   scan_iterator it;
560 
561   template <typename T> FMT_INLINE auto operator()(T&& value) -> scan_iterator {
562     return read(it, value);
563   }
564 };
565 
566 // An argument scanner with format specifiers.
567 struct arg_scanner {
568   scan_iterator it;
569   const format_specs<>& specs;
570 
571   template <typename T> auto operator()(T&& value) -> scan_iterator {
572     return read(it, value, specs);
573   }
574 };
575 
576 struct scan_handler : error_handler {
577  private:
578   scan_parse_context parse_ctx_;
579   scan_context scan_ctx_;
580   int next_arg_id_;
581 
582   using sentinel = scan_buffer::sentinel;
583 
584  public:
scan_handlerscan_handler585   FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
586                              scan_args args)
587       : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {}
588 
589   auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }
590 
on_textscan_handler591   void on_text(const char* begin, const char* end) {
592     if (begin == end) return;
593     auto it = scan_ctx_.begin();
594     for (; begin != end; ++begin, ++it) {
595       if (it == sentinel() || *begin != *it) on_error("invalid input");
596     }
597     scan_ctx_.advance_to(it);
598   }
599 
600   FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }
601   FMT_CONSTEXPR auto on_arg_id(int id) -> int {
602     if (!scan_ctx_.arg(id)) on_error("argument index out of range");
603     return id;
604   }
605   FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
606     if (id.data()) on_error("invalid format");
607     return 0;
608   }
609 
on_replacement_fieldscan_handler610   void on_replacement_field(int arg_id, const char*) {
611     scan_arg arg = scan_ctx_.arg(arg_id);
612     auto it = scan_ctx_.begin();
613     while (it != sentinel() && is_whitespace(*it)) ++it;
614     scan_ctx_.advance_to(arg.visit(default_arg_scanner{it}));
615   }
616 
617   auto on_format_specs(int arg_id, const char* begin, const char* end) -> const
618       char* {
619     scan_arg arg = scan_ctx_.arg(arg_id);
620     if (arg.scan_custom(begin, parse_ctx_, scan_ctx_))
621       return parse_ctx_.begin();
622     auto specs = format_specs<>();
623     begin = parse_scan_specs(begin, end, specs, arg.type());
624     if (begin == end || *begin != '}') on_error("missing '}' in format string");
625     scan_ctx_.advance_to(arg.visit(arg_scanner{scan_ctx_.begin(), specs}));
626     return begin;
627   }
628 
on_errorscan_handler629   void on_error(const char* message) { error_handler::on_error(message); }
630 };
631 }  // namespace detail
632 
633 template <typename... T>
634 auto make_scan_args(T&... args) -> std::array<scan_arg, sizeof...(T)> {
635   return {{args...}};
636 }
637 
vscan(detail::scan_buffer & buf,string_view fmt,scan_args args)638 void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
639   auto h = detail::scan_handler(fmt, buf, args);
640   detail::parse_format_string<false>(fmt, h);
641 }
642 
643 template <typename... T>
644 auto scan(string_view input, string_view fmt, T&... args)
645     -> string_view::iterator {
646   auto&& buf = detail::string_scan_buffer(input);
647   vscan(buf, fmt, make_scan_args(args...));
648   return input.begin() + (buf.begin().base() - input.data());
649 }
650 
651 template <typename InputRange, typename... T,
652           FMT_ENABLE_IF(!std::is_convertible<InputRange, string_view>::value)>
653 auto scan(InputRange&& input, string_view fmt, T&... args)
654     -> decltype(std::begin(input)) {
655   auto it = std::begin(input);
656   vscan(get_buffer(it), fmt, make_scan_args(args...));
657   return it;
658 }
659 
scan(std::FILE * f,string_view fmt,T &...args)660 template <typename... T> bool scan(std::FILE* f, string_view fmt, T&... args) {
661   auto&& buf = detail::file_scan_buffer(f);
662   vscan(buf, fmt, make_scan_args(args...));
663   return buf.begin() != buf.end();
664 }
665 
666 FMT_END_NAMESPACE
667