• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #pragma once
2 
3 #include <array> // array
4 #include <cstddef> // size_t
5 #include <cstdio> //FILE *
6 #include <cstring> // strlen
7 #include <istream> // istream
8 #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
9 #include <memory> // shared_ptr, make_shared, addressof
10 #include <numeric> // accumulate
11 #include <string> // string, char_traits
12 #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
13 #include <utility> // pair, declval
14 
15 #include <nlohmann/detail/iterators/iterator_traits.hpp>
16 #include <nlohmann/detail/macro_scope.hpp>
17 
18 namespace nlohmann
19 {
20 namespace detail
21 {
/// the supported input formats
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
24 
25 ////////////////////
26 // input adapters //
27 ////////////////////
28 
29 /*!
30 Input adapter for stdio file access. This adapter read only 1 byte and do not use any
31  buffer. This adapter is a very low level adapter.
32 */
33 class file_input_adapter
34 {
35   public:
36     using char_type = char;
37 
38     JSON_HEDLEY_NON_NULL(2)
file_input_adapter(std::FILE * f)39     explicit file_input_adapter(std::FILE* f) noexcept
40         : m_file(f)
41     {}
42 
43     // make class move-only
44     file_input_adapter(const file_input_adapter&) = delete;
45     file_input_adapter(file_input_adapter&&) = default;
46     file_input_adapter& operator=(const file_input_adapter&) = delete;
47     file_input_adapter& operator=(file_input_adapter&&) = delete;
48 
get_character()49     std::char_traits<char>::int_type get_character() noexcept
50     {
51         return std::fgetc(m_file);
52     }
53 
54   private:
55     /// the file pointer to read from
56     std::FILE* m_file;
57 };
58 
59 
60 /*!
61 Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
62 beginning of input. Does not support changing the underlying std::streambuf
63 in mid-input. Maintains underlying std::istream and std::streambuf to support
64 subsequent use of standard std::istream operations to process any input
65 characters following those used in parsing the JSON input.  Clears the
66 std::istream flags; any input errors (e.g., EOF) will be detected by the first
67 subsequent call for input from the std::istream.
68 */
69 class input_stream_adapter
70 {
71   public:
72     using char_type = char;
73 
~input_stream_adapter()74     ~input_stream_adapter()
75     {
76         // clear stream flags; we use underlying streambuf I/O, do not
77         // maintain ifstream flags, except eof
78         if (is != nullptr)
79         {
80             is->clear(is->rdstate() & std::ios::eofbit);
81         }
82     }
83 
input_stream_adapter(std::istream & i)84     explicit input_stream_adapter(std::istream& i)
85         : is(&i), sb(i.rdbuf())
86     {}
87 
88     // delete because of pointer members
89     input_stream_adapter(const input_stream_adapter&) = delete;
90     input_stream_adapter& operator=(input_stream_adapter&) = delete;
91     input_stream_adapter& operator=(input_stream_adapter&& rhs) = delete;
92 
input_stream_adapter(input_stream_adapter && rhs)93     input_stream_adapter(input_stream_adapter&& rhs) noexcept : is(rhs.is), sb(rhs.sb)
94     {
95         rhs.is = nullptr;
96         rhs.sb = nullptr;
97     }
98 
99     // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
100     // ensure that std::char_traits<char>::eof() and the character 0xFF do not
101     // end up as the same value, eg. 0xFFFFFFFF.
get_character()102     std::char_traits<char>::int_type get_character()
103     {
104         auto res = sb->sbumpc();
105         // set eof manually, as we don't use the istream interface.
106         if (JSON_HEDLEY_UNLIKELY(res == EOF))
107         {
108             is->clear(is->rdstate() | std::ios::eofbit);
109         }
110         return res;
111     }
112 
113   private:
114     /// the associated input stream
115     std::istream* is = nullptr;
116     std::streambuf* sb = nullptr;
117 };
118 
// General-purpose adapter over an iterator range [first, last). It might not
// be as fast as theoretically possible for some containers, but it works with
// any iterator type that supports comparison, dereferencing and std::advance.
template<typename IteratorType>
class iterator_input_adapter
{
  public:
    using char_type = typename std::iterator_traits<IteratorType>::value_type;

    /// @param[in] first  iterator to the first element to read
    /// @param[in] last   iterator past the last element to read
    iterator_input_adapter(IteratorType first, IteratorType last)
        : current(std::move(first)), end(std::move(last))
    {}

    /// @return the element at the current position converted with
    ///         std::char_traits<char_type>::to_int_type (the position is then
    ///         advanced by one), or std::char_traits<char_type>::eof() once
    ///         the range is exhausted
    typename std::char_traits<char_type>::int_type get_character()
    {
        using traits_type = std::char_traits<char_type>;

        if (JSON_HEDLEY_LIKELY(current != end))
        {
            const auto value = traits_type::to_int_type(*current);
            std::advance(current, 1);
            return value;
        }

        return traits_type::eof();
    }

  private:
    /// position of the next element to read
    IteratorType current;
    /// end of the range
    IteratorType end;

    // the wide-string helpers need empty() to tell "range exhausted" apart
    // from a regular character value
    template<typename BaseInputAdapter, size_t T>
    friend struct wide_string_input_helper;

    /// @return whether all elements of the range have been consumed
    bool empty() const
    {
        return current == end;
    }
};
157 
158 
// Converts one wide character read from BaseInputAdapter into UTF-8 bytes.
// T is the size in bytes of the wide character type; only the specializations
// for 4 (UTF-32) and 2 (UTF-16) are defined.
template<typename BaseInputAdapter, size_t T>
struct wide_string_input_helper;

template<typename BaseInputAdapter>
struct wide_string_input_helper<BaseInputAdapter, 4>
{
    /*!
    @brief read one UTF-32 code unit and store its UTF-8 encoding

    @param[in,out] input          adapter to read the wide character from
    @param[out] utf8_bytes        receives the encoded bytes, starting at index 0
    @param[out] utf8_bytes_index  always reset to 0
    @param[out] utf8_bytes_filled number of valid entries written to
                                  @a utf8_bytes (1 to 4)

    If @a input is exhausted, a single std::char_traits<char>::eof() entry is
    stored. A value above 0x10FFFF is stored unencoded as a single entry.
    */
    static void fill_buffer(BaseInputAdapter& input,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            size_t& utf8_bytes_index,
                            size_t& utf8_bytes_filled)
    {
        using byte_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        // nothing left to read: report end of input
        if (JSON_HEDLEY_UNLIKELY(input.empty()))
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // fetch the current character; range checks use the value as read,
        // bit operations use its unsigned representation
        const auto wc = input.get_character();
        const auto bits = static_cast<unsigned int>(wc);

        if (wc < 0x80)
        {
            // one byte: 0xxxxxxx
            utf8_bytes[0] = static_cast<byte_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            // two bytes: 110xxxxx 10xxxxxx
            utf8_bytes[0] = static_cast<byte_type>(0xC0u | ((bits >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<byte_type>(0x80u | (bits & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFF)
        {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            utf8_bytes[0] = static_cast<byte_type>(0xE0u | ((bits >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<byte_type>(0x80u | ((bits >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<byte_type>(0x80u | (bits & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFF)
        {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            utf8_bytes[0] = static_cast<byte_type>(0xF0u | ((bits >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<byte_type>(0x80u | ((bits >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<byte_type>(0x80u | ((bits >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<byte_type>(0x80u | (bits & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: pass the raw value through unchanged
            utf8_bytes[0] = static_cast<byte_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
219 
220 template<typename BaseInputAdapter>
221 struct wide_string_input_helper<BaseInputAdapter, 2>
222 {
223     // UTF-16
fill_buffernlohmann::detail::wide_string_input_helper224     static void fill_buffer(BaseInputAdapter& input,
225                             std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
226                             size_t& utf8_bytes_index,
227                             size_t& utf8_bytes_filled)
228     {
229         utf8_bytes_index = 0;
230 
231         if (JSON_HEDLEY_UNLIKELY(input.empty()))
232         {
233             utf8_bytes[0] = std::char_traits<char>::eof();
234             utf8_bytes_filled = 1;
235         }
236         else
237         {
238             // get the current character
239             const auto wc = input.get_character();
240 
241             // UTF-16 to UTF-8 encoding
242             if (wc < 0x80)
243             {
244                 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
245                 utf8_bytes_filled = 1;
246             }
247             else if (wc <= 0x7FF)
248             {
249                 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
250                 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
251                 utf8_bytes_filled = 2;
252             }
253             else if (0xD800 > wc || wc >= 0xE000)
254             {
255                 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
256                 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
257                 utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
258                 utf8_bytes_filled = 3;
259             }
260             else
261             {
262                 if (JSON_HEDLEY_UNLIKELY(!input.empty()))
263                 {
264                     const auto wc2 = static_cast<unsigned int>(input.get_character());
265                     const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
266                     utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
267                     utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
268                     utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
269                     utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
270                     utf8_bytes_filled = 4;
271                 }
272                 else
273                 {
274                     utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
275                     utf8_bytes_filled = 1;
276                 }
277             }
278         }
279     }
280 };
281 
282 // Wraps another input apdater to convert wide character types into individual bytes.
283 template<typename BaseInputAdapter, typename WideCharType>
284 class wide_string_input_adapter
285 {
286   public:
287     using char_type = char;
288 
wide_string_input_adapter(BaseInputAdapter base)289     wide_string_input_adapter(BaseInputAdapter base)
290         : base_adapter(base) {}
291 
get_character()292     typename std::char_traits<char>::int_type get_character() noexcept
293     {
294         // check if buffer needs to be filled
295         if (utf8_bytes_index == utf8_bytes_filled)
296         {
297             fill_buffer<sizeof(WideCharType)>();
298 
299             JSON_ASSERT(utf8_bytes_filled > 0);
300             JSON_ASSERT(utf8_bytes_index == 0);
301         }
302 
303         // use buffer
304         JSON_ASSERT(utf8_bytes_filled > 0);
305         JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
306         return utf8_bytes[utf8_bytes_index++];
307     }
308 
309   private:
310     BaseInputAdapter base_adapter;
311 
312     template<size_t T>
fill_buffer()313     void fill_buffer()
314     {
315         wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
316     }
317 
318     /// a buffer for UTF-8 bytes
319     std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
320 
321     /// index to the utf8_codes array for the next valid byte
322     std::size_t utf8_bytes_index = 0;
323     /// number of valid bytes in the utf8_codes array
324     std::size_t utf8_bytes_filled = 0;
325 };
326 
327 
328 template<typename IteratorType, typename Enable = void>
329 struct iterator_input_adapter_factory
330 {
331     using iterator_type = IteratorType;
332     using char_type = typename std::iterator_traits<iterator_type>::value_type;
333     using adapter_type = iterator_input_adapter<iterator_type>;
334 
createnlohmann::detail::iterator_input_adapter_factory335     static adapter_type create(IteratorType first, IteratorType last)
336     {
337         return adapter_type(std::move(first), std::move(last));
338     }
339 };
340 
// Trait telling whether the elements an iterator of type T refers to are
// larger than one byte; `value` is nonzero in that case and zero otherwise.
template<typename T>
struct is_iterator_of_multibyte
{
    using value_type = typename std::iterator_traits<T>::value_type;

    // an unnamed enumerator serves as the compile-time constant
    enum { value = (sizeof(value_type) > 1) };
};
350 
351 template<typename IteratorType>
352 struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
353 {
354     using iterator_type = IteratorType;
355     using char_type = typename std::iterator_traits<iterator_type>::value_type;
356     using base_adapter_type = iterator_input_adapter<iterator_type>;
357     using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>;
358 
createnlohmann::detail::iterator_input_adapter_factory359     static adapter_type create(IteratorType first, IteratorType last)
360     {
361         return adapter_type(base_adapter_type(std::move(first), std::move(last)));
362     }
363 };
364 
365 // General purpose iterator-based input
366 template<typename IteratorType>
input_adapter(IteratorType first,IteratorType last)367 typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
368 {
369     using factory_type = iterator_input_adapter_factory<IteratorType>;
370     return factory_type::create(first, last);
371 }
372 
// Convenience shorthand from container to iterator: reads the whole range
// [begin(container), end(container)).
// NOTE(review): the trailing return type names begin/end before the
// using-declarations in the body take effect, so this overload presumably
// only participates when begin/end are found for ContainerType there
// (e.g. by argument-dependent lookup) - confirm.
template<typename ContainerType>
auto input_adapter(const ContainerType& container) -> decltype(input_adapter(begin(container), end(container)))
{
    // make std::begin/std::end candidates next to ADL-found overloads
    using std::end;
    using std::begin;

    return input_adapter(begin(container), end(container));
}
383 
384 // Special cases with fast paths
input_adapter(std::FILE * file)385 inline file_input_adapter input_adapter(std::FILE* file)
386 {
387     return file_input_adapter(file);
388 }
389 
input_adapter(std::istream & stream)390 inline input_stream_adapter input_adapter(std::istream& stream)
391 {
392     return input_stream_adapter(stream);
393 }
394 
input_adapter(std::istream && stream)395 inline input_stream_adapter input_adapter(std::istream&& stream)
396 {
397     return input_stream_adapter(stream);
398 }
399 
400 using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
401 
402 // Null-delimited strings, and the like.
403 template < typename CharT,
404            typename std::enable_if <
405                std::is_pointer<CharT>::value&&
406                !std::is_array<CharT>::value&&
407                std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
408                sizeof(typename std::remove_pointer<CharT>::type) == 1,
409                int >::type = 0 >
input_adapter(CharT b)410 contiguous_bytes_input_adapter input_adapter(CharT b)
411 {
412     auto length = std::strlen(reinterpret_cast<const char*>(b));
413     const auto* ptr = reinterpret_cast<const char*>(b);
414     return input_adapter(ptr, ptr + length);
415 }
416 
// Built-in arrays: reads all N elements of the array (for an array holding a
// string literal this includes the terminating null character).
template<typename T, std::size_t N>
auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N))
{
    T* const first = array;
    return input_adapter(first, first + N);
}
422 
423 // This class only handles inputs of input_buffer_adapter type.
424 // It's required so that expressions like {ptr, len} can be implicitely casted
425 // to the correct adapter.
426 class span_input_adapter
427 {
428   public:
429     template < typename CharT,
430                typename std::enable_if <
431                    std::is_pointer<CharT>::value&&
432                    std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
433                    sizeof(typename std::remove_pointer<CharT>::type) == 1,
434                    int >::type = 0 >
span_input_adapter(CharT b,std::size_t l)435     span_input_adapter(CharT b, std::size_t l)
436         : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
437 
438     template<class IteratorType,
439              typename std::enable_if<
440                  std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
441                  int>::type = 0>
span_input_adapter(IteratorType first,IteratorType last)442     span_input_adapter(IteratorType first, IteratorType last)
443         : ia(input_adapter(first, last)) {}
444 
get()445     contiguous_bytes_input_adapter&& get()
446     {
447         return std::move(ia);
448     }
449 
450   private:
451     contiguous_bytes_input_adapter ia;
452 };
453 }  // namespace detail
454 }  // namespace nlohmann
455