• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #pragma once
2 
3 #include <array> // array
4 #include <cstddef> // size_t
5 #include <cstring> // strlen
6 #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
7 #include <memory> // shared_ptr, make_shared, addressof
8 #include <numeric> // accumulate
9 #include <string> // string, char_traits
10 #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
11 #include <utility> // pair, declval
12 
13 #ifndef JSON_NO_IO
14     #include <cstdio>   // FILE *
15     #include <istream>  // istream
16 #endif                  // JSON_NO_IO
17 
18 #include <nlohmann/detail/iterators/iterator_traits.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 
21 namespace nlohmann
22 {
23 namespace detail
24 {
/// enumerates the supported input formats
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
27 
28 ////////////////////
29 // input adapters //
30 ////////////////////
31 
32 #ifndef JSON_NO_IO
33 /*!
34 Input adapter for stdio file access. This adapter read only 1 byte and do not use any
35  buffer. This adapter is a very low level adapter.
36 */
37 class file_input_adapter
38 {
39   public:
40     using char_type = char;
41 
42     JSON_HEDLEY_NON_NULL(2)
file_input_adapter(std::FILE * f)43     explicit file_input_adapter(std::FILE* f) noexcept
44         : m_file(f)
45     {}
46 
47     // make class move-only
48     file_input_adapter(const file_input_adapter&) = delete;
49     file_input_adapter(file_input_adapter&&) noexcept = default;
50     file_input_adapter& operator=(const file_input_adapter&) = delete;
51     file_input_adapter& operator=(file_input_adapter&&) = delete;
52     ~file_input_adapter() = default;
53 
get_character()54     std::char_traits<char>::int_type get_character() noexcept
55     {
56         return std::fgetc(m_file);
57     }
58 
59   private:
60     /// the file pointer to read from
61     std::FILE* m_file;
62 };
63 
64 
65 /*!
66 Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
67 beginning of input. Does not support changing the underlying std::streambuf
68 in mid-input. Maintains underlying std::istream and std::streambuf to support
69 subsequent use of standard std::istream operations to process any input
70 characters following those used in parsing the JSON input.  Clears the
71 std::istream flags; any input errors (e.g., EOF) will be detected by the first
72 subsequent call for input from the std::istream.
73 */
74 class input_stream_adapter
75 {
76   public:
77     using char_type = char;
78 
~input_stream_adapter()79     ~input_stream_adapter()
80     {
81         // clear stream flags; we use underlying streambuf I/O, do not
82         // maintain ifstream flags, except eof
83         if (is != nullptr)
84         {
85             is->clear(is->rdstate() & std::ios::eofbit);
86         }
87     }
88 
input_stream_adapter(std::istream & i)89     explicit input_stream_adapter(std::istream& i)
90         : is(&i), sb(i.rdbuf())
91     {}
92 
93     // delete because of pointer members
94     input_stream_adapter(const input_stream_adapter&) = delete;
95     input_stream_adapter& operator=(input_stream_adapter&) = delete;
96     input_stream_adapter& operator=(input_stream_adapter&&) = delete;
97 
input_stream_adapter(input_stream_adapter && rhs)98     input_stream_adapter(input_stream_adapter&& rhs) noexcept
99         : is(rhs.is), sb(rhs.sb)
100     {
101         rhs.is = nullptr;
102         rhs.sb = nullptr;
103     }
104 
105     // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
106     // ensure that std::char_traits<char>::eof() and the character 0xFF do not
107     // end up as the same value, eg. 0xFFFFFFFF.
get_character()108     std::char_traits<char>::int_type get_character()
109     {
110         auto res = sb->sbumpc();
111         // set eof manually, as we don't use the istream interface.
112         if (JSON_HEDLEY_UNLIKELY(res == std::char_traits<char>::eof()))
113         {
114             is->clear(is->rdstate() | std::ios::eofbit);
115         }
116         return res;
117     }
118 
119   private:
120     /// the associated input stream
121     std::istream* is = nullptr;
122     std::streambuf* sb = nullptr;
123 };
124 #endif  // JSON_NO_IO
125 
// General-purpose adapter reading from an iterator range [first, last).
// It may not be the fastest possible choice for every container, but it
// works with any iterator type.
template<typename IteratorType>
class iterator_input_adapter
{
  public:
    using char_type = typename std::iterator_traits<IteratorType>::value_type;

    /// store the range delimited by @a first and @a last
    iterator_input_adapter(IteratorType first, IteratorType last)
        : current(std::move(first)), end(std::move(last))
    {}

    /// @return the element at the current position converted with
    ///         std::char_traits<char_type>::to_int_type (the position is then
    ///         advanced by one), or std::char_traits<char_type>::eof() once
    ///         the end of the range has been reached
    typename std::char_traits<char_type>::int_type get_character()
    {
        using traits_type = std::char_traits<char_type>;

        if (!JSON_HEDLEY_LIKELY(current != end))
        {
            return traits_type::eof();
        }

        const auto value = traits_type::to_int_type(*current);
        std::advance(current, 1);
        return value;
    }

  private:
    /// position of the next element to read
    IteratorType current;
    /// one-past-the-last position of the range
    IteratorType end;

    // the wide string helpers need access to empty()
    template<typename BaseInputAdapter, size_t T>
    friend struct wide_string_input_helper;

    /// @return whether the whole range has been consumed
    bool empty() const
    {
        return current == end;
    }
};
162 
163 
/// Helper that turns one wide character read from a base adapter into UTF-8
/// bytes. The second template argument selects the specialization.
template<typename BaseInputAdapter, size_t T>
struct wide_string_input_helper;

/// Specialization for UTF-32 input.
template<typename BaseInputAdapter>
struct wide_string_input_helper<BaseInputAdapter, 4>
{
    /*!
    @brief refill the byte buffer from the next UTF-32 character

    @param[in,out] input           adapter the wide character is read from
    @param[out] utf8_bytes         receives the produced bytes
    @param[out] utf8_bytes_index   always reset to 0
    @param[out] utf8_bytes_filled  number of entries written to @a utf8_bytes

    If @a input is exhausted, a single std::char_traits<char>::eof() entry is
    stored. Values above 0x10FFFF are stored unchanged as a single entry.
    */
    static void fill_buffer(BaseInputAdapter& input,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            size_t& utf8_bytes_index,
                            size_t& utf8_bytes_filled)
    {
        using byte_int = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        // nothing left to read: report end of input
        if (JSON_HEDLEY_UNLIKELY(input.empty()))
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = input.get_character();

        // one byte: the value is copied verbatim
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<byte_int>(wc);
            utf8_bytes_filled = 1;
            return;
        }

        // value used for all bit operations of the multi-byte encodings
        const auto bits = static_cast<unsigned int>(wc);

        // two bytes
        if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<byte_int>(0xC0u | ((bits >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<byte_int>(0x80u | (bits & 0x3Fu));
            utf8_bytes_filled = 2;
            return;
        }

        // three bytes
        if (wc <= 0xFFFF)
        {
            utf8_bytes[0] = static_cast<byte_int>(0xE0u | ((bits >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<byte_int>(0x80u | ((bits >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<byte_int>(0x80u | (bits & 0x3Fu));
            utf8_bytes_filled = 3;
            return;
        }

        // four bytes
        if (wc <= 0x10FFFF)
        {
            utf8_bytes[0] = static_cast<byte_int>(0xF0u | ((bits >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<byte_int>(0x80u | ((bits >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<byte_int>(0x80u | ((bits >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<byte_int>(0x80u | (bits & 0x3Fu));
            utf8_bytes_filled = 4;
            return;
        }

        // unknown character: pass the value on unchanged
        utf8_bytes[0] = static_cast<byte_int>(wc);
        utf8_bytes_filled = 1;
    }
};
224 
225 template<typename BaseInputAdapter>
226 struct wide_string_input_helper<BaseInputAdapter, 2>
227 {
228     // UTF-16
fill_buffernlohmann::detail::wide_string_input_helper229     static void fill_buffer(BaseInputAdapter& input,
230                             std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
231                             size_t& utf8_bytes_index,
232                             size_t& utf8_bytes_filled)
233     {
234         utf8_bytes_index = 0;
235 
236         if (JSON_HEDLEY_UNLIKELY(input.empty()))
237         {
238             utf8_bytes[0] = std::char_traits<char>::eof();
239             utf8_bytes_filled = 1;
240         }
241         else
242         {
243             // get the current character
244             const auto wc = input.get_character();
245 
246             // UTF-16 to UTF-8 encoding
247             if (wc < 0x80)
248             {
249                 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
250                 utf8_bytes_filled = 1;
251             }
252             else if (wc <= 0x7FF)
253             {
254                 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
255                 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
256                 utf8_bytes_filled = 2;
257             }
258             else if (0xD800 > wc || wc >= 0xE000)
259             {
260                 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
261                 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
262                 utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
263                 utf8_bytes_filled = 3;
264             }
265             else
266             {
267                 if (JSON_HEDLEY_UNLIKELY(!input.empty()))
268                 {
269                     const auto wc2 = static_cast<unsigned int>(input.get_character());
270                     const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
271                     utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
272                     utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
273                     utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
274                     utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
275                     utf8_bytes_filled = 4;
276                 }
277                 else
278                 {
279                     utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
280                     utf8_bytes_filled = 1;
281                 }
282             }
283         }
284     }
285 };
286 
287 // Wraps another input apdater to convert wide character types into individual bytes.
288 template<typename BaseInputAdapter, typename WideCharType>
289 class wide_string_input_adapter
290 {
291   public:
292     using char_type = char;
293 
wide_string_input_adapter(BaseInputAdapter base)294     wide_string_input_adapter(BaseInputAdapter base)
295         : base_adapter(base) {}
296 
get_character()297     typename std::char_traits<char>::int_type get_character() noexcept
298     {
299         // check if buffer needs to be filled
300         if (utf8_bytes_index == utf8_bytes_filled)
301         {
302             fill_buffer<sizeof(WideCharType)>();
303 
304             JSON_ASSERT(utf8_bytes_filled > 0);
305             JSON_ASSERT(utf8_bytes_index == 0);
306         }
307 
308         // use buffer
309         JSON_ASSERT(utf8_bytes_filled > 0);
310         JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
311         return utf8_bytes[utf8_bytes_index++];
312     }
313 
314   private:
315     BaseInputAdapter base_adapter;
316 
317     template<size_t T>
fill_buffer()318     void fill_buffer()
319     {
320         wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
321     }
322 
323     /// a buffer for UTF-8 bytes
324     std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
325 
326     /// index to the utf8_codes array for the next valid byte
327     std::size_t utf8_bytes_index = 0;
328     /// number of valid bytes in the utf8_codes array
329     std::size_t utf8_bytes_filled = 0;
330 };
331 
332 
333 template<typename IteratorType, typename Enable = void>
334 struct iterator_input_adapter_factory
335 {
336     using iterator_type = IteratorType;
337     using char_type = typename std::iterator_traits<iterator_type>::value_type;
338     using adapter_type = iterator_input_adapter<iterator_type>;
339 
createnlohmann::detail::iterator_input_adapter_factory340     static adapter_type create(IteratorType first, IteratorType last)
341     {
342         return adapter_type(std::move(first), std::move(last));
343     }
344 };
345 
/// Trait whose @a value is nonzero if the elements the iterator type @a T
/// refers to are larger than one byte.
template<typename T>
struct is_iterator_of_multibyte
{
    using value_type = typename std::iterator_traits<T>::value_type;

    enum { value = (sizeof(value_type) > 1) };
};
355 
356 template<typename IteratorType>
357 struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
358 {
359     using iterator_type = IteratorType;
360     using char_type = typename std::iterator_traits<iterator_type>::value_type;
361     using base_adapter_type = iterator_input_adapter<iterator_type>;
362     using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>;
363 
createnlohmann::detail::iterator_input_adapter_factory364     static adapter_type create(IteratorType first, IteratorType last)
365     {
366         return adapter_type(base_adapter_type(std::move(first), std::move(last)));
367     }
368 };
369 
370 // General purpose iterator-based input
371 template<typename IteratorType>
input_adapter(IteratorType first,IteratorType last)372 typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
373 {
374     using factory_type = iterator_input_adapter_factory<IteratorType>;
375     return factory_type::create(first, last);
376 }
377 
378 // Convenience shorthand from container to iterator
379 // Enables ADL on begin(container) and end(container)
380 // Encloses the using declarations in namespace for not to leak them to outside scope
381 
382 namespace container_input_adapter_factory_impl
383 {
384 
385 using std::begin;
386 using std::end;
387 
388 template<typename ContainerType, typename Enable = void>
389 struct container_input_adapter_factory {};
390 
391 template<typename ContainerType>
392 struct container_input_adapter_factory< ContainerType,
393        void_t<decltype(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>()))>>
394        {
395            using adapter_type = decltype(input_adapter(begin(std::declval<ContainerType>()), end(std::declval<ContainerType>())));
396 
createnlohmann::detail::container_input_adapter_factory_impl::container_input_adapter_factory397            static adapter_type create(const ContainerType& container)
398 {
399     return input_adapter(begin(container), end(container));
400 }
401        };
402 
403 } // namespace container_input_adapter_factory_impl
404 
405 template<typename ContainerType>
input_adapter(const ContainerType & container)406 typename container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::adapter_type input_adapter(const ContainerType& container)
407 {
408     return container_input_adapter_factory_impl::container_input_adapter_factory<ContainerType>::create(container);
409 }
410 
411 #ifndef JSON_NO_IO
412 // Special cases with fast paths
input_adapter(std::FILE * file)413 inline file_input_adapter input_adapter(std::FILE* file)
414 {
415     return file_input_adapter(file);
416 }
417 
input_adapter(std::istream & stream)418 inline input_stream_adapter input_adapter(std::istream& stream)
419 {
420     return input_stream_adapter(stream);
421 }
422 
input_adapter(std::istream && stream)423 inline input_stream_adapter input_adapter(std::istream&& stream)
424 {
425     return input_stream_adapter(stream);
426 }
427 #endif  // JSON_NO_IO
428 
429 using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
430 
431 // Null-delimited strings, and the like.
432 template < typename CharT,
433            typename std::enable_if <
434                std::is_pointer<CharT>::value&&
435                !std::is_array<CharT>::value&&
436                std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
437                sizeof(typename std::remove_pointer<CharT>::type) == 1,
438                int >::type = 0 >
input_adapter(CharT b)439 contiguous_bytes_input_adapter input_adapter(CharT b)
440 {
441     auto length = std::strlen(reinterpret_cast<const char*>(b));
442     const auto* ptr = reinterpret_cast<const char*>(b);
443     return input_adapter(ptr, ptr + length);
444 }
445 
/// create an input adapter covering all N elements of a built-in array
template<typename T, std::size_t N>
auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N)) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays)
{
    T* const first = array;
    return input_adapter(first, first + N);
}
451 
452 // This class only handles inputs of input_buffer_adapter type.
453 // It's required so that expressions like {ptr, len} can be implicitely casted
454 // to the correct adapter.
455 class span_input_adapter
456 {
457   public:
458     template < typename CharT,
459                typename std::enable_if <
460                    std::is_pointer<CharT>::value&&
461                    std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
462                    sizeof(typename std::remove_pointer<CharT>::type) == 1,
463                    int >::type = 0 >
span_input_adapter(CharT b,std::size_t l)464     span_input_adapter(CharT b, std::size_t l)
465         : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
466 
467     template<class IteratorType,
468              typename std::enable_if<
469                  std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
470                  int>::type = 0>
span_input_adapter(IteratorType first,IteratorType last)471     span_input_adapter(IteratorType first, IteratorType last)
472         : ia(input_adapter(first, last)) {}
473 
get()474     contiguous_bytes_input_adapter&& get()
475     {
476         return std::move(ia); // NOLINT(hicpp-move-const-arg,performance-move-const-arg)
477     }
478 
479   private:
480     contiguous_bytes_input_adapter ia;
481 };
482 }  // namespace detail
483 }  // namespace nlohmann
484