1 #pragma once
2
3 #include <array> // array
4 #include <cstddef> // size_t
5 #include <cstdio> //FILE *
6 #include <cstring> // strlen
7 #include <istream> // istream
8 #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
9 #include <memory> // shared_ptr, make_shared, addressof
10 #include <numeric> // accumulate
11 #include <string> // string, char_traits
12 #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
13 #include <utility> // pair, declval
14
15 #include <nlohmann/detail/iterators/iterator_traits.hpp>
16 #include <nlohmann/detail/macro_scope.hpp>
17
18 namespace nlohmann
19 {
20 namespace detail
21 {
/// the supported input formats
enum class input_format_t
{
    json,
    cbor,
    msgpack,
    ubjson,
    bson
};
24
25 ////////////////////
26 // input adapters //
27 ////////////////////
28
29 /*!
30 Input adapter for stdio file access. This adapter read only 1 byte and do not use any
31 buffer. This adapter is a very low level adapter.
32 */
33 class file_input_adapter
34 {
35 public:
36 using char_type = char;
37
38 JSON_HEDLEY_NON_NULL(2)
file_input_adapter(std::FILE * f)39 explicit file_input_adapter(std::FILE* f) noexcept
40 : m_file(f)
41 {}
42
43 // make class move-only
44 file_input_adapter(const file_input_adapter&) = delete;
45 file_input_adapter(file_input_adapter&&) = default;
46 file_input_adapter& operator=(const file_input_adapter&) = delete;
47 file_input_adapter& operator=(file_input_adapter&&) = delete;
48
get_character()49 std::char_traits<char>::int_type get_character() noexcept
50 {
51 return std::fgetc(m_file);
52 }
53
54 private:
55 /// the file pointer to read from
56 std::FILE* m_file;
57 };
58
59
60 /*!
61 Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at
62 beginning of input. Does not support changing the underlying std::streambuf
63 in mid-input. Maintains underlying std::istream and std::streambuf to support
64 subsequent use of standard std::istream operations to process any input
65 characters following those used in parsing the JSON input. Clears the
66 std::istream flags; any input errors (e.g., EOF) will be detected by the first
67 subsequent call for input from the std::istream.
68 */
69 class input_stream_adapter
70 {
71 public:
72 using char_type = char;
73
~input_stream_adapter()74 ~input_stream_adapter()
75 {
76 // clear stream flags; we use underlying streambuf I/O, do not
77 // maintain ifstream flags, except eof
78 if (is != nullptr)
79 {
80 is->clear(is->rdstate() & std::ios::eofbit);
81 }
82 }
83
input_stream_adapter(std::istream & i)84 explicit input_stream_adapter(std::istream& i)
85 : is(&i), sb(i.rdbuf())
86 {}
87
88 // delete because of pointer members
89 input_stream_adapter(const input_stream_adapter&) = delete;
90 input_stream_adapter& operator=(input_stream_adapter&) = delete;
91 input_stream_adapter& operator=(input_stream_adapter&& rhs) = delete;
92
input_stream_adapter(input_stream_adapter && rhs)93 input_stream_adapter(input_stream_adapter&& rhs) noexcept : is(rhs.is), sb(rhs.sb)
94 {
95 rhs.is = nullptr;
96 rhs.sb = nullptr;
97 }
98
99 // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
100 // ensure that std::char_traits<char>::eof() and the character 0xFF do not
101 // end up as the same value, eg. 0xFFFFFFFF.
get_character()102 std::char_traits<char>::int_type get_character()
103 {
104 auto res = sb->sbumpc();
105 // set eof manually, as we don't use the istream interface.
106 if (JSON_HEDLEY_UNLIKELY(res == EOF))
107 {
108 is->clear(is->rdstate() | std::ios::eofbit);
109 }
110 return res;
111 }
112
113 private:
114 /// the associated input stream
115 std::istream* is = nullptr;
116 std::streambuf* sb = nullptr;
117 };
118
// General-purpose iterator-based adapter. It might not be as fast as
// theoretically possible for some containers, but it is extremely versatile.
template<typename IteratorType>
class iterator_input_adapter
{
  public:
    using char_type = typename std::iterator_traits<IteratorType>::value_type;

    /// @param[in] first  iterator to the first element to read
    /// @param[in] last   iterator one past the final element to read
    iterator_input_adapter(IteratorType first, IteratorType last)
        : current(std::move(first)), end(std::move(last))
    {}

    /// @return the element at the current position converted with
    ///         char_traits::to_int_type (the position is then advanced by
    ///         one), or char_traits::eof() once the range is exhausted
    typename std::char_traits<char_type>::int_type get_character()
    {
        if (JSON_HEDLEY_LIKELY(current != end))
        {
            const auto result = std::char_traits<char_type>::to_int_type(*current);
            std::advance(current, 1);
            return result;
        }

        return std::char_traits<char_type>::eof();
    }

  private:
    /// position of the next element to hand out
    IteratorType current;
    /// end of the range
    IteratorType end;

    // the wide-string helpers need empty() to detect exhausted input
    template<typename BaseInputAdapter, std::size_t T>
    friend struct wide_string_input_helper;

    /// @return whether all elements of the range have been consumed
    bool empty() const
    {
        return current == end;
    }
};
157
158
// Converts input read from a wide-character adapter into UTF-8 bytes. The
// second template argument is the size in bytes of the wide character type
// and selects the specialization (4: UTF-32, 2: UTF-16).
template<typename BaseInputAdapter, std::size_t T>
struct wide_string_input_helper;

template<typename BaseInputAdapter>
struct wide_string_input_helper<BaseInputAdapter, 4>
{
    // UTF-32
    /// Reads one character from @a input and stores its UTF-8 encoding.
    /// @param[in,out] input          adapter to read the character from
    /// @param[out] utf8_bytes        receives the encoded bytes (or a single
    ///                               eof value when @a input is empty)
    /// @param[out] utf8_bytes_index  always reset to 0
    /// @param[out] utf8_bytes_filled number of valid entries in @a utf8_bytes
    static void fill_buffer(BaseInputAdapter& input,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        using byte_type = std::char_traits<char>::int_type;

        utf8_bytes_index = 0;

        if (JSON_HEDLEY_UNLIKELY(input.empty()))
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = input.get_character();
        // same value as unsigned int, used for the bit manipulation below
        const auto cp = static_cast<unsigned int>(wc);

        // UTF-32 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<byte_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<byte_type>(0xC0u | ((cp >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<byte_type>(0x80u | (cp & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFF)
        {
            utf8_bytes[0] = static_cast<byte_type>(0xE0u | ((cp >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<byte_type>(0x80u | ((cp >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<byte_type>(0x80u | (cp & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFF)
        {
            utf8_bytes[0] = static_cast<byte_type>(0xF0u | ((cp >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<byte_type>(0x80u | ((cp >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<byte_type>(0x80u | ((cp >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<byte_type>(0x80u | (cp & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: pass the value on unchanged as a single entry
            utf8_bytes[0] = static_cast<byte_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
219
220 template<typename BaseInputAdapter>
221 struct wide_string_input_helper<BaseInputAdapter, 2>
222 {
223 // UTF-16
fill_buffernlohmann::detail::wide_string_input_helper224 static void fill_buffer(BaseInputAdapter& input,
225 std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
226 size_t& utf8_bytes_index,
227 size_t& utf8_bytes_filled)
228 {
229 utf8_bytes_index = 0;
230
231 if (JSON_HEDLEY_UNLIKELY(input.empty()))
232 {
233 utf8_bytes[0] = std::char_traits<char>::eof();
234 utf8_bytes_filled = 1;
235 }
236 else
237 {
238 // get the current character
239 const auto wc = input.get_character();
240
241 // UTF-16 to UTF-8 encoding
242 if (wc < 0x80)
243 {
244 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
245 utf8_bytes_filled = 1;
246 }
247 else if (wc <= 0x7FF)
248 {
249 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((static_cast<unsigned int>(wc) >> 6u)));
250 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
251 utf8_bytes_filled = 2;
252 }
253 else if (0xD800 > wc || wc >= 0xE000)
254 {
255 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((static_cast<unsigned int>(wc) >> 12u)));
256 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((static_cast<unsigned int>(wc) >> 6u) & 0x3Fu));
257 utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (static_cast<unsigned int>(wc) & 0x3Fu));
258 utf8_bytes_filled = 3;
259 }
260 else
261 {
262 if (JSON_HEDLEY_UNLIKELY(!input.empty()))
263 {
264 const auto wc2 = static_cast<unsigned int>(input.get_character());
265 const auto charcode = 0x10000u + (((static_cast<unsigned int>(wc) & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
266 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
267 utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
268 utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
269 utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
270 utf8_bytes_filled = 4;
271 }
272 else
273 {
274 utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
275 utf8_bytes_filled = 1;
276 }
277 }
278 }
279 }
280 };
281
282 // Wraps another input apdater to convert wide character types into individual bytes.
283 template<typename BaseInputAdapter, typename WideCharType>
284 class wide_string_input_adapter
285 {
286 public:
287 using char_type = char;
288
wide_string_input_adapter(BaseInputAdapter base)289 wide_string_input_adapter(BaseInputAdapter base)
290 : base_adapter(base) {}
291
get_character()292 typename std::char_traits<char>::int_type get_character() noexcept
293 {
294 // check if buffer needs to be filled
295 if (utf8_bytes_index == utf8_bytes_filled)
296 {
297 fill_buffer<sizeof(WideCharType)>();
298
299 JSON_ASSERT(utf8_bytes_filled > 0);
300 JSON_ASSERT(utf8_bytes_index == 0);
301 }
302
303 // use buffer
304 JSON_ASSERT(utf8_bytes_filled > 0);
305 JSON_ASSERT(utf8_bytes_index < utf8_bytes_filled);
306 return utf8_bytes[utf8_bytes_index++];
307 }
308
309 private:
310 BaseInputAdapter base_adapter;
311
312 template<size_t T>
fill_buffer()313 void fill_buffer()
314 {
315 wide_string_input_helper<BaseInputAdapter, T>::fill_buffer(base_adapter, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
316 }
317
318 /// a buffer for UTF-8 bytes
319 std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
320
321 /// index to the utf8_codes array for the next valid byte
322 std::size_t utf8_bytes_index = 0;
323 /// number of valid bytes in the utf8_codes array
324 std::size_t utf8_bytes_filled = 0;
325 };
326
327
328 template<typename IteratorType, typename Enable = void>
329 struct iterator_input_adapter_factory
330 {
331 using iterator_type = IteratorType;
332 using char_type = typename std::iterator_traits<iterator_type>::value_type;
333 using adapter_type = iterator_input_adapter<iterator_type>;
334
createnlohmann::detail::iterator_input_adapter_factory335 static adapter_type create(IteratorType first, IteratorType last)
336 {
337 return adapter_type(std::move(first), std::move(last));
338 }
339 };
340
/// Trait whose @a value is nonzero when the value type of iterator @a T is
/// larger than one byte.
template<typename T>
struct is_iterator_of_multibyte
{
    using value_type = typename std::iterator_traits<T>::value_type;
    enum
    {
        value = sizeof(value_type) > 1
    };
};
350
351 template<typename IteratorType>
352 struct iterator_input_adapter_factory<IteratorType, enable_if_t<is_iterator_of_multibyte<IteratorType>::value>>
353 {
354 using iterator_type = IteratorType;
355 using char_type = typename std::iterator_traits<iterator_type>::value_type;
356 using base_adapter_type = iterator_input_adapter<iterator_type>;
357 using adapter_type = wide_string_input_adapter<base_adapter_type, char_type>;
358
createnlohmann::detail::iterator_input_adapter_factory359 static adapter_type create(IteratorType first, IteratorType last)
360 {
361 return adapter_type(base_adapter_type(std::move(first), std::move(last)));
362 }
363 };
364
365 // General purpose iterator-based input
366 template<typename IteratorType>
input_adapter(IteratorType first,IteratorType last)367 typename iterator_input_adapter_factory<IteratorType>::adapter_type input_adapter(IteratorType first, IteratorType last)
368 {
369 using factory_type = iterator_input_adapter_factory<IteratorType>;
370 return factory_type::create(first, last);
371 }
372
// Convenience shorthand from container to iterator
/// @return the iterator-based adapter for the range
///         [begin(container), end(container)); the container is only
///         referenced through these iterators, not copied
template<typename ContainerType>
auto input_adapter(const ContainerType& container) -> decltype(input_adapter(begin(container), end(container)))
{
    // Enable ADL
    using std::begin;
    using std::end;

    return input_adapter(begin(container), end(container));
}
383
384 // Special cases with fast paths
input_adapter(std::FILE * file)385 inline file_input_adapter input_adapter(std::FILE* file)
386 {
387 return file_input_adapter(file);
388 }
389
input_adapter(std::istream & stream)390 inline input_stream_adapter input_adapter(std::istream& stream)
391 {
392 return input_stream_adapter(stream);
393 }
394
input_adapter(std::istream && stream)395 inline input_stream_adapter input_adapter(std::istream&& stream)
396 {
397 return input_stream_adapter(stream);
398 }
399
400 using contiguous_bytes_input_adapter = decltype(input_adapter(std::declval<const char*>(), std::declval<const char*>()));
401
402 // Null-delimited strings, and the like.
403 template < typename CharT,
404 typename std::enable_if <
405 std::is_pointer<CharT>::value&&
406 !std::is_array<CharT>::value&&
407 std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
408 sizeof(typename std::remove_pointer<CharT>::type) == 1,
409 int >::type = 0 >
input_adapter(CharT b)410 contiguous_bytes_input_adapter input_adapter(CharT b)
411 {
412 auto length = std::strlen(reinterpret_cast<const char*>(b));
413 const auto* ptr = reinterpret_cast<const char*>(b);
414 return input_adapter(ptr, ptr + length);
415 }
416
/// Overload for built-in arrays: forwards to the iterator-based adapter over
/// all N elements, [array, array + N).
template<typename T, std::size_t N>
auto input_adapter(T (&array)[N]) -> decltype(input_adapter(array, array + N))
{
    return input_adapter(array, array + N);
}
422
423 // This class only handles inputs of input_buffer_adapter type.
424 // It's required so that expressions like {ptr, len} can be implicitely casted
425 // to the correct adapter.
426 class span_input_adapter
427 {
428 public:
429 template < typename CharT,
430 typename std::enable_if <
431 std::is_pointer<CharT>::value&&
432 std::is_integral<typename std::remove_pointer<CharT>::type>::value&&
433 sizeof(typename std::remove_pointer<CharT>::type) == 1,
434 int >::type = 0 >
span_input_adapter(CharT b,std::size_t l)435 span_input_adapter(CharT b, std::size_t l)
436 : ia(reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(b) + l) {}
437
438 template<class IteratorType,
439 typename std::enable_if<
440 std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
441 int>::type = 0>
span_input_adapter(IteratorType first,IteratorType last)442 span_input_adapter(IteratorType first, IteratorType last)
443 : ia(input_adapter(first, last)) {}
444
get()445 contiguous_bytes_input_adapter&& get()
446 {
447 return std::move(ia);
448 }
449
450 private:
451 contiguous_bytes_input_adapter ia;
452 };
453 } // namespace detail
454 } // namespace nlohmann
455