1 #include "iwjson.h"
2 #include "iwjson_internal.h"
3 #include "iwconv.h"
4 #include "utf8proc.h"
5
6 #include <errno.h>
7 #include <stdlib.h>
8 #include <locale.h>
9 #include <assert.h>
10
11 #define IS_WHITESPACE(c_) ((unsigned char) (c_) <= (unsigned char) ' ')
12
/**
 * JSON parsing context.
 *
 * A single instance is created per parse and passed by pointer to the
 * parsing routines, which allocate from `pool` and report failures via `rc`.
 */
typedef struct JCTX {
  IWPOOL *pool;     /**< Pool from which nodes, keys and string values are allocated. */
  JBL_NODE root;    /**< First node created during the parse; handed back to the caller as the tree root. */
  const char *buf;  /**< Start of the JSON text; moved past a UTF-8 BOM when one is present. */
  const char *sp;   /**< NOTE(review): not referenced by the code visible in this section. */
  iwrc rc;          /**< Status code: zero while parsing succeeds, non-zero once a step has failed. */
} JCTX;
21
/**
 * Appends `node` to the end of the child list of `parent`.
 *
 * The child list is linked through `next`/`prev`, where `prev` of the first
 * child is used as a shortcut to the current last child (it stays untouched
 * while the list has a single element). When `parent` is an array the
 * element index of `node` is stored in `node->klidx`.
 *
 * @param parent Container node receiving the new child. Must not be NULL.
 * @param node   Node to attach. Must not be NULL.
 */
static void _jbn_add_item(JBL_NODE parent, JBL_NODE node) {
  assert(parent && node);

  JBL_NODE head = parent->child;
  node->parent = parent;
  node->next = 0;

  if (!head) {
    // First child: `node->prev` is deliberately left as supplied by the caller.
    parent->child = node;
  } else {
    // The head's `prev` designates the tail; with one child the head is the tail.
    JBL_NODE tail = head->prev ? head->prev : head;
    head->prev = node;
    tail->next = node;
    node->prev = tail;
  }

  if (parent->type == JBV_ARRAY) {
    node->klidx = node->prev ? node->prev->klidx + 1 : 0;
  }
}
47
/**
 * Allocates a zeroed node from the context pool and links it into the tree.
 *
 * The node is attached to `parent` when one is given, and becomes the context
 * root if no root has been recorded yet.
 *
 * @param type   Value type assigned to the new node.
 * @param key    Key pointer stored in the node as is (not copied).
 * @param klidx  Key length or array index stored in the node.
 * @param parent Optional container to which the node is appended.
 * @param ctx    Parsing context providing the pool; `ctx->rc` is set on failure.
 * @return The new node, or 0 when allocation fails.
 */
static JBL_NODE _jbl_json_create_node(jbl_type_t type, const char *key, int klidx, JBL_NODE parent, JCTX *ctx) {
  JBL_NODE created = iwpool_calloc(sizeof(*created), ctx->pool);
  if (created) {
    created->type = type;
    created->key = key;
    created->klidx = klidx;
    if (parent) {
      _jbn_add_item(parent, created);
    }
    if (!ctx->root) {
      ctx->root = created;
    }
    return created;
  }
  ctx->rc = iwrc_set_errno(IW_ERROR_ALLOC, errno);
  return 0;
}
65
_jbl_skip_bom(JCTX * ctx)66 IW_INLINE void _jbl_skip_bom(JCTX *ctx) {
67 const char *p = ctx->buf;
68 if ((p[0] == '\xEF') && (p[1] == '\xBB') && (p[2] == '\xBF')) {
69 ctx->buf += 3;
70 }
71 }
72
/**
 * Converts a single hexadecimal digit character to its numeric value.
 *
 * @param c Character to convert; both `a-f` and `A-F` are accepted.
 * @return Value in the range 0..15, or -1 when `c` is not a hex digit.
 */
IW_INLINE int _jbl_hex(char c) {
  int value = -1;
  if ((c >= '0') && (c <= '9')) {
    value = c - '0';
  } else if ((c >= 'a') && (c <= 'f')) {
    value = 10 + (c - 'a');
  } else if ((c >= 'A') && (c <= 'F')) {
    value = 10 + (c - 'A');
  }
  return value;
}
85
/**
 * Decodes the body of a JSON string literal, resolving backslash escapes.
 *
 * The function is designed to be called twice: first with `d == 0` and
 * `dlen == 0` to measure the decoded length, then with a buffer of that size
 * to obtain the data. Bytes that do not fit into the buffer are counted but
 * not stored. No NUL terminator is written.
 *
 * `\uXXXX` escapes are converted to UTF-8; a high surrogate must be followed
 * immediately by a `\uXXXX` low surrogate. An unrecognized escape keeps its
 * backslash verbatim and the following character is processed as ordinary
 * input.
 *
 * @param p    Input positioned at the first character after the opening quote.
 * @param d    Destination buffer; may be NULL when `dlen` is 0.
 * @param dlen Capacity of `d` in bytes.
 * @param end  Optional. On success receives the position just past the closing quote.
 * @param rcp  Receives 0 on success, JBL_ERROR_PARSE_INVALID_CODEPOINT for a bad
 *             `\u` escape, or JBL_ERROR_PARSE_UNQUOTED_STRING when the input ends
 *             before the closing quote.
 * @return Number of decoded bytes (may exceed `dlen`), or 0 on error.
 */
static int _jbl_unescape_json_string(const char *p, char *d, int dlen, const char **end, iwrc *rcp) {
  *rcp = 0;
  char c;
  int n = 0; // Decoded bytes produced so far, stored or not

  // Count one decoded byte and store it when it fits. Indexing by `n` instead of
  // advancing `d` keeps the length-only pass (d == NULL) free of pointer
  // arithmetic on a null pointer.
#define JBL_UNESCAPE_EMIT_(ch_) do { \
    if (n < dlen) {                  \
      d[n] = (char) (ch_);           \
    }                                \
    ++n;                             \
} while (0)

  while ((c = *p++)) {
    if (c == '"') { // string closing quotes
      if (end) {
        *end = p;
      }
      return n;
    }
    if (c != '\\') {
      JBL_UNESCAPE_EMIT_(c);
      continue;
    }
    switch (*p) {
      case '\\':
      case '/':
      case '"':
        JBL_UNESCAPE_EMIT_(*p);
        ++p;
        break;
      case 'b':
        JBL_UNESCAPE_EMIT_('\b');
        ++p;
        break;
      case 'f':
        JBL_UNESCAPE_EMIT_('\f');
        ++p;
        break;
      case 'n':
        JBL_UNESCAPE_EMIT_('\n');
        ++p;
        break;
      case 'r':
        // Carriage return must not be folded into line feed
        JBL_UNESCAPE_EMIT_('\r');
        ++p;
        break;
      case 't':
        JBL_UNESCAPE_EMIT_('\t');
        ++p;
        break;
      case 'u': {
        uint32_t cp, cp2;
        int h1, h2, h3, h4;
        // Short-circuit evaluation stops at the first non-hex byte, so the
        // NUL terminator is never read past.
        if (  ((h1 = _jbl_hex(p[1])) < 0) || ((h2 = _jbl_hex(p[2])) < 0)
           || ((h3 = _jbl_hex(p[3])) < 0) || ((h4 = _jbl_hex(p[4])) < 0)) {
          *rcp = JBL_ERROR_PARSE_INVALID_CODEPOINT;
          return 0;
        }
        cp = h1 << 12 | h2 << 8 | h3 << 4 | h4;
        if ((cp & 0xfc00) == 0xd800) {
          // High surrogate: step onto the `u` of the mandatory low surrogate escape
          p += 6;
          if (  (p[-1] != '\\') || (*p != 'u')
             || ((h1 = _jbl_hex(p[1])) < 0) || ((h2 = _jbl_hex(p[2])) < 0)
             || ((h3 = _jbl_hex(p[3])) < 0) || ((h4 = _jbl_hex(p[4])) < 0)) {
            *rcp = JBL_ERROR_PARSE_INVALID_CODEPOINT;
            return 0;
          }
          cp2 = h1 << 12 | h2 << 8 | h3 << 4 | h4;
          if ((cp2 & 0xfc00) != 0xdc00) {
            *rcp = JBL_ERROR_PARSE_INVALID_CODEPOINT;
            return 0;
          }
          cp = 0x10000 + ((cp - 0xd800) << 10) + (cp2 - 0xdc00);
        }
        if (!utf8proc_codepoint_valid(cp)) {
          *rcp = JBL_ERROR_PARSE_INVALID_CODEPOINT;
          return 0;
        }
        uint8_t uchars[4];
        utf8proc_ssize_t ulen = utf8proc_encode_char(cp, uchars);
        assert(ulen >= 0 && (size_t) ulen <= sizeof(uchars));
        for (int i = 0; i < ulen; ++i) {
          JBL_UNESCAPE_EMIT_(uchars[i]);
        }
        p += 5; // skip `u` and the four hex digits
        break;
      }
      default:
        // Unknown escape: keep the backslash, the next character is handled
        // by the following loop iteration.
        JBL_UNESCAPE_EMIT_(c);
    }
  }
#undef JBL_UNESCAPE_EMIT_

  // Input ended before the closing quote
  *rcp = JBL_ERROR_PARSE_UNQUOTED_STRING;
  return 0;
}
189
/**
 * Parses the next object key together with its trailing `:` separator.
 *
 * Leading whitespace and commas are skipped. The decoded key is allocated
 * from the context pool; an empty key (`""`) yields a pointer to a static
 * empty string.
 *
 * @param key Receives the decoded, NUL terminated key. Set to "" when the
 *            object ends before another key is found.
 * @param p   Current input position inside an object body.
 * @param ctx Parsing context; `ctx->rc` is set on failure.
 * @return Position just past the `:` following the key, or a pointer to the
 *         `}` closing the object, or 0 on error.
 */
static const char* _jbl_parse_key(const char **key, const char *p, JCTX *ctx) {
  char c;
  *key = "";
  while ((c = *p++)) {
    if (c == '"') {
      // First pass only measures the decoded key and locates the closing quote
      const char *end = 0;
      int len = _jbl_unescape_json_string(p, 0, 0, &end, &ctx->rc);
      if (ctx->rc) {
        return 0;
      }
      if (len) {
        char *kptr = iwpool_alloc(len + 1, ctx->pool);
        if (!kptr) {
          ctx->rc = iwrc_set_errno(IW_ERROR_ALLOC, errno);
          return 0;
        }
        if ((len != _jbl_unescape_json_string(p, kptr, len, &p, &ctx->rc)) || ctx->rc) {
          if (!ctx->rc) {
            ctx->rc = JBL_ERROR_PARSE_JSON;
          }
          return 0;
        }
        kptr[len] = '\0';
        *key = kptr;
      } else {
        // Empty key: nothing to decode, but the closing quote still has to be
        // consumed, otherwise the `:` check below fails on valid input.
        p = end;
      }
      while (*p && IS_WHITESPACE(*p)) p++;
      if (*p == ':') {
        return p + 1;
      }
      ctx->rc = JBL_ERROR_PARSE_JSON;
      return 0;
    } else if (c == '}') {
      return p - 1;
    } else if (IS_WHITESPACE(c) || (c == ',')) {
      continue;
    } else {
      ctx->rc = JBL_ERROR_PARSE_JSON;
      return 0;
    }
  }
  // Input ended inside an object
  ctx->rc = JBL_ERROR_PARSE_JSON;
  return 0;
}
232
/**
 * Parses one JSON value starting at `p` and attaches it to `parent`.
 *
 * Whitespace and commas preceding the value are skipped. Objects and arrays
 * are parsed recursively, with the nesting depth limited by
 * JBL_MAX_NESTING_LEVEL. A `]` is not consumed and creates no node: the
 * position of the bracket is returned so that the array loop of the caller
 * can detect the end of the array.
 *
 * @param lvl    Current nesting depth, 0 for the top level value.
 * @param parent Container receiving the value, or 0 for the top level value.
 * @param key    Key stored in the created node (0 for array elements).
 * @param klidx  Key length or array index stored in the created node.
 * @param p      Current input position.
 * @param ctx    Parsing context; `ctx->rc` is set on failure.
 * @return Position just past the parsed value (or at an unconsumed `]`),
 *         or 0 on error.
 */
static const char* _jbl_parse_value(
  int         lvl,
  JBL_NODE    parent,
  const char *key, int klidx,
  const char *p,
  JCTX       *ctx
  ) {
  if (lvl > JBL_MAX_NESTING_LEVEL) {
    ctx->rc = JBL_ERROR_MAX_NESTING_LEVEL_EXCEEDED;
    return 0;
  }

  JBL_NODE node;
  while (1) {
    switch (*p) {
      case '\0':
        ctx->rc = JBL_ERROR_PARSE_JSON;
        return 0;
      case ' ':
      case '\t':
      case '\n':
      case '\r':
      case ',':
        ++p;
        break;
      case 'n':
        if (!strncmp(p, "null", 4)) {
          _jbl_json_create_node(JBV_NULL, key, klidx, parent, ctx);
          if (ctx->rc) {
            return 0;
          }
          return p + 4;
        }
        ctx->rc = JBL_ERROR_PARSE_JSON;
        return 0;
      case 't':
        if (!strncmp(p, "true", 4)) {
          node = _jbl_json_create_node(JBV_BOOL, key, klidx, parent, ctx);
          if (ctx->rc) {
            return 0;
          }
          node->vbool = true; // -V522
          return p + 4;
        }
        ctx->rc = JBL_ERROR_PARSE_JSON;
        return 0;
      case 'f':
        if (!strncmp(p, "false", 5)) {
          node = _jbl_json_create_node(JBV_BOOL, key, klidx, parent, ctx);
          if (ctx->rc) {
            return 0;
          }
          node->vbool = false;
          return p + 5;
        }
        ctx->rc = JBL_ERROR_PARSE_JSON;
        return 0;
      case '"': {
        ++p;
        // First pass measures the decoded string and locates the closing quote
        const char *end;
        int len = _jbl_unescape_json_string(p, 0, 0, &end, &ctx->rc);
        if (ctx->rc) {
          return 0;
        }
        node = _jbl_json_create_node(JBV_STR, key, klidx, parent, ctx);
        if (ctx->rc) {
          return 0;
        }
        if (len) {
          char *vptr = iwpool_alloc(len + 1, ctx->pool);
          if (!vptr) {
            ctx->rc = iwrc_set_errno(IW_ERROR_ALLOC, errno);
            return 0;
          }
          if ((len != _jbl_unescape_json_string(p, vptr, len, &p, &ctx->rc)) || ctx->rc) {
            if (!ctx->rc) {
              ctx->rc = JBL_ERROR_PARSE_JSON;
            }
            return 0;
          }
          vptr[len] = '\0';
          node->vptr = vptr;
          node->vsize = len;
        } else {
          p = end;
          node->vptr = "";
          node->vsize = 0;
        }
        return p;
      }
      case '{':
        node = _jbl_json_create_node(JBV_OBJECT, key, klidx, parent, ctx);
        if (ctx->rc) {
          return 0;
        }
        ++p;
        while (1) {
          const char *nkey;
          p = _jbl_parse_key(&nkey, p, ctx);
          if (ctx->rc) {
            return 0;
          }
          if (*p == '}') {
            return p + 1; // -V522
          }
          p = _jbl_parse_value(lvl + 1, node, nkey, (int) strlen(nkey), p, ctx);
          if (ctx->rc) {
            return 0;
          }
        }
        break;
      case '[':
        node = _jbl_json_create_node(JBV_ARRAY, key, klidx, parent, ctx);
        if (ctx->rc) {
          return 0;
        }
        ++p;
        for (int i = 0; ; ++i) {
          p = _jbl_parse_value(lvl + 1, node, 0, i, p, ctx);
          if (ctx->rc) {
            return 0;
          }
          if (*p == ']') {
            return p + 1;
          }
        }
        break;
      case ']':
        // Left for the enclosing array loop, no node is created
        return p;
      case '-':
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9': {
        node = _jbl_json_create_node(JBV_I64, key, klidx, parent, ctx);
        if (ctx->rc) {
          return 0;
        }
        char *pe;
        // errno must be cleared first: strtoll() only ever sets it, so a stale
        // ERANGE left by unrelated code would reject a valid number.
        // NOTE(review): base 0 also accepts hex (`0x..`) and octal (`010` == 8)
        // literals which are not JSON — confirm whether this leniency is intended.
        errno = 0;
        node->vi64 = strtoll(p, &pe, 0);
        if (pe == p) {
          ctx->rc = JBL_ERROR_PARSE_JSON;
          return 0;
        }
        if ((*pe == '.') || (*pe == 'e') || (*pe == 'E')) {
          // Floating point literal: the integer prefix may legitimately exceed
          // the int64 range, so only the result of the double conversion counts.
          node->type = JBV_F64;
          errno = 0;
          node->vf64 = iwstrtod(p, &pe);
          if ((pe == p) || (errno == ERANGE)) {
            ctx->rc = JBL_ERROR_PARSE_JSON;
            return 0;
          }
        } else if (errno == ERANGE) {
          // Integer literal out of the int64 range
          ctx->rc = JBL_ERROR_PARSE_JSON;
          return 0;
        }
        return pe;
      }
      default:
        ctx->rc = JBL_ERROR_PARSE_JSON;
        return 0;
    }
  }
  return p; // not reached
}
400
_jbl_node_as_json(JBL_NODE node,jbl_json_printer pt,void * op,int lvl,jbl_print_flags_t pf)401 static iwrc _jbl_node_as_json(JBL_NODE node, jbl_json_printer pt, void *op, int lvl, jbl_print_flags_t pf) {
402 iwrc rc = 0;
403 bool pretty = pf & JBL_PRINT_PRETTY;
404
405 #define PT(data_, size_, ch_, count_) do { \
406 rc = pt(data_, size_, ch_, count_, op); \
407 RCRET(rc); \
408 } while (0)
409
410 switch (node->type) {
411 case JBV_ARRAY:
412 PT(0, 0, '[', 1);
413 if (node->child && pretty) {
414 PT(0, 0, '\n', 1);
415 }
416 for (JBL_NODE n = node->child; n; n = n->next) {
417 if (pretty) {
418 PT(0, 0, ' ', lvl + 1);
419 }
420 rc = _jbl_node_as_json(n, pt, op, lvl + 1, pf);
421 RCRET(rc);
422 if (n->next) {
423 PT(0, 0, ',', 1);
424 }
425 if (pretty) {
426 PT(0, 0, '\n', 1);
427 }
428 }
429 if (node->child && pretty) {
430 PT(0, 0, ' ', lvl);
431 }
432 PT(0, 0, ']', 1);
433 break;
434 case JBV_OBJECT:
435 PT(0, 0, '{', 1);
436 if (node->child && pretty) {
437 PT(0, 0, '\n', 1);
438 }
439 for (JBL_NODE n = node->child; n; n = n->next) {
440 if (pretty) {
441 PT(0, 0, ' ', lvl + 1);
442 }
443 rc = _jbl_write_string(n->key, n->klidx, pt, op, pf);
444 RCRET(rc);
445 if (pretty) {
446 PT(": ", -1, 0, 0);
447 } else {
448 PT(0, 0, ':', 1);
449 }
450 rc = _jbl_node_as_json(n, pt, op, lvl + 1, pf);
451 RCRET(rc);
452 if (n->next) {
453 PT(0, 0, ',', 1);
454 }
455 if (pretty) {
456 PT(0, 0, '\n', 1);
457 }
458 }
459 if (node->child && pretty) {
460 PT(0, 0, ' ', lvl);
461 }
462 PT(0, 0, '}', 1);
463 break;
464 case JBV_STR:
465 rc = _jbl_write_string(node->vptr, node->vsize, pt, op, pf);
466 break;
467 case JBV_I64:
468 rc = _jbl_write_int(node->vi64, pt, op);
469 break;
470 case JBV_F64:
471 rc = _jbl_write_double(node->vf64, pt, op);
472 break;
473 case JBV_BOOL:
474 if (node->vbool) {
475 PT("true", 4, 0, 1);
476 } else {
477 PT("false", 5, 0, 1);
478 }
479 break;
480 case JBV_NULL:
481 PT("null", 4, 0, 1);
482 break;
483 default:
484 iwlog_ecode_error3(IW_ERROR_ASSERTION);
485 return IW_ERROR_ASSERTION;
486 }
487 #undef PT
488 return rc;
489 }
490
/**
 * Creates a copy of a single node without its tree links.
 *
 * The key and, for string nodes, the value are duplicated into `pool`.
 * The `child`, `next`, `prev` and `parent` links of the copy stay zeroed.
 *
 * @param src  Node to copy.
 * @param pool Pool providing memory for the copy.
 * @return The new node, or 0 when an allocation fails.
 */
static JBL_NODE _jbl_clone_node_struct(JBL_NODE src, IWPOOL *pool) {
  iwrc rc = 0;
  JBL_NODE copy = iwpool_calloc(sizeof(*copy), pool);
  if (!copy) {
    return 0;
  }
  copy->type = src->type;
  copy->flags = src->flags;
  copy->klidx = src->klidx;
  copy->vsize = src->vsize;

  if (src->key) {
    copy->key = iwpool_strndup(pool, src->key, src->klidx, &rc);
    if (!copy->key) {
      return 0;
    }
  }

  // Only scalar payloads are carried over, containers start out empty
  if (src->type == JBV_STR) {
    copy->vptr = iwpool_strndup(pool, src->vptr, src->vsize, &rc);
    if (!copy->vptr) {
      return 0;
    }
  } else if (src->type == JBV_I64) {
    copy->vi64 = src->vi64;
  } else if (src->type == JBV_BOOL) {
    copy->vbool = src->vbool;
  } else if (src->type == JBV_F64) {
    copy->vf64 = src->vf64;
  }
  return copy;
}
531
_jbl_clone_node_visit(int lvl,JBL_NODE n,const char * key,int klidx,JBN_VCTX * vctx,iwrc * rc)532 static jbn_visitor_cmd_t _jbl_clone_node_visit(
533 int lvl, JBL_NODE n, const char *key, int klidx, JBN_VCTX *vctx,
534 iwrc *rc
535 ) {
536 if (lvl < 0) {
537 return JBL_VCMD_OK;
538 }
539 JBL_NODE parent = vctx->root;
540 if (lvl < vctx->pos) { // Pop
541 for ( ; lvl < vctx->pos; --vctx->pos) {
542 parent = parent->parent;
543 assert(parent);
544 }
545 vctx->root = parent;
546 assert(vctx->root);
547 } else if (lvl > vctx->pos) { // Push
548 vctx->pos = lvl;
549 parent = vctx->op;
550 vctx->root = parent;
551 assert(parent);
552 }
553 JBL_NODE nn = _jbl_clone_node_struct(n, vctx->pool);
554 if (!nn) {
555 *rc = iwrc_set_errno(IW_ERROR_ALLOC, errno);
556 return JBL_VCMD_TERMINATE;
557 }
558 _jbn_add_item(parent, nn);
559 if (nn->type >= JBV_OBJECT) {
560 vctx->op = nn; // Remeber the last container object
561 }
562 return JBL_VCMD_OK;
563 }
564
jbn_clone(JBL_NODE src,JBL_NODE * targetp,IWPOOL * pool)565 iwrc jbn_clone(JBL_NODE src, JBL_NODE *targetp, IWPOOL *pool) {
566 *targetp = 0;
567 JBL_NODE n = _jbl_clone_node_struct(src, pool);
568 if (!n) {
569 return iwrc_set_errno(IW_ERROR_ALLOC, errno);
570 }
571 JBN_VCTX vctx = {
572 .pool = pool,
573 .root = n,
574 .op = n
575 };
576 iwrc rc = jbn_visit(src, 0, &vctx, _jbl_clone_node_visit);
577 RCRET(rc);
578 *targetp = n;
579 return 0;
580 }
581
jbn_as_json(JBL_NODE node,jbl_json_printer pt,void * op,jbl_print_flags_t pf)582 iwrc jbn_as_json(JBL_NODE node, jbl_json_printer pt, void *op, jbl_print_flags_t pf) {
583 return _jbl_node_as_json(node, pt, op, 0, pf);
584 }
585
jbn_from_json(const char * json,JBL_NODE * node,IWPOOL * pool)586 iwrc jbn_from_json(const char *json, JBL_NODE *node, IWPOOL *pool) {
587 *node = 0;
588 JCTX ctx = {
589 .pool = pool,
590 .buf = json
591 };
592 _jbl_skip_bom(&ctx);
593 _jbl_parse_value(0, 0, 0, 0, ctx.buf, &ctx);
594 *node = ctx.root;
595 return ctx.rc;
596 }
597
598