1 /*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14 #include <stdarg.h>
15
16 #include "qemu-common.h"
17 #include "qstring.h"
18 #include "qint.h"
19 #include "qdict.h"
20 #include "qlist.h"
21 #include "qfloat.h"
22 #include "qbool.h"
23 #include "json-parser.h"
24 #include "json-lexer.h"
25
/*
 * Parser state handed to every parsing routine in this file as `ctxt`.
 * It currently has no members; it exists so that state can be added later
 * without changing the signatures of the parsing functions.
 */
typedef struct JSONParserContext
{
} JSONParserContext;
29
/* Abort (via assert) when @cond is true; used for internal invariants. */
#define BUG_ON(cond) assert(!(cond))
31
32 /**
33 * TODO
34 *
35 * 0) make errors meaningful again
36 * 1) add geometry information to tokens
37 * 3) should we return a parsed size?
38 * 4) deal with premature EOI
39 */
40
/*
 * Forward declaration: parse_value() is mutually recursive with the
 * object, array and pair rules defined below.
 */
static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42
43 /**
44 * Token manipulators
45 *
46 * tokens are dictionaries that contain a type, a string value, and geometry information
47 * about a token identified by the lexer. These are routines that make working with
48 * these objects a bit easier.
49 */
/**
 * token_get_value(): Return the text of a token
 *
 * @obj is viewed as a dictionary and the string stored under its "token"
 * key is returned.
 */
static const char *token_get_value(QObject *obj)
{
    QDict *fields = qobject_to_qdict(obj);

    return qdict_get_str(fields, "token");
}
54
token_get_type(QObject * obj)55 static JSONTokenType token_get_type(QObject *obj)
56 {
57 return qdict_get_int(qobject_to_qdict(obj), "type");
58 }
59
/**
 * token_is_operator(): Test whether a token is the operator @op
 *
 * Returns non-zero only when @obj has type JSON_OPERATOR and its text is
 * exactly the single character @op; returns 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);

        /* exactly one character long, and that character is @op */
        return text[0] == op && text[1] == '\0';
    }

    return 0;
}
72
/**
 * token_is_keyword(): Test whether a token is the keyword @value
 *
 * Returns non-zero only when @obj has type JSON_KEYWORD and its text
 * compares equal to @value; returns 0 otherwise.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_KEYWORD &&
           strcmp(token_get_value(obj), value) == 0;
}
81
/**
 * token_is_escape(): Test whether a token is the escape sequence @value
 *
 * Returns non-zero only when @obj has type JSON_ESCAPE and its text
 * compares equal to @value (for example "%d"); returns 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    return token_get_type(obj) == JSON_ESCAPE &&
           strcmp(token_get_value(obj), value) == 0;
}
90
91 /**
92 * Error handler
93 */
parse_error(JSONParserContext * ctxt,QObject * token,const char * msg,...)94 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
95 QObject *token, const char *msg, ...)
96 {
97 va_list ap;
98 va_start(ap, msg);
99 fprintf(stderr, "parse error: ");
100 vfprintf(stderr, msg, ap);
101 fprintf(stderr, "\n");
102 va_end(ap);
103 }
104
105 /**
106 * String helpers
107 *
108 * These helpers are used to unescape strings.
109 */
/**
 * wchar_to_utf8(): Encode one 16-bit code unit as a NUL-terminated
 * UTF-8 sequence
 *
 * @wchar:         value to encode
 * @buffer:        destination; receives 1 to 3 encoded bytes plus a NUL
 * @buffer_length: size of @buffer in bytes; a BUG_ON() fires when it is
 *                 too small for the sequence that @wchar needs
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar <= 0x007F) {
        /* one byte: 0xxxxxxx */
        BUG_ON(buffer_length < 2);

        *out++ = wchar & 0x7F;
    } else if (wchar <= 0x07FF) {
        /* two bytes: 110xxxxx 10xxxxxx */
        BUG_ON(buffer_length < 3);

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        BUG_ON(buffer_length < 4);

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    }

    *out = 0;
}
132
/**
 * hex2decimal(): Convert one hexadecimal digit to its numeric value
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Returns the value 0..15, or -1
 * when @ch is not a hexadecimal digit.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return (ch - 'a') + 10;
    }
    if ('A' <= ch && ch <= 'F') {
        return (ch - 'A') + 10;
    }

    return -1;
}
145
146 /**
 * qstring_from_escaped_str(): Parse a JSON string token and return a QString
148 *
149 * string
150 * ""
151 * " chars "
152 * chars
153 * char
154 * char chars
155 * char
156 * any-Unicode-character-
157 * except-"-or-\-or-
158 * control-character
159 * \"
160 * \\
161 * \/
162 * \b
163 * \f
164 * \n
165 * \r
166 * \t
167 * \u four-hex-digits
168 */
qstring_from_escaped_str(JSONParserContext * ctxt,QObject * token)169 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170 {
171 const char *ptr = token_get_value(token);
172 QString *str;
173 int double_quote = 1;
174
175 if (*ptr == '"') {
176 double_quote = 1;
177 } else {
178 double_quote = 0;
179 }
180 ptr++;
181
182 str = qstring_new();
183 while (*ptr &&
184 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
185 if (*ptr == '\\') {
186 ptr++;
187
188 switch (*ptr) {
189 case '"':
190 qstring_append(str, "\"");
191 ptr++;
192 break;
193 case '\'':
194 qstring_append(str, "'");
195 ptr++;
196 break;
197 case '\\':
198 qstring_append(str, "\\");
199 ptr++;
200 break;
201 case '/':
202 qstring_append(str, "/");
203 ptr++;
204 break;
205 case 'b':
206 qstring_append(str, "\b");
207 ptr++;
208 break;
209 case 'f':
210 qstring_append(str, "\f");
211 ptr++;
212 break;
213 case 'n':
214 qstring_append(str, "\n");
215 ptr++;
216 break;
217 case 'r':
218 qstring_append(str, "\r");
219 ptr++;
220 break;
221 case 't':
222 qstring_append(str, "\t");
223 ptr++;
224 break;
225 case 'u': {
226 uint16_t unicode_char = 0;
227 char utf8_char[4];
228 int i = 0;
229
230 ptr++;
231
232 for (i = 0; i < 4; i++) {
233 if (qemu_isxdigit(*ptr)) {
234 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
235 } else {
236 parse_error(ctxt, token,
237 "invalid hex escape sequence in string");
238 goto out;
239 }
240 ptr++;
241 }
242
243 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
244 qstring_append(str, utf8_char);
245 } break;
246 default:
247 parse_error(ctxt, token, "invalid escape sequence in string");
248 goto out;
249 }
250 } else {
251 char dummy[2];
252
253 dummy[0] = *ptr++;
254 dummy[1] = 0;
255
256 qstring_append(str, dummy);
257 }
258 }
259
260 return str;
261
262 out:
263 QDECREF(str);
264 return NULL;
265 }
266
267 /**
268 * Parsing rules
269 */
/*
 * parse_pair(): Parse one `key : value` member and add it to @dict
 *
 * @ctxt:   parser context, forwarded to the other parsing routines
 * @dict:   dictionary that receives the parsed member
 * @tokens: in/out token list; replaced by the list of remaining tokens on
 *          success and left untouched on failure
 * @ap:     optional argument list used to resolve escape tokens
 *
 * Returns 0 on success and -1 on failure (after reporting the error).
 *
 * The token list may end before the pair is complete.  An exhausted list
 * yields a NULL token from qlist_peek()/qlist_pop(), so every such result
 * is checked before it is inspected or before parse_value() is invoked on
 * an empty list.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    QObject *key = NULL, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    key = parse_value(ctxt, &working, ap);
    if (!key || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    /* nothing left after the ':' means the value is missing entirely */
    if (qlist_peek(working) == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
310
parse_object(JSONParserContext * ctxt,QList ** tokens,va_list * ap)311 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
312 {
313 QDict *dict = NULL;
314 QObject *token, *peek;
315 QList *working = qlist_copy(*tokens);
316
317 token = qlist_pop(working);
318 if (!token_is_operator(token, '{')) {
319 goto out;
320 }
321 qobject_decref(token);
322 token = NULL;
323
324 dict = qdict_new();
325
326 peek = qlist_peek(working);
327 if (!token_is_operator(peek, '}')) {
328 if (parse_pair(ctxt, dict, &working, ap) == -1) {
329 goto out;
330 }
331
332 token = qlist_pop(working);
333 while (!token_is_operator(token, '}')) {
334 if (!token_is_operator(token, ',')) {
335 parse_error(ctxt, token, "expected separator in dict");
336 goto out;
337 }
338 qobject_decref(token);
339 token = NULL;
340
341 if (parse_pair(ctxt, dict, &working, ap) == -1) {
342 goto out;
343 }
344
345 token = qlist_pop(working);
346 }
347 qobject_decref(token);
348 token = NULL;
349 } else {
350 token = qlist_pop(working);
351 qobject_decref(token);
352 token = NULL;
353 }
354
355 QDECREF(*tokens);
356 *tokens = working;
357
358 return QOBJECT(dict);
359
360 out:
361 qobject_decref(token);
362 QDECREF(working);
363 QDECREF(dict);
364 return NULL;
365 }
366
parse_array(JSONParserContext * ctxt,QList ** tokens,va_list * ap)367 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
368 {
369 QList *list = NULL;
370 QObject *token, *peek;
371 QList *working = qlist_copy(*tokens);
372
373 token = qlist_pop(working);
374 if (!token_is_operator(token, '[')) {
375 goto out;
376 }
377 qobject_decref(token);
378 token = NULL;
379
380 list = qlist_new();
381
382 peek = qlist_peek(working);
383 if (!token_is_operator(peek, ']')) {
384 QObject *obj;
385
386 obj = parse_value(ctxt, &working, ap);
387 if (obj == NULL) {
388 parse_error(ctxt, token, "expecting value");
389 goto out;
390 }
391
392 qlist_append_obj(list, obj);
393
394 token = qlist_pop(working);
395 while (!token_is_operator(token, ']')) {
396 if (!token_is_operator(token, ',')) {
397 parse_error(ctxt, token, "expected separator in list");
398 goto out;
399 }
400
401 qobject_decref(token);
402 token = NULL;
403
404 obj = parse_value(ctxt, &working, ap);
405 if (obj == NULL) {
406 parse_error(ctxt, token, "expecting value");
407 goto out;
408 }
409
410 qlist_append_obj(list, obj);
411
412 token = qlist_pop(working);
413 }
414
415 qobject_decref(token);
416 token = NULL;
417 } else {
418 token = qlist_pop(working);
419 qobject_decref(token);
420 token = NULL;
421 }
422
423 QDECREF(*tokens);
424 *tokens = working;
425
426 return QOBJECT(list);
427
428 out:
429 qobject_decref(token);
430 QDECREF(working);
431 QDECREF(list);
432 return NULL;
433 }
434
parse_keyword(JSONParserContext * ctxt,QList ** tokens)435 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
436 {
437 QObject *token, *ret;
438 QList *working = qlist_copy(*tokens);
439
440 token = qlist_pop(working);
441
442 if (token_get_type(token) != JSON_KEYWORD) {
443 goto out;
444 }
445
446 if (token_is_keyword(token, "true")) {
447 ret = QOBJECT(qbool_from_int(true));
448 } else if (token_is_keyword(token, "false")) {
449 ret = QOBJECT(qbool_from_int(false));
450 } else {
451 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
452 goto out;
453 }
454
455 qobject_decref(token);
456 QDECREF(*tokens);
457 *tokens = working;
458
459 return ret;
460
461 out:
462 qobject_decref(token);
463 QDECREF(working);
464
465 return NULL;
466 }
467
parse_escape(JSONParserContext * ctxt,QList ** tokens,va_list * ap)468 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
469 {
470 QObject *token = NULL, *obj;
471 QList *working = qlist_copy(*tokens);
472
473 if (ap == NULL) {
474 goto out;
475 }
476
477 token = qlist_pop(working);
478
479 if (token_is_escape(token, "%p")) {
480 obj = va_arg(*ap, QObject *);
481 } else if (token_is_escape(token, "%i")) {
482 obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
483 } else if (token_is_escape(token, "%d")) {
484 obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
485 } else if (token_is_escape(token, "%ld")) {
486 obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
487 } else if (token_is_escape(token, "%lld") ||
488 token_is_escape(token, "%I64d")) {
489 obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
490 } else if (token_is_escape(token, "%s")) {
491 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
492 } else if (token_is_escape(token, "%f")) {
493 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
494 } else {
495 goto out;
496 }
497
498 qobject_decref(token);
499 QDECREF(*tokens);
500 *tokens = working;
501
502 return obj;
503
504 out:
505 qobject_decref(token);
506 QDECREF(working);
507
508 return NULL;
509 }
510
parse_literal(JSONParserContext * ctxt,QList ** tokens)511 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
512 {
513 QObject *token, *obj;
514 QList *working = qlist_copy(*tokens);
515
516 token = qlist_pop(working);
517 switch (token_get_type(token)) {
518 case JSON_STRING:
519 obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
520 break;
521 case JSON_INTEGER:
522 obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
523 break;
524 case JSON_FLOAT:
525 /* FIXME dependent on locale */
526 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
527 break;
528 default:
529 goto out;
530 }
531
532 qobject_decref(token);
533 QDECREF(*tokens);
534 *tokens = working;
535
536 return obj;
537
538 out:
539 qobject_decref(token);
540 QDECREF(working);
541
542 return NULL;
543 }
544
parse_value(JSONParserContext * ctxt,QList ** tokens,va_list * ap)545 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
546 {
547 QObject *obj;
548
549 obj = parse_object(ctxt, tokens, ap);
550 if (obj == NULL) {
551 obj = parse_array(ctxt, tokens, ap);
552 }
553 if (obj == NULL) {
554 obj = parse_escape(ctxt, tokens, ap);
555 }
556 if (obj == NULL) {
557 obj = parse_keyword(ctxt, tokens);
558 }
559 if (obj == NULL) {
560 obj = parse_literal(ctxt, tokens);
561 }
562
563 return obj;
564 }
565
json_parser_parse(QList * tokens,va_list * ap)566 QObject *json_parser_parse(QList *tokens, va_list *ap)
567 {
568 JSONParserContext ctxt = {};
569 QList *working = qlist_copy(tokens);
570 QObject *result;
571
572 result = parse_value(&ctxt, &working, ap);
573
574 QDECREF(working);
575
576 return result;
577 }
578