1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "tools/cddl/parse.h"
6
7 #include <unistd.h>
8
9 #include <algorithm>
10 #include <iostream>
11 #include <memory>
12 #include <sstream>
13 #include <vector>
14
15 #include "absl/strings/ascii.h"
16 #include "absl/strings/match.h"
17 #include "absl/types/optional.h"
18 #include "tools/cddl/logging.h"
19
20 static_assert(sizeof(absl::string_view::size_type) == sizeof(size_t),
21 "We assume string_view's size_type is the same as size_t. If "
22 "not, the following file needs to be refactored");
23
24 // All of the parsing methods in this file that operate on Parser are named
25 // either Parse* or Skip* and are named according to the CDDL grammar in
26 // grammar.abnf. Similarly, methods like ParseMemberKey1 attempt to parse the
27 // first choice in the memberkey rule.
28 struct Parser {
29 Parser(const Parser&) = delete;
30 Parser& operator=(const Parser&) = delete;
31
32 const char* data;
33 std::vector<std::unique_ptr<AstNode>> nodes;
34 };
35
AddNode(Parser * p,AstNode::Type type,absl::string_view text,AstNode * children=nullptr)36 AstNode* AddNode(Parser* p,
37 AstNode::Type type,
38 absl::string_view text,
39 AstNode* children = nullptr) {
40 p->nodes.emplace_back(new AstNode);
41 AstNode* node = p->nodes.back().get();
42 node->children = children;
43 node->sibling = nullptr;
44 node->type = type;
45 node->text = std::string(text);
46 return node;
47 }
48
IsBinaryDigit(char x)49 bool IsBinaryDigit(char x) {
50 return '0' == x || x == '1';
51 }
52
53 // Determines if the given character matches regex '[a-zA-Z@_$]'.
IsExtendedAlpha(char x)54 bool IsExtendedAlpha(char x) {
55 return absl::ascii_isalpha(x) || x == '@' || x == '_' || x == '$';
56 }
57
IsNewline(char x)58 bool IsNewline(char x) {
59 return x == '\r' || x == '\n';
60 }
61
IsWhitespaceOrSemicolon(char c)62 bool IsWhitespaceOrSemicolon(char c) {
63 return c == ' ' || c == ';' || c == '\r' || c == '\n';
64 }
65
SkipNewline(absl::string_view view)66 absl::string_view SkipNewline(absl::string_view view) {
67 size_t index = 0;
68 while (IsNewline(view[index])) {
69 ++index;
70 }
71
72 return view.substr(index);
73 }
74
75 // Skips over a comment that makes up the remainder of the current line.
SkipComment(absl::string_view view)76 absl::string_view SkipComment(absl::string_view view) {
77 size_t index = 0;
78 if (view[index] == ';') {
79 ++index;
80 while (!IsNewline(view[index]) && index < view.length()) {
81 CHECK(absl::ascii_isprint(view[index]));
82 ++index;
83 }
84
85 while (IsNewline(view[index])) {
86 ++index;
87 }
88 }
89
90 return view.substr(index);
91 }
92
SkipWhitespace(Parser * p,bool skip_comments=true)93 void SkipWhitespace(Parser* p, bool skip_comments = true) {
94 if (!skip_comments) {
95 p->data = absl::StripLeadingAsciiWhitespace(p->data).data();
96 return;
97 }
98
99 absl::string_view view = p->data;
100 absl::string_view new_view;
101
102 while (true) {
103 new_view = SkipComment(view);
104 if (new_view.data() == view.data()) {
105 new_view = absl::StripLeadingAsciiWhitespace(view);
106 }
107
108 if (new_view == view) {
109 break;
110 }
111
112 view = new_view;
113 }
114
115 p->data = new_view.data();
116 }
117
TrySkipNewline(Parser * p)118 bool TrySkipNewline(Parser* p) {
119 auto* new_view = SkipNewline(p->data).data();
120 bool is_changed = p->data == new_view;
121 p->data = new_view;
122 return is_changed;
123 }
124
TrySkipCharacter(Parser * p,char c)125 bool TrySkipCharacter(Parser* p, char c) {
126 if (p->data[0] == c) {
127 p->data++;
128 return true;
129 }
130
131 return false;
132 }
133
134 enum class AssignType {
135 kInvalid = -1,
136 kAssign,
137 kAssignT,
138 kAssignG,
139 };
140
ParseAssignmentType(Parser * p)141 AssignType ParseAssignmentType(Parser* p) {
142 const char* it = p->data;
143 if (it[0] == '=') {
144 p->data = it + 1;
145 return AssignType::kAssign;
146 } else if (it[0] == '/') {
147 ++it;
148 if (it[0] == '/' && it[1] == '=') {
149 p->data = it + 2;
150 return AssignType::kAssignG;
151 } else if (it[0] == '=') {
152 p->data = it + 1;
153 return AssignType::kAssignT;
154 }
155 }
156 return AssignType::kInvalid;
157 }
158
159 AstNode* ParseType1(Parser* p);
160 AstNode* ParseType(Parser* p, bool skip_comments = true);
161 AstNode* ParseId(Parser* p);
162
SkipUint(Parser * p)163 void SkipUint(Parser* p) {
164 absl::string_view view = p->data;
165
166 bool is_binary = false;
167 size_t index = 0;
168 if (absl::StartsWith(view, "0b")) {
169 is_binary = true;
170 index = 2;
171 } else if (absl::StartsWith(view, "0x")) {
172 index = 2;
173 }
174
175 while (index < view.length() && absl::ascii_isdigit(view[index])) {
176 if (is_binary) {
177 CHECK(IsBinaryDigit(view[index]));
178 }
179
180 ++index;
181 }
182
183 p->data = view.substr(index).data();
184 }
185
ParseNumber(Parser * p)186 AstNode* ParseNumber(Parser* p) {
187 Parser p_speculative{p->data};
188 if (!absl::ascii_isdigit(p_speculative.data[0]) &&
189 p_speculative.data[0] != '-') {
190 // TODO(btolsch): Add support for hexfloat, fraction, exponent.
191 return nullptr;
192 }
193 if (p_speculative.data[0] == '-') {
194 ++p_speculative.data;
195 }
196
197 SkipUint(&p_speculative);
198
199 AstNode* node =
200 AddNode(p, AstNode::Type::kNumber,
201 absl::string_view(p->data, p_speculative.data - p->data));
202 p->data = p_speculative.data;
203 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
204 std::back_inserter(p->nodes));
205 return node;
206 }
207
ParseText(Parser * p)208 AstNode* ParseText(Parser* p) {
209 return nullptr;
210 }
211
ParseBytes(Parser * p)212 AstNode* ParseBytes(Parser* p) {
213 return nullptr;
214 }
215
216 // Returns whether |c| could be the first character in a valid "value" string.
217 // This is not a guarantee however, since 'h' and 'b' could also indicate the
218 // start of an ID, but value needs to be tried first.
IsValue(char c)219 bool IsValue(char c) {
220 return (c == '-' || absl::ascii_isdigit(c) || // FIRST(number)
221 c == '"' || // FIRST(text)
222 c == '\'' || c == 'h' || c == 'b'); // FIRST(bytes)
223 }
224
ParseValue(Parser * p)225 AstNode* ParseValue(Parser* p) {
226 AstNode* node = ParseNumber(p);
227 if (!node) {
228 node = ParseText(p);
229 }
230 if (!node) {
231 ParseBytes(p);
232 }
233 return node;
234 }
235
236 // Determines whether an occurrence operator (such as '*' or '?') prefacing
237 // the group definition occurs before the next whitespace character, and
238 // creates a new Occurrence node if so.
ParseOccur(Parser * p)239 AstNode* ParseOccur(Parser* p) {
240 Parser p_speculative{p->data};
241
242 if (*p_speculative.data == '?' || *p_speculative.data == '+') {
243 p_speculative.data++;
244 } else {
245 SkipUint(&p_speculative);
246 if (*p_speculative.data++ != '*') {
247 return nullptr;
248 }
249 SkipUint(&p_speculative);
250 }
251
252 AstNode* node =
253 AddNode(p, AstNode::Type::kOccur,
254 absl::string_view(p->data, p_speculative.data - p->data));
255 p->data = p_speculative.data;
256 return node;
257 }
258
ParseTypeKeyFromComment(Parser * p)259 absl::optional<std::string> ParseTypeKeyFromComment(Parser* p) {
260 Parser p_speculative{p->data};
261 if (!TrySkipCharacter(&p_speculative, ';')) {
262 return absl::nullopt;
263 }
264
265 SkipWhitespace(&p_speculative, false);
266 const char kTypeKeyPrefix[] = "type key";
267 if (!absl::StartsWith(p_speculative.data, kTypeKeyPrefix)) {
268 return absl::nullopt;
269 }
270 p_speculative.data += strlen(kTypeKeyPrefix);
271
272 SkipWhitespace(&p_speculative, false);
273 Parser p_speculative2{p_speculative.data};
274 for (; absl::ascii_isdigit(p_speculative2.data[0]); p_speculative2.data++) {
275 }
276 auto result = absl::string_view(p_speculative.data,
277 p_speculative2.data - p_speculative.data);
278 p->data = p_speculative2.data;
279 return std::string(result.data()).substr(0, result.length());
280 }
281
ParseMemberKeyFromComment(Parser * p)282 AstNode* ParseMemberKeyFromComment(Parser* p) {
283 Parser p_speculative{p->data};
284 if (!TrySkipCharacter(&p_speculative, ';')) {
285 return nullptr;
286 }
287
288 SkipWhitespace(&p_speculative, false);
289
290 AstNode* value = ParseId(&p_speculative);
291 if (!value) {
292 return nullptr;
293 }
294
295 SkipWhitespace(&p_speculative, false);
296 if (!TrySkipNewline(&p_speculative)) {
297 return nullptr;
298 }
299
300 AstNode* node = AddNode(p, AstNode::Type::kMemberKey, value->text, value);
301 p->data = p_speculative.data;
302 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
303 std::back_inserter(p->nodes));
304
305 return node;
306 }
307
ParseMemberKey1(Parser * p)308 AstNode* ParseMemberKey1(Parser* p) {
309 Parser p_speculative{p->data};
310 if (!ParseType1(&p_speculative)) {
311 return nullptr;
312 }
313
314 SkipWhitespace(&p_speculative);
315
316 if (*p_speculative.data++ != '=' || *p_speculative.data++ != '>') {
317 return nullptr;
318 }
319 AstNode* node =
320 AddNode(p, AstNode::Type::kMemberKey,
321 absl::string_view(p->data, p_speculative.data - p->data));
322 p->data = p_speculative.data;
323 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
324 std::back_inserter(p->nodes));
325 return node;
326 }
327
ParseMemberKey2(Parser * p)328 AstNode* ParseMemberKey2(Parser* p) {
329 Parser p_speculative{p->data};
330 AstNode* id = ParseId(&p_speculative);
331 if (!id) {
332 return nullptr;
333 }
334
335 SkipWhitespace(&p_speculative);
336
337 if (*p_speculative.data++ != ':') {
338 return nullptr;
339 }
340
341 AstNode* node =
342 AddNode(p, AstNode::Type::kMemberKey,
343 absl::string_view(p->data, p_speculative.data - p->data), id);
344 p->data = p_speculative.data;
345 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
346 std::back_inserter(p->nodes));
347 return node;
348 }
349
ParseMemberKey3(Parser * p)350 AstNode* ParseMemberKey3(Parser* p) {
351 Parser p_speculative{p->data};
352 AstNode* value = ParseValue(&p_speculative);
353 if (!value) {
354 return nullptr;
355 }
356
357 SkipWhitespace(&p_speculative);
358
359 if (*p_speculative.data++ != ':') {
360 return nullptr;
361 }
362 AstNode* node =
363 AddNode(p, AstNode::Type::kMemberKey,
364 absl::string_view(p->data, p_speculative.data - p->data), value);
365 p->data = p_speculative.data;
366 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
367 std::back_inserter(p->nodes));
368 return node;
369 }
370
371 // Iteratively tries all valid member key formats, retuning a Node representing
372 // the member key if found or nullptr if not.
ParseMemberKey(Parser * p)373 AstNode* ParseMemberKey(Parser* p) {
374 AstNode* node = ParseMemberKey1(p);
375 if (!node) {
376 node = ParseMemberKey2(p);
377 }
378 if (!node) {
379 node = ParseMemberKey3(p);
380 }
381 return node;
382 }
383
384 AstNode* ParseGroupEntry(Parser* p);
385
SkipOptionalComma(Parser * p)386 bool SkipOptionalComma(Parser* p) {
387 SkipWhitespace(p);
388 if (p->data[0] == ',') {
389 ++p->data;
390 SkipWhitespace(p);
391 }
392 return true;
393 }
394
395 // Parse the group contained inside of other brackets. Since the brackets around
396 // the group are optional inside of other brackets, we can't directly call
397 // ParseGroupEntry(...) and instead need this wrapper around it.
ParseGroupChoice(Parser * p)398 AstNode* ParseGroupChoice(Parser* p) {
399 Parser p_speculative{p->data};
400 AstNode* tail = nullptr;
401 AstNode* group_node =
402 AddNode(&p_speculative, AstNode::Type::kGrpchoice, absl::string_view());
403 const char* group_node_text = p_speculative.data;
404 while (true) {
405 const char* orig = p_speculative.data;
406 AstNode* group_entry = ParseGroupEntry(&p_speculative);
407 if (!group_entry) {
408 p_speculative.data = orig;
409 if (!group_node->children) {
410 return nullptr;
411 }
412 group_node->text =
413 std::string(group_node_text, p_speculative.data - group_node_text);
414 p->data = p_speculative.data;
415 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
416 std::back_inserter(p->nodes));
417 return group_node;
418 }
419 if (!group_node->children) {
420 group_node->children = group_entry;
421 }
422 if (tail) {
423 tail->sibling = group_entry;
424 }
425 tail = group_entry;
426 if (!SkipOptionalComma(&p_speculative)) {
427 return nullptr;
428 }
429 }
430 return nullptr;
431 }
432
ParseGroup(Parser * p)433 AstNode* ParseGroup(Parser* p) {
434 const char* orig = p->data;
435 AstNode* group_choice = ParseGroupChoice(p);
436 if (!group_choice) {
437 return nullptr;
438 }
439 return AddNode(p, AstNode::Type::kGroup,
440 absl::string_view(orig, p->data - orig), group_choice);
441 }
442
443 // Parse optional range operator .. (inlcusive) or ... (exclusive)
444 // ABNF rule: rangeop = "..." / ".."
ParseRangeop(Parser * p)445 AstNode* ParseRangeop(Parser* p) {
446 absl::string_view view(p->data);
447 if (absl::StartsWith(view, "...")) {
448 // rangeop ...
449 p->data += 3;
450 return AddNode(p, AstNode::Type::kRangeop, view.substr(0, 3));
451 } else if (absl::StartsWith(view, "..")) {
452 // rangeop ..
453 p->data += 2;
454 return AddNode(p, AstNode::Type::kRangeop, view.substr(0, 2));
455 }
456 return nullptr;
457 }
458
459 // Parse optional control operator .id
460 // ABNF rule: ctlop = "." id
ParseCtlop(Parser * p)461 AstNode* ParseCtlop(Parser* p) {
462 absl::string_view view(p->data);
463 if (!absl::StartsWith(view, ".")) {
464 return nullptr;
465 }
466 ++p->data;
467 AstNode* id = ParseId(p);
468 if (!id) {
469 return nullptr;
470 }
471 return AddNode(p, AstNode::Type::kCtlop,
472 view.substr(0, p->data - view.begin()), id);
473 }
474
ParseType2(Parser * p)475 AstNode* ParseType2(Parser* p) {
476 const char* orig = p->data;
477 const char* it = p->data;
478 AstNode* node = AddNode(p, AstNode::Type::kType2, absl::string_view());
479 if (IsValue(it[0])) {
480 AstNode* value = ParseValue(p);
481 if (!value) {
482 if (it[0] == 'h' || it[0] == 'b') {
483 AstNode* id = ParseId(p);
484 if (!id) {
485 return nullptr;
486 }
487 id->type = AstNode::Type::kTypename;
488 node->children = id;
489 } else {
490 return nullptr;
491 }
492 } else {
493 node->children = value;
494 }
495 } else if (IsExtendedAlpha(it[0])) {
496 AstNode* id = ParseId(p);
497 if (!id) {
498 return nullptr;
499 }
500 if (p->data[0] == '<') {
501 std::cerr << "It looks like you're trying to use a generic argument, "
502 "which we don't support"
503 << std::endl;
504 return nullptr;
505 }
506 id->type = AstNode::Type::kTypename;
507 node->children = id;
508 } else if (it[0] == '(') {
509 p->data = it + 1;
510 SkipWhitespace(p);
511 if (p->data[0] == ')') {
512 std::cerr << "It looks like you're trying to provide an empty Type (), "
513 "which we don't support"
514 << std::endl;
515 return nullptr;
516 }
517 AstNode* type = ParseType(p);
518 if (!type) {
519 return nullptr;
520 }
521 SkipWhitespace(p);
522 if (p->data[0] != ')') {
523 return nullptr;
524 }
525 ++p->data;
526 node->children = type;
527 } else if (it[0] == '{') {
528 p->data = it + 1;
529 SkipWhitespace(p);
530 if (p->data[0] == '}') {
531 std::cerr << "It looks like you're trying to provide an empty Group {}, "
532 "which we don't support"
533 << std::endl;
534 return nullptr;
535 }
536 AstNode* group = ParseGroup(p);
537 if (!group) {
538 return nullptr;
539 }
540 SkipWhitespace(p);
541 if (p->data[0] != '}') {
542 return nullptr;
543 }
544 ++p->data;
545 node->children = group;
546 } else if (it[0] == '[') {
547 p->data = it + 1;
548 SkipWhitespace(p);
549 AstNode* group = ParseGroup(p);
550 if (!group) {
551 return nullptr;
552 }
553 SkipWhitespace(p);
554 if (p->data[0] != ']') {
555 return nullptr;
556 }
557 ++p->data;
558 node->children = group;
559 } else if (it[0] == '~') {
560 p->data = it + 1;
561 SkipWhitespace(p);
562 if (!ParseId(p)) {
563 return nullptr;
564 }
565 if (p->data[0] == '<') {
566 std::cerr << "It looks like you're trying to use a generic argument, "
567 "which we don't support"
568 << std::endl;
569 return nullptr;
570 }
571 } else if (it[0] == '&') {
572 p->data = it + 1;
573 SkipWhitespace(p);
574 if (p->data[0] == '(') {
575 ++p->data;
576 SkipWhitespace(p);
577 if (p->data[0] == ')') {
578 std::cerr << "It looks like you're trying to provide an empty Type &(),"
579 " which we don't support"
580 << std::endl;
581 return nullptr;
582 }
583 AstNode* group = ParseGroup(p);
584 if (!group) {
585 return nullptr;
586 }
587 SkipWhitespace(p);
588 if (p->data[0] != ')') {
589 return nullptr;
590 }
591 ++p->data;
592 node->children = group;
593 } else {
594 AstNode* id = ParseId(p);
595 if (id) {
596 if (p->data[0] == '<') {
597 std::cerr << "It looks like you're trying to use a generic argument, "
598 "which we don't support"
599 << std::endl;
600 return nullptr;
601 }
602 id->type = AstNode::Type::kGroupname;
603 node->children = id;
604 } else {
605 return nullptr;
606 }
607 }
608 } else if (it[0] == '#') {
609 ++it;
610 if (it[0] == '6') {
611 ++it;
612 if (it[0] == '.') {
613 p->data = it + 1;
614 SkipUint(p);
615 it = p->data;
616 }
617 if (it[0] != '(') {
618 return nullptr;
619 }
620 p->data = ++it;
621 SkipWhitespace(p);
622 AstNode* type = ParseType(p);
623 if (!type) {
624 return nullptr;
625 }
626 SkipWhitespace(p);
627 if (p->data[0] != ')') {
628 return nullptr;
629 }
630 ++p->data;
631 node->children = type;
632 } else if (absl::ascii_isdigit(it[0])) {
633 std::cerr << "# MAJOR unimplemented" << std::endl;
634 return nullptr;
635 } else {
636 p->data = it;
637 }
638 } else {
639 return nullptr;
640 }
641 node->text = std::string(orig, p->data - orig);
642 return node;
643 }
644
ParseType1(Parser * p)645 AstNode* ParseType1(Parser* p) {
646 const char* orig = p->data;
647 AstNode* type2 = ParseType2(p);
648 if (!type2) {
649 return nullptr;
650 }
651 SkipWhitespace(p, false);
652 AstNode* rangeop_or_ctlop = ParseRangeop(p);
653 if (!rangeop_or_ctlop) {
654 rangeop_or_ctlop = ParseCtlop(p);
655 }
656 if (rangeop_or_ctlop) {
657 SkipWhitespace(p, false);
658 AstNode* param = ParseType2(p);
659 if (!param) {
660 return nullptr;
661 }
662 type2->sibling = rangeop_or_ctlop;
663 rangeop_or_ctlop->sibling = param;
664 }
665 return AddNode(p, AstNode::Type::kType1,
666 absl::string_view(orig, p->data - orig), type2);
667 }
668
669 // Different valid types for a call are specified as type1 / type2, so we split
670 // at the '/' character and process each allowed type separately.
ParseType(Parser * p,bool skip_comments)671 AstNode* ParseType(Parser* p, bool skip_comments) {
672 Parser p_speculative{p->data};
673
674 // Parse all allowed types into a linked list starting in type1's sibling ptr.
675 AstNode* type1 = ParseType1(&p_speculative);
676 if (!type1) {
677 return nullptr;
678 }
679 SkipWhitespace(&p_speculative, skip_comments);
680
681 AstNode* tail = type1;
682 while (*p_speculative.data == '/') {
683 ++p_speculative.data;
684 SkipWhitespace(&p_speculative, skip_comments);
685
686 AstNode* next_type1 = ParseType1(&p_speculative);
687 if (!next_type1) {
688 return nullptr;
689 }
690 tail->sibling = next_type1;
691 tail = next_type1;
692 SkipWhitespace(&p_speculative, skip_comments);
693 }
694
695 // Create a new AstNode with all parsed types.
696 AstNode* node =
697 AddNode(p, AstNode::Type::kType,
698 absl::string_view(p->data, p_speculative.data - p->data), type1);
699 p->data = p_speculative.data;
700 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
701 std::back_inserter(p->nodes));
702 return node;
703 }
704
ParseId(Parser * p)705 AstNode* ParseId(Parser* p) {
706 const char* id = p->data;
707
708 // If the id doesnt start with a valid name character, return null.
709 if (!IsExtendedAlpha(id[0])) {
710 return nullptr;
711 }
712
713 // Read through the name character by character and make sure it's valid.
714 const char* it = id + 1;
715 while (true) {
716 if (it[0] == '-' || it[0] == '.') {
717 ++it;
718 if (!IsExtendedAlpha(it[0]) && !absl::ascii_isdigit(it[0])) {
719 return nullptr;
720 }
721 ++it;
722 } else if (IsExtendedAlpha(it[0]) || absl::ascii_isdigit(it[0])) {
723 ++it;
724 } else {
725 break;
726 }
727 }
728
729 // Create and return a new node with the parsed data.
730 AstNode* node =
731 AddNode(p, AstNode::Type::kId, absl::string_view(p->data, it - p->data));
732 p->data = it;
733 return node;
734 }
735
UpdateNodesForGroupEntry(Parser * p,Parser * p_speculative,AstNode * occur,AstNode * member_key,AstNode * type)736 AstNode* UpdateNodesForGroupEntry(Parser* p,
737 Parser* p_speculative,
738 AstNode* occur,
739 AstNode* member_key,
740 AstNode* type) {
741 AstNode* node = AddNode(p, AstNode::Type::kGrpent, absl::string_view());
742 if (occur) {
743 node->children = occur;
744 if (member_key) {
745 occur->sibling = member_key;
746 member_key->sibling = type;
747 } else {
748 occur->sibling = type;
749 }
750 } else if (member_key) {
751 node->children = member_key;
752 member_key->sibling = type;
753 } else {
754 node->children = type;
755 }
756 node->text = std::string(p->data, p_speculative->data - p->data);
757 p->data = p_speculative->data;
758 std::move(p_speculative->nodes.begin(), p_speculative->nodes.end(),
759 std::back_inserter(p->nodes));
760 return node;
761 }
762
763 // Parse a group entry of form <id_num>: <type> ; <name>
ParseGroupEntryWithNameInComment(Parser * p)764 AstNode* ParseGroupEntryWithNameInComment(Parser* p) {
765 Parser p_speculative{p->data};
766 AstNode* occur = ParseOccur(&p_speculative);
767 if (occur) {
768 SkipWhitespace(&p_speculative, false);
769 }
770 AstNode* member_key_num = ParseValue(&p_speculative);
771 if (!member_key_num) {
772 return nullptr;
773 }
774 SkipWhitespace(&p_speculative, false);
775 if (*p_speculative.data++ != ':') {
776 return nullptr;
777 }
778 SkipWhitespace(&p_speculative, false);
779 AstNode* type = ParseType(&p_speculative, false);
780 if (!type) {
781 return nullptr;
782 }
783 SkipWhitespace(&p_speculative, false);
784 AstNode* member_key = ParseMemberKeyFromComment(&p_speculative);
785 if (!member_key) {
786 return nullptr;
787 }
788
789 member_key->integer_member_key_text = member_key_num->text;
790
791 return UpdateNodesForGroupEntry(p, &p_speculative, occur, member_key, type);
792 }
793
ParseGroupEntryWithNameAsId(Parser * p)794 AstNode* ParseGroupEntryWithNameAsId(Parser* p) {
795 Parser p_speculative{p->data};
796 AstNode* occur = ParseOccur(&p_speculative);
797 if (occur) {
798 SkipWhitespace(&p_speculative);
799 }
800 AstNode* member_key = ParseMemberKey(&p_speculative);
801 if (member_key) {
802 SkipWhitespace(&p_speculative);
803 }
804 AstNode* type = ParseType(&p_speculative);
805 if (!type) {
806 return nullptr;
807 }
808 return UpdateNodesForGroupEntry(p, &p_speculative, occur, member_key, type);
809 }
810
811 // NOTE: This should probably never be hit, why is it in the grammar?
ParseGroupEntryWithGroupReference(Parser * p)812 AstNode* ParseGroupEntryWithGroupReference(Parser* p) {
813 Parser p_speculative{p->data};
814
815 // Check for an occurance indicator ('?', '*', "+") before the sub-group
816 // definition.
817 AstNode* occur = ParseOccur(&p_speculative);
818 if (occur) {
819 SkipWhitespace(&p_speculative);
820 }
821
822 // Parse the ID of the sub-group.
823 AstNode* id = ParseId(&p_speculative);
824 if (!id) {
825 return nullptr;
826 }
827 id->type = AstNode::Type::kGroupname;
828 if (*p_speculative.data == '<') {
829 std::cerr << "It looks like you're trying to use a generic argument, "
830 "which we don't support"
831 << std::endl;
832 return nullptr;
833 }
834
835 // Create a new node containing this sub-group reference.
836 AstNode* node = AddNode(p, AstNode::Type::kGrpent, absl::string_view());
837 if (occur) {
838 occur->sibling = id;
839 node->children = occur;
840 } else {
841 node->children = id;
842 }
843 node->text = std::string(p->data, p_speculative.data - p->data);
844 p->data = p_speculative.data;
845 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
846 std::back_inserter(p->nodes));
847 return node;
848 }
849
850 // Recursively parse a group entry that's an inline-defined group of the form
851 // '(...<some contents>...)'.
ParseGroupEntryWithInlineGroupDefinition(Parser * p)852 AstNode* ParseGroupEntryWithInlineGroupDefinition(Parser* p) {
853 Parser p_speculative{p->data};
854 AstNode* occur = ParseOccur(&p_speculative);
855 if (occur) {
856 SkipWhitespace(&p_speculative);
857 }
858 if (*p_speculative.data != '(') {
859 return nullptr;
860 }
861 ++p_speculative.data;
862 SkipWhitespace(&p_speculative);
863 AstNode* group = ParseGroup(&p_speculative); // Recursive call here.
864 if (!group) {
865 return nullptr;
866 }
867
868 SkipWhitespace(&p_speculative);
869 if (*p_speculative.data != ')') {
870 return nullptr;
871 }
872 ++p_speculative.data;
873 AstNode* node = AddNode(p, AstNode::Type::kGrpent, absl::string_view());
874 if (occur) {
875 node->children = occur;
876 occur->sibling = group;
877 } else {
878 node->children = group;
879 }
880 node->text = std::string(p->data, p_speculative.data - p->data);
881 p->data = p_speculative.data;
882 std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
883 std::back_inserter(p->nodes));
884 return node;
885 }
886
887 // Recursively parse the group assignemnt.
ParseGroupEntry(Parser * p)888 AstNode* ParseGroupEntry(Parser* p) {
889 // Parse a group entry of form '#: type ; name'
890 AstNode* node = ParseGroupEntryWithNameInComment(p);
891
892 // Parse a group entry of form 'id: type'.
893 if (!node) {
894 node = ParseGroupEntryWithNameAsId(p);
895 }
896
897 // Parse a group entry of form 'subgroupName'.
898 if (!node) {
899 node = ParseGroupEntryWithGroupReference(p);
900 }
901
902 // Parse a group entry of the form: '(' <some contents> ')'.
903 // NOTE: This is the method hit during the top-level group parsing, and the
904 // recursive call occurs inside this method.
905 if (!node) {
906 node = ParseGroupEntryWithInlineGroupDefinition(p);
907 }
908
909 // Return the results of the recursive call.
910 return node;
911 }
912
ParseRule(Parser * p)913 AstNode* ParseRule(Parser* p) {
914 const char* start = p->data;
915
916 // Parse the type key, if it's present
917 absl::optional<std::string> type_key = ParseTypeKeyFromComment(p);
918 SkipWhitespace(p);
919
920 // Use the parser to extract the id and data.
921 AstNode* id = ParseId(p);
922 if (!id) {
923 Logger::Error("No id found!");
924 return nullptr;
925 }
926 if (p->data[0] == '<') {
927 std::cerr << "It looks like you're trying to use a generic parameter, "
928 "which we don't support"
929 << std::endl;
930 return nullptr;
931 }
932
933 // Determine the type of assignment being done to this variable name (ie '=').
934 SkipWhitespace(p);
935 const char* assign_start = p->data;
936 AssignType assign_type = ParseAssignmentType(p);
937 if (assign_type != AssignType::kAssign) {
938 Logger::Error("No assignment operator found! assign_type: %d", assign_type);
939 return nullptr;
940 }
941 AstNode* assign_node = AddNode(
942 p,
943 (assign_type == AssignType::kAssign)
944 ? AstNode::Type::kAssign
945 : (assign_type == AssignType::kAssignT) ? AstNode::Type::kAssignT
946 : AstNode::Type::kAssignG,
947 absl::string_view(assign_start, p->data - assign_start));
948 id->sibling = assign_node;
949
950 // Parse the object type being assigned.
951 SkipWhitespace(p);
952 AstNode* type = ParseType(p, false); // Try to parse it as a type.
953 id->type = AstNode::Type::kTypename;
954 if (!type) { // If it's not a type, try and parse it as a group.
955 type = ParseGroupEntry(p);
956 id->type = AstNode::Type::kGroupname;
957 }
958 if (!type) { // if it's not a type or a group, exit as failure.
959 Logger::Error("No node type found!");
960 return nullptr;
961 }
962 assign_node->sibling = type;
963 SkipWhitespace(p, false);
964
965 // Return the results.
966 auto rule_node = AddNode(p, AstNode::Type::kRule,
967 absl::string_view(start, p->data - start), id);
968 rule_node->type_key = type_key;
969 return rule_node;
970 }
971
972 // Iteratively parse the CDDL spec into a tree structure.
ParseCddl(absl::string_view data)973 ParseResult ParseCddl(absl::string_view data) {
974 if (data[0] == 0) {
975 return {nullptr, {}};
976 }
977 Parser p{(char*)data.data()};
978
979 SkipWhitespace(&p);
980 AstNode* root = nullptr;
981 AstNode* tail = nullptr;
982 do {
983 AstNode* next = ParseRule(&p);
984 if (!next) {
985 Logger::Error("Failed to parse next node. Failed starting at: '%s'",
986 p.data);
987 return {nullptr, {}};
988 } else {
989 Logger::Log("Processed text \"%s\" into node: ", next->text);
990 DumpAst(next);
991 }
992
993 if (!root) {
994 root = next;
995 }
996 if (tail) {
997 tail->sibling = next;
998 }
999 tail = next;
1000 } while (p.data[0]);
1001 return {root, std::move(p.nodes)};
1002 }
1003
1004 // Recursively print out the AstNode graph.
DumpAst(AstNode * node,int indent_level)1005 void DumpAst(AstNode* node, int indent_level) {
1006 while (node) {
1007 // Prefix with '-'s so the levels of the graph are clear.
1008 std::string node_text = "";
1009 for (int i = 0; i <= indent_level; ++i) {
1010 node_text += "--";
1011 }
1012
1013 // Print the type.
1014 switch (node->type) {
1015 case AstNode::Type::kRule:
1016 node_text += "kRule";
1017 break;
1018 case AstNode::Type::kTypename:
1019 node_text += "kTypename";
1020 break;
1021 case AstNode::Type::kGroupname:
1022 node_text += "kGroupname";
1023 break;
1024 case AstNode::Type::kAssign:
1025 node_text += "kAssign";
1026 break;
1027 case AstNode::Type::kAssignT:
1028 node_text += "kAssignT";
1029 break;
1030 case AstNode::Type::kAssignG:
1031 node_text += "kAssignG";
1032 break;
1033 case AstNode::Type::kType:
1034 node_text += "kType";
1035 break;
1036 case AstNode::Type::kGrpent:
1037 node_text += "kGrpent";
1038 break;
1039 case AstNode::Type::kType1:
1040 node_text += "kType1";
1041 break;
1042 case AstNode::Type::kType2:
1043 node_text += "kType2";
1044 break;
1045 case AstNode::Type::kValue:
1046 node_text += "kValue";
1047 break;
1048 case AstNode::Type::kGroup:
1049 node_text += "kGroup";
1050 break;
1051 case AstNode::Type::kUint:
1052 node_text += "kUint";
1053 break;
1054 case AstNode::Type::kDigit:
1055 node_text += "kDigit";
1056 break;
1057 case AstNode::Type::kRangeop:
1058 node_text += "kRangeop";
1059 break;
1060 case AstNode::Type::kCtlop:
1061 node_text += "kCtlop";
1062 break;
1063 case AstNode::Type::kGrpchoice:
1064 node_text += "kGrpchoice";
1065 break;
1066 case AstNode::Type::kOccur:
1067 node_text += "kOccur";
1068 break;
1069 case AstNode::Type::kMemberKey:
1070 node_text += "kMemberKey";
1071 break;
1072 case AstNode::Type::kId:
1073 node_text += "kId";
1074 break;
1075 case AstNode::Type::kNumber:
1076 node_text += "kNumber";
1077 break;
1078 case AstNode::Type::kText:
1079 node_text += "kText";
1080 break;
1081 case AstNode::Type::kBytes:
1082 node_text += "kBytes";
1083 break;
1084 case AstNode::Type::kOther:
1085 node_text += "kOther";
1086 break;
1087 }
1088 if (node->type_key != absl::nullopt) {
1089 node_text += " (type key=\"" + node->type_key.value() + "\")";
1090 }
1091 node_text += ": ";
1092
1093 // Print the contents.
1094 int size = static_cast<int>(node->text.size());
1095 absl::string_view text = node->text.data();
1096 for (int i = 0; i < size; ++i) {
1097 if (text[i] == ' ' || text[i] == '\n') {
1098 node_text += " ";
1099 while (i < size - 1 && (text[i + 1] == ' ' || text[i + 1] == '\n')) {
1100 ++i;
1101 }
1102 continue;
1103 } else {
1104 node_text += text[i];
1105 }
1106 }
1107 Logger::Log(node_text);
1108
1109 // Recursively print children then iteratively print siblings.
1110 DumpAst(node->children, indent_level + 1);
1111 node = node->sibling;
1112 }
1113 }
1114