1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "RegexParser.h"
9
10 #include "LexUtil.h"
11
parse(std::string source)12 RegexNode RegexParser::parse(std::string source) {
13 fSource = source;
14 fIndex = 0;
15 ASSERT(fStack.size() == 0);
16 this->regex();
17 ASSERT(fStack.size() == 1);
18 ASSERT(fIndex == source.size());
19 return this->pop();
20 }
21
peek()22 char RegexParser::peek() {
23 if (fIndex >= fSource.size()) {
24 return END;
25 }
26 return fSource[fIndex];
27 }
28
expect(char c)29 void RegexParser::expect(char c) {
30 if (this->peek() != c) {
31 printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
32 exit(1);
33 }
34 ++fIndex;
35 }
36
pop()37 RegexNode RegexParser::pop() {
38 RegexNode result = fStack.top();
39 fStack.pop();
40 return result;
41 }
42
term()43 void RegexParser::term() {
44 switch (this->peek()) {
45 case '(': this->group(); break;
46 case '[': this->set(); break;
47 case '.': this->dot(); break;
48 default: this->literal();
49 }
50 }
51
quantifiedTerm()52 void RegexParser::quantifiedTerm() {
53 this->term();
54 switch (this->peek()) {
55 case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
56 case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
57 case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
58 default: break;
59 }
60 }
61
sequence()62 void RegexParser::sequence() {
63 this->quantifiedTerm();
64 for (;;) {
65 switch (this->peek()) {
66 case END: // fall through
67 case '|': // fall through
68 case ')': return;
69 default:
70 this->sequence();
71 RegexNode right = this->pop();
72 RegexNode left = this->pop();
73 fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
74 }
75 }
76 }
77
escapeSequence(char c)78 RegexNode RegexParser::escapeSequence(char c) {
79 switch (c) {
80 case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
81 case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
82 case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
83 case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
84 default: return RegexNode(RegexNode::kChar_Kind, c);
85 }
86 }
87
literal()88 void RegexParser::literal() {
89 char c = this->peek();
90 if (c == '\\') {
91 ++fIndex;
92 fStack.push(this->escapeSequence(peek()));
93 ++fIndex;
94 }
95 else {
96 fStack.push(RegexNode(RegexNode::kChar_Kind, c));
97 ++fIndex;
98 }
99 }
100
dot()101 void RegexParser::dot() {
102 this->expect('.');
103 fStack.push(RegexNode(RegexNode::kDot_Kind));
104 }
105
group()106 void RegexParser::group() {
107 this->expect('(');
108 this->regex();
109 this->expect(')');
110 }
111
setItem()112 void RegexParser::setItem() {
113 this->literal();
114 if (this->peek() == '-') {
115 ++fIndex;
116 if (peek() == ']') {
117 fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
118 }
119 else {
120 literal();
121 RegexNode end = this->pop();
122 ASSERT(end.fKind == RegexNode::kChar_Kind);
123 RegexNode start = this->pop();
124 ASSERT(start.fKind == RegexNode::kChar_Kind);
125 fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
126 }
127 }
128 }
129
set()130 void RegexParser::set() {
131 expect('[');
132 size_t depth = fStack.size();
133 RegexNode set(RegexNode::kCharset_Kind);
134 if (this->peek() == '^') {
135 ++fIndex;
136 set.fPayload.fBool = true;
137 }
138 else {
139 set.fPayload.fBool = false;
140 }
141 for (;;) {
142 switch (this->peek()) {
143 case ']':
144 ++fIndex;
145 while (fStack.size() > depth) {
146 set.fChildren.push_back(this->pop());
147 }
148 fStack.push(std::move(set));
149 return;
150 case END:
151 printf("unterminated character set\n");
152 exit(1);
153 default:
154 this->setItem();
155 }
156 }
157 }
158
regex()159 void RegexParser::regex() {
160 this->sequence();
161 switch (this->peek()) {
162 case '|': {
163 ++fIndex;
164 this->regex();
165 RegexNode right = this->pop();
166 RegexNode left = this->pop();
167 fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
168 break;
169 }
170 case END: // fall through
171 case ')':
172 return;
173 default:
174 ASSERT(false);
175 }
176 }
177