1 /*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * The lexer for dc.
33 *
34 */
35
36 #if DC_ENABLED
37
38 #include <ctype.h>
39
40 #include <dc.h>
41 #include <vm.h>
42
/**
 * Tests the character at the lexer's current position against
 * BC_LEX_NUM_CHAR() (invoked with both of its flag arguments false) and
 * reports the negation of that test.
 * @param l  The lexer.
 * @return   False if the character passes BC_LEX_NUM_CHAR(), true otherwise.
 */
bool dc_lex_negCommand(BcLex *l) {

	char next = l->buf[l->i];

	// A character accepted by the number test yields false.
	if (BC_LEX_NUM_CHAR(next, false, false)) return false;

	return true;
}
47
48 /**
49 * Processes a dc command that needs a register. This is where the
50 * extended-register extension is implemented.
51 * @param l The lexer.
52 */
dc_lex_register(BcLex * l)53 static void dc_lex_register(BcLex *l) {
54
55 // If extended register is enabled and the character is whitespace...
56 if (DC_X && isspace(l->buf[l->i - 1])) {
57
58 char c;
59
60 // Eat the whitespace.
61 bc_lex_whitespace(l);
62 c = l->buf[l->i];
63
64 // Check for a letter or underscore.
65 if (BC_ERR(!isalpha(c) && c != '_'))
66 bc_lex_verr(l, BC_ERR_PARSE_CHAR, c);
67
68 // Parse a normal identifier.
69 l->i += 1;
70 bc_lex_name(l);
71 }
72 else {
73
74 // I don't allow newlines because newlines are used for controlling when
75 // execution happens, and allowing newlines would just be complex.
76 if (BC_ERR(l->buf[l->i - 1] == '\n'))
77 bc_lex_verr(l, BC_ERR_PARSE_CHAR, l->buf[l->i - 1]);
78
79 // Set the lexer string and token.
80 bc_vec_popAll(&l->str);
81 bc_vec_pushByte(&l->str, (uchar) l->buf[l->i - 1]);
82 bc_vec_pushByte(&l->str, '\0');
83 l->t = BC_LEX_NAME;
84 }
85 }
86
87 /**
88 * Parses a dc string. Since dc's strings need to check for balanced brackets,
89 * we can't just parse bc and dc strings with different start and end
90 * characters. Oh, and dc strings need to check for escaped brackets.
91 * @param l The lexer.
92 */
dc_lex_string(BcLex * l)93 static void dc_lex_string(BcLex *l) {
94
95 size_t depth, nls, i;
96 char c;
97 bool got_more;
98
99 // Set the token and clear the string.
100 l->t = BC_LEX_STR;
101 bc_vec_popAll(&l->str);
102
103 do {
104
105 depth = 1;
106 nls = 0;
107 got_more = false;
108
109 assert(!l->is_stdin || l->buf == vm.buffer.v);
110
111 // This is the meat. As long as we don't run into the NUL byte, and we
112 // have "depth", which means we haven't completely balanced brackets
113 // yet, we continue eating the string.
114 for (i = l->i; (c = l->buf[i]) && depth; ++i) {
115
116 // Check for escaped brackets and set the depths as appropriate.
117 if (c == '\\') {
118 c = l->buf[++i];
119 if (!c) break;
120 }
121 else {
122 depth += (c == '[');
123 depth -= (c == ']');
124 }
125
126 // We want to adjust the line in the lexer as necessary.
127 nls += (c == '\n');
128
129 if (depth) bc_vec_push(&l->str, &c);
130 }
131
132 if (BC_ERR(c == '\0' && depth)) {
133 if (!vm.eof && l->is_stdin) got_more = bc_lex_readLine(l);
134 if (got_more) bc_vec_popAll(&l->str);
135 }
136
137 } while (got_more && depth);
138
139 // Obviously, if we didn't balance, that's an error.
140 if (BC_ERR(c == '\0' && depth)) {
141 l->i = i;
142 bc_lex_err(l, BC_ERR_PARSE_STRING);
143 }
144
145 bc_vec_pushByte(&l->str, '\0');
146
147 l->i = i;
148 l->line += nls;
149 }
150
151 /**
152 * Lexes a dc token. This is the dc implementation of BcLexNext.
153 * @param l The lexer.
154 */
dc_lex_token(BcLex * l)155 void dc_lex_token(BcLex *l) {
156
157 char c = l->buf[l->i++], c2;
158 size_t i;
159
160 // If the last token was a command that needs a register, we need to parse a
161 // register, so do so.
162 for (i = 0; i < dc_lex_regs_len; ++i) {
163
164 // If the token is a register token, take care of it and return.
165 if (l->last == dc_lex_regs[i]) {
166 dc_lex_register(l);
167 return;
168 }
169 }
170
171 // These lines are for tokens that easily correspond to one character. We
172 // just set the token.
173 if (c >= '"' && c <= '~' &&
174 (l->t = dc_lex_tokens[(c - '"')]) != BC_LEX_INVALID)
175 {
176 return;
177 }
178
179 // This is the workhorse of the lexer when more complicated things are
180 // needed.
181 switch (c) {
182
183 case '\0':
184 case '\n':
185 case '\t':
186 case '\v':
187 case '\f':
188 case '\r':
189 case ' ':
190 {
191 bc_lex_commonTokens(l, c);
192 break;
193 }
194
195 // We don't have the ! command, so we always expect certain things
196 // after the exclamation point.
197 case '!':
198 {
199 c2 = l->buf[l->i];
200
201 if (c2 == '=') l->t = BC_LEX_OP_REL_NE;
202 else if (c2 == '<') l->t = BC_LEX_OP_REL_LE;
203 else if (c2 == '>') l->t = BC_LEX_OP_REL_GE;
204 else bc_lex_invalidChar(l, c);
205
206 l->i += 1;
207
208 break;
209 }
210
211 case '#':
212 {
213 bc_lex_lineComment(l);
214 break;
215 }
216
217 case '.':
218 {
219 c2 = l->buf[l->i];
220
221 // If the character after is a number, this dot is part of a number.
222 // Otherwise, it's the BSD dot (equivalent to last).
223 if (BC_NO_ERR(BC_LEX_NUM_CHAR(c2, true, false)))
224 bc_lex_number(l, c);
225 else bc_lex_invalidChar(l, c);
226
227 break;
228 }
229
230 case '0':
231 case '1':
232 case '2':
233 case '3':
234 case '4':
235 case '5':
236 case '6':
237 case '7':
238 case '8':
239 case '9':
240 case 'A':
241 case 'B':
242 case 'C':
243 case 'D':
244 case 'E':
245 case 'F':
246 {
247 bc_lex_number(l, c);
248 break;
249 }
250
251 case '[':
252 {
253 dc_lex_string(l);
254 break;
255 }
256
257 default:
258 {
259 bc_lex_invalidChar(l, c);
260 }
261 }
262 }
263 #endif // DC_ENABLED
264