1 /*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * The lexer for bc.
33 *
34 */
35
36 #if BC_ENABLED
37
38 #include <assert.h>
39 #include <ctype.h>
40 #include <string.h>
41
42 #include <bc.h>
43 #include <vm.h>
44
/**
 * Lexes a keyword or a name. bc_lex_token() calls this after it has already
 * consumed the first (lowercase) character, so the text to examine starts one
 * character behind l->i.
 *
 * If the text matches an entry of bc_lex_kws and the match is not merely a
 * prefix of a longer identifier, the token type is set to the corresponding
 * keyword token. Otherwise the text is lexed as a name by bc_lex_name().
 *
 * @param l  The lexer.
 */
static void bc_lex_identifier(BcLex *l) {

	size_t i;
	const char *buf = l->buf + l->i - 1;

	for (i = 0; i < bc_lex_kws_len; ++i) {

		const BcLexKeyword *kw = bc_lex_kws + i;
		size_t n = BC_LEX_KW_LEN(kw);

		// buf[n] is only inspected once the first n characters matched, and it
		// must not continue the identifier (letter, digit, or underscore). The
		// cast is required: passing a negative char to a <ctype.h> function is
		// undefined behavior, which can happen for bytes >= 0x80 on platforms
		// where plain char is signed.
		if (!strncmp(buf, kw->name, n) && !isalnum((unsigned char) buf[n]) &&
		    buf[n] != '_')
		{
			// Keyword token types are laid out in the same order as the
			// keyword table, starting at BC_LEX_KW_AUTO.
			l->t = BC_LEX_KW_AUTO + (BcLexType) i;

			// NOTE(review): BC_LEX_KW_POSIX() presumably tells whether the
			// keyword is part of POSIX bc; confirm against its definition.
			if (!BC_LEX_KW_POSIX(kw))
				bc_lex_verr(l, BC_ERR_POSIX_KW, kw->name);

			// Subtract 1 because the index has already been advanced past
			// the first character of the keyword.
			l->i += n - 1;
			return;
		}
	}

	// Not a keyword, so it is an ordinary name.
	bc_lex_name(l);

	// Report names for which l->str.len - 1 exceeds 1.
	// NOTE(review): l->str.len presumably counts the terminating NUL, making
	// this a "longer than one character" check; confirm against bc_lex_name().
	if (BC_ERR(l->str.len - 1 > 1))
		bc_lex_verr(l, BC_ERR_POSIX_NAME_LEN, l->str.v);
}
73
/**
 * Lexes a string literal. bc_lex_token() calls this after consuming the
 * opening double quote, so l->i is the index of the first character of the
 * string's contents.
 *
 * On success the contents (without the quotes) are stored in l->str, l->i is
 * left just past the closing quote, and l->line is advanced by the number of
 * newlines found inside the string.
 *
 * @param l  The lexer.
 */
static void bc_lex_string(BcLex *l) {

	const char *text = l->buf;
	size_t end = l->i;
	size_t newlines = 0;

	l->t = BC_LEX_STR;

	// Scan to the closing quote or the end of the buffer, counting newlines
	// along the way.
	while (text[end] != '\0' && text[end] != '"') {
		if (text[end] == '\n') newlines += 1;
		end += 1;
	}

	// Hitting the NUL first means the string was never closed.
	// NOTE(review): bc_lex_err() presumably does not return for this error;
	// if it did, the code below would step past the terminator. Confirm.
	if (BC_ERR(text[end] == '\0')) {
		l->i = end;
		bc_lex_err(l, BC_ERR_PARSE_STRING);
	}

	bc_vec_string(&l->str, end - l->i, l->buf + l->i);

	// Skip the closing quote.
	l->i = end + 1;
	l->line += newlines;
}
95
/**
 * Chooses between two token types depending on whether the next character in
 * the buffer is an equals sign. If it is, the equals sign is consumed.
 *
 * @param l        The lexer.
 * @param with     The token type to use when an '=' follows.
 * @param without  The token type to use when no '=' follows.
 */
static void bc_lex_assign(BcLex *l, BcLexType with, BcLexType without) {

	if (l->buf[l->i] != '=') {
		l->t = without;
		return;
	}

	l->t = with;
	l->i += 1;
}
103
/**
 * Lexes the next token. One character is consumed from l->buf at l->i and
 * then dispatched on; each case either sets l->t directly or hands off to a
 * helper. Characters that cannot start a token go to bc_lex_invalidChar().
 *
 * @param l  The lexer.
 */
void bc_lex_token(BcLex *l) {

	// The current character is consumed up front, so in every case below
	// l->i already refers to the character that follows it.
	const char c = l->buf[l->i++];

	// One character of lookahead. It is only read inside the cases that need
	// it, never when c is the NUL terminator.
	char next;

	switch (c) {

		// End of input and whitespace are handled by the common lexer code.
		case '\0': case '\n': case '\t': case '\v':
		case '\f': case '\r': case ' ':
			bc_lex_commonTokens(l, c);
			break;

		// "!=" or a lone "!"; the lone form is reported via BC_ERR_POSIX_BOOL.
		case '!':
			bc_lex_assign(l, BC_LEX_OP_REL_NE, BC_LEX_OP_BOOL_NOT);
			if (l->t == BC_LEX_OP_BOOL_NOT) {
				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "!");
			}
			break;

		case '"':
			bc_lex_string(l);
			break;

		// Line comment, reported via BC_ERR_POSIX_COMMENT before it is lexed.
		case '#':
			bc_lex_err(l, BC_ERR_POSIX_COMMENT);
			bc_lex_lineComment(l);
			break;

		case '%':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MODULUS, BC_LEX_OP_MODULUS);
			break;

		// Only "&&" is accepted; a single '&' is an invalid character.
		case '&':
			next = l->buf[l->i];
			if (BC_NO_ERR(next == '&')) {
				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "&&");
				l->i += 1;
				l->t = BC_LEX_OP_BOOL_AND;
			}
			else {
				bc_lex_invalidChar(l, c);
			}
			break;

#if BC_ENABLE_EXTRA_MATH
		case '$':
			l->t = BC_LEX_OP_TRUNC;
			break;

		case '@':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLACES, BC_LEX_OP_PLACES);
			break;
#endif // BC_ENABLE_EXTRA_MATH

		// The token type is computed from the character's offset from '('.
		// NOTE(review): this relies on the order of BcLexType; confirm that
		// BC_LEX_LPAREN + 1 is the right-paren token.
		case '(': case ')':
			l->t = (BcLexType) (c - '(' + BC_LEX_LPAREN);
			break;

		case '*':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_MULTIPLY, BC_LEX_OP_MULTIPLY);
			break;

		// "++", "+=", or "+".
		case '+':
			next = l->buf[l->i];
			if (next == '+') {
				l->i += 1;
				l->t = BC_LEX_OP_INC;
			}
			else {
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_PLUS, BC_LEX_OP_PLUS);
			}
			break;

		case ',':
			l->t = BC_LEX_COMMA;
			break;

		// "--", "-=", or "-".
		case '-':
			next = l->buf[l->i];
			if (next == '-') {
				l->i += 1;
				l->t = BC_LEX_OP_DEC;
			}
			else {
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_MINUS, BC_LEX_OP_MINUS);
			}
			break;

		// A dot either starts a number or stands alone; the standalone form
		// becomes BC_LEX_KW_LAST and is reported via BC_ERR_POSIX_DOT.
		case '.':
			next = l->buf[l->i];
			if (BC_LEX_NUM_CHAR(next, true, false)) {
				bc_lex_number(l, c);
			}
			else {
				l->t = BC_LEX_KW_LAST;
				bc_lex_err(l, BC_ERR_POSIX_DOT);
			}
			break;

		// Start of a block comment, "/=", or "/".
		case '/':
			next = l->buf[l->i];
			if (next == '*') {
				bc_lex_comment(l);
			}
			else {
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_DIVIDE, BC_LEX_OP_DIVIDE);
			}
			break;

		// Digits, including every uppercase letter. GNU bc (and perhaps other
		// implementations) accepts any uppercase letter as a digit: alone it
		// behaves like the hex-style digits, and in a multi-digit number any
		// letter above the input base is clamped to the largest digit valid
		// in that base.
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
		case 'Y': case 'Z':
			bc_lex_number(l, c);
			break;

		case ';':
			l->t = BC_LEX_SCOLON;
			break;

		// "<=" or "<"; with extra math also "<<=" and "<<".
		case '<':
#if BC_ENABLE_EXTRA_MATH
			next = l->buf[l->i];
			if (next == '<') {
				l->i += 1;
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_LSHIFT, BC_LEX_OP_LSHIFT);
				break;
			}
#endif // BC_ENABLE_EXTRA_MATH
			bc_lex_assign(l, BC_LEX_OP_REL_LE, BC_LEX_OP_REL_LT);
			break;

		// "==" or "=".
		case '=':
			bc_lex_assign(l, BC_LEX_OP_REL_EQ, BC_LEX_OP_ASSIGN);
			break;

		// ">=" or ">"; with extra math also ">>=" and ">>".
		case '>':
#if BC_ENABLE_EXTRA_MATH
			next = l->buf[l->i];
			if (next == '>') {
				l->i += 1;
				bc_lex_assign(l, BC_LEX_OP_ASSIGN_RSHIFT, BC_LEX_OP_RSHIFT);
				break;
			}
#endif // BC_ENABLE_EXTRA_MATH
			bc_lex_assign(l, BC_LEX_OP_REL_GE, BC_LEX_OP_REL_GT);
			break;

		// The token type is computed from the character's offset from '['.
		// NOTE(review): in ASCII ']' is '[' + 2, so this presumably relies on
		// the right-bracket token sitting two past BC_LEX_LBRACKET in
		// BcLexType; confirm against the enum.
		case '[': case ']':
			l->t = (BcLexType) (c - '[' + BC_LEX_LBRACKET);
			break;

		// A backslash is only valid directly before a newline, in which case
		// the pair is lexed as whitespace.
		case '\\':
			if (BC_NO_ERR(l->buf[l->i] == '\n')) {
				l->i += 1;
				l->t = BC_LEX_WHITESPACE;
			}
			else {
				bc_lex_invalidChar(l, c);
			}
			break;

		case '^':
			bc_lex_assign(l, BC_LEX_OP_ASSIGN_POWER, BC_LEX_OP_POWER);
			break;

		// Lowercase letters start keywords and names.
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
		case 'y': case 'z':
			bc_lex_identifier(l);
			break;

		// The token type is computed from the character's offset from '{'.
		// NOTE(review): in ASCII '}' is '{' + 2, so this presumably relies on
		// the right-brace token sitting two past BC_LEX_LBRACE in BcLexType;
		// confirm against the enum.
		case '{': case '}':
			l->t = (BcLexType) (c - '{' + BC_LEX_LBRACE);
			break;

		// Only "||" is accepted; a single '|' is an invalid character.
		case '|':
			next = l->buf[l->i];
			if (BC_NO_ERR(next == '|')) {
				bc_lex_verr(l, BC_ERR_POSIX_BOOL, "||");
				l->i += 1;
				l->t = BC_LEX_OP_BOOL_OR;
			}
			else {
				bc_lex_invalidChar(l, c);
			}
			break;

		default:
			bc_lex_invalidChar(l, c);
			break;
	}
}
409 #endif // BC_ENABLED
410