1 /*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * The parser for bc.
33 *
34 */
35
36 #if BC_ENABLED
37
38 #include <assert.h>
39 #include <stdbool.h>
40 #include <stdlib.h>
41 #include <string.h>
42
43 #include <setjmp.h>
44
45 #include <bc.h>
46 #include <num.h>
47 #include <vm.h>
48
49 // Before you embark on trying to understand this code, have you read the
50 // Development manual (manuals/development.md) and the comment in include/bc.h
51 // yet? No? Do that first. I'm serious.
52 //
53 // The reason is because this file holds the most sensitive and finicky code in
54 // the entire codebase. Even getting history to work on Windows was nothing
55 // compared to this. This is where dreams go to die, where dragons live, and
56 // from which Ken Thompson himself would flee.
57
58 static void bc_parse_else(BcParse *p);
59 static void bc_parse_stmt(BcParse *p);
60 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags,
61 BcParseNext next);
62 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next);
63
64 /**
65 * Returns true if an instruction could only have come from a "leaf" expression.
66 * For more on what leaf expressions are, read the comment for BC_PARSE_LEAF().
67 * @param t The instruction to test.
68 */
bc_parse_inst_isLeaf(BcInst t)69 static bool bc_parse_inst_isLeaf(BcInst t) {
70 return (t >= BC_INST_NUM && t <= BC_INST_MAXSCALE) ||
71 #if BC_ENABLE_EXTRA_MATH
72 t == BC_INST_TRUNC ||
73 #endif // BC_ENABLE_EXTRA_MATH
74 t <= BC_INST_DEC;
75 }
76
77 /**
78 * Returns true if the *previous* token was a delimiter. A delimiter is anything
79 * that can legally end a statement. In bc's case, it could be a newline, a
80 * semicolon, and a brace in certain cases.
81 * @param p The parser.
82 */
bc_parse_isDelimiter(const BcParse * p)83 static bool bc_parse_isDelimiter(const BcParse *p) {
84
85 BcLexType t = p->l.t;
86 bool good;
87
88 // If it's an obvious delimiter, say so.
89 if (BC_PARSE_DELIMITER(t)) return true;
90
91 good = false;
92
93 // If the current token is a keyword, then...beware. That means that we need
94 // to check for a "dangling" else, where there was no brace-delimited block
95 // on the previous if.
96 if (t == BC_LEX_KW_ELSE) {
97
98 size_t i;
99 uint16_t *fptr = NULL, flags = BC_PARSE_FLAG_ELSE;
100
101 // As long as going up the stack is valid for a dangling else, keep on.
102 for (i = 0; i < p->flags.len && BC_PARSE_BLOCK_STMT(flags); ++i) {
103
104 fptr = bc_vec_item_rev(&p->flags, i);
105 flags = *fptr;
106
107 // If we need a brace and don't have one, then we don't have a
108 // delimiter.
109 if ((flags & BC_PARSE_FLAG_BRACE) && p->l.last != BC_LEX_RBRACE)
110 return false;
111 }
112
113 // Oh, and we had also better have an if statement somewhere.
114 good = ((flags & BC_PARSE_FLAG_IF) != 0);
115 }
116 else if (t == BC_LEX_RBRACE) {
117
118 size_t i;
119
120 // Since we have a brace, we need to just check if a brace was needed.
121 for (i = 0; !good && i < p->flags.len; ++i) {
122 uint16_t *fptr = bc_vec_item_rev(&p->flags, i);
123 good = (((*fptr) & BC_PARSE_FLAG_BRACE) != 0);
124 }
125 }
126
127 return good;
128 }
129
130 /**
131 * Sets a previously defined exit label. What are labels? See the bc Parsing
132 * section of the Development manual (manuals/development.md).
133 * @param p The parser.
134 */
bc_parse_setLabel(BcParse * p)135 static void bc_parse_setLabel(BcParse *p) {
136
137 BcFunc *func = p->func;
138 BcInstPtr *ip = bc_vec_top(&p->exits);
139 size_t *label;
140
141 assert(func == bc_vec_item(&p->prog->fns, p->fidx));
142
143 // Set the preallocated label to the correct index.
144 label = bc_vec_item(&func->labels, ip->idx);
145 *label = func->code.len;
146
147 // Now, we don't need the exit label; it is done.
148 bc_vec_pop(&p->exits);
149 }
150
151 /**
152 * Creates a label and sets it to idx. If this is an exit label, then idx is
153 * actually invalid, but it doesn't matter because it will be fixed by
154 * bc_parse_setLabel() later.
155 * @param p The parser.
156 * @param idx The index of the label.
157 */
bc_parse_createLabel(BcParse * p,size_t idx)158 static void bc_parse_createLabel(BcParse *p, size_t idx) {
159 bc_vec_push(&p->func->labels, &idx);
160 }
161
162 /**
163 * Creates a conditional label. Unlike an exit label, this label is set at
164 * creation time because it comes *before* the code that will target it.
165 * @param p The parser.
166 * @param idx The index of the label.
167 */
bc_parse_createCondLabel(BcParse * p,size_t idx)168 static void bc_parse_createCondLabel(BcParse *p, size_t idx) {
169 bc_parse_createLabel(p, p->func->code.len);
170 bc_vec_push(&p->conds, &idx);
171 }
172
173 /*
174 * Creates an exit label to be filled in later by bc_parse_setLabel(). Also, why
175 * create a label to be filled in later? Because exit labels are meant to be
176 * targeted by code that comes *before* the label. Since we have to parse that
177 * code first, and don't know how long it will be, we need to just make sure to
178 * reserve a slot to be filled in later when we know.
179 *
180 * By the way, this uses BcInstPtr because it was convenient. The field idx
181 * holds the index, and the field func holds the loop boolean.
182 *
183 * @param p The parser.
184 * @param idx The index of the label's position.
185 * @param loop True if the exit label is for a loop or not.
186 */
bc_parse_createExitLabel(BcParse * p,size_t idx,bool loop)187 static void bc_parse_createExitLabel(BcParse *p, size_t idx, bool loop) {
188
189 BcInstPtr ip;
190
191 assert(p->func == bc_vec_item(&p->prog->fns, p->fidx));
192
193 ip.func = loop;
194 ip.idx = idx;
195 ip.len = 0;
196
197 bc_vec_push(&p->exits, &ip);
198 bc_parse_createLabel(p, SIZE_MAX);
199 }
200
201 /**
202 * Pops the correct operators off of the operator stack based on the current
203 * operator. This is because of the Shunting-Yard algorithm. Lower prec means
204 * higher precedence.
205 * @param p The parser.
206 * @param type The operator.
207 * @param start The previous start of the operator stack. For more
208 * information, see the bc Parsing section of the Development
209 * manual (manuals/development.md).
210 * @param nexprs A pointer to the current number of expressions that have not
211 * been consumed yet. This is an IN and OUT parameter.
212 */
bc_parse_operator(BcParse * p,BcLexType type,size_t start,size_t * nexprs)213 static void bc_parse_operator(BcParse *p, BcLexType type,
214 size_t start, size_t *nexprs)
215 {
216 BcLexType t;
217 uchar l, r = BC_PARSE_OP_PREC(type);
218 uchar left = BC_PARSE_OP_LEFT(type);
219
220 // While we haven't hit the stop point yet.
221 while (p->ops.len > start) {
222
223 // Get the top operator.
224 t = BC_PARSE_TOP_OP(p);
225
226 // If it's a right paren, we have reached the end of whatever expression
227 // this is no matter what.
228 if (t == BC_LEX_LPAREN) break;
229
230 // Break for precedence. Precedence operates differently on left and
231 // right associativity, by the way. A left associative operator that
232 // matches the current precedence should take priority, but a right
233 // associative operator should not.
234 l = BC_PARSE_OP_PREC(t);
235 if (l >= r && (l != r || !left)) break;
236
237 // Do the housekeeping. In particular, make sure to note that one
238 // expression was consumed. (Two were, but another was added.)
239 bc_parse_push(p, BC_PARSE_TOKEN_INST(t));
240 bc_vec_pop(&p->ops);
241 *nexprs -= !BC_PARSE_OP_PREFIX(t);
242 }
243
244 bc_vec_push(&p->ops, &type);
245 }
246
247 /**
248 * Parses a right paren. In the Shunting-Yard algorithm, it needs to be put on
249 * the operator stack. But before that, it needs to consume whatever operators
250 * there are until it hits a left paren.
251 * @param p The parser.
252 * @param nexprs A pointer to the current number of expressions that have not
253 * been consumed yet. This is an IN and OUT parameter.
254 */
bc_parse_rightParen(BcParse * p,size_t * nexprs)255 static void bc_parse_rightParen(BcParse *p, size_t *nexprs) {
256
257 BcLexType top;
258
259 // Consume operators until a left paren.
260 while ((top = BC_PARSE_TOP_OP(p)) != BC_LEX_LPAREN) {
261 bc_parse_push(p, BC_PARSE_TOKEN_INST(top));
262 bc_vec_pop(&p->ops);
263 *nexprs -= !BC_PARSE_OP_PREFIX(top);
264 }
265
266 // We need to pop the left paren as well.
267 bc_vec_pop(&p->ops);
268
269 // Oh, and we also want the next token.
270 bc_lex_next(&p->l);
271 }
272
273 /**
274 * Parses function arguments.
275 * @param p The parser.
276 * @param flags Flags restricting what kind of expressions the arguments can
277 * be.
278 */
bc_parse_args(BcParse * p,uint8_t flags)279 static void bc_parse_args(BcParse *p, uint8_t flags) {
280
281 bool comma = false;
282 size_t nargs;
283
284 bc_lex_next(&p->l);
285
286 // Print and comparison operators not allowed. Well, comparison operators
287 // only for POSIX. But we do allow arrays, and we *must* get a value.
288 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
289 flags |= (BC_PARSE_ARRAY | BC_PARSE_NEEDVAL);
290
291 // Count the arguments and parse them.
292 for (nargs = 0; p->l.t != BC_LEX_RPAREN; ++nargs) {
293
294 bc_parse_expr_status(p, flags, bc_parse_next_arg);
295
296 comma = (p->l.t == BC_LEX_COMMA);
297 if (comma) bc_lex_next(&p->l);
298 }
299
300 // An ending comma is FAIL.
301 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
302
303 // Now do the call with the number of arguments.
304 bc_parse_push(p, BC_INST_CALL);
305 bc_parse_pushIndex(p, nargs);
306 }
307
308 /**
309 * Parses a function call.
310 * @param p The parser.
311 * @param flags Flags restricting what kind of expressions the arguments can
312 * be.
313 */
bc_parse_call(BcParse * p,const char * name,uint8_t flags)314 static void bc_parse_call(BcParse *p, const char *name, uint8_t flags) {
315
316 size_t idx;
317
318 bc_parse_args(p, flags);
319
320 // We just assert this because bc_parse_args() should
321 // ensure that the next token is what it should be.
322 assert(p->l.t == BC_LEX_RPAREN);
323
324 // We cannot use bc_program_insertFunc() here
325 // because it will overwrite an existing function.
326 idx = bc_map_index(&p->prog->fn_map, name);
327
328 // The function does not exist yet. Create a space for it. If the user does
329 // not define it, it's a *runtime* error, not a parse error.
330 if (idx == BC_VEC_INVALID_IDX) {
331
332 BC_SIG_LOCK;
333
334 idx = bc_program_insertFunc(p->prog, name);
335
336 BC_SIG_UNLOCK;
337
338 assert(idx != BC_VEC_INVALID_IDX);
339
340 // Make sure that this pointer was not invalidated.
341 p->func = bc_vec_item(&p->prog->fns, p->fidx);
342 }
343 // The function exists, so set the right function index.
344 else idx = ((BcId*) bc_vec_item(&p->prog->fn_map, idx))->idx;
345
346 bc_parse_pushIndex(p, idx);
347
348 // Make sure to get the next token.
349 bc_lex_next(&p->l);
350 }
351
352 /**
353 * Parses a name/identifier-based expression. It could be a variable, an array
354 * element, an array itself (for function arguments), a function call, etc.
355 *
356 */
bc_parse_name(BcParse * p,BcInst * type,bool * can_assign,uint8_t flags)357 static void bc_parse_name(BcParse *p, BcInst *type,
358 bool *can_assign, uint8_t flags)
359 {
360 char *name;
361
362 BC_SIG_LOCK;
363
364 // We want a copy of the name since the lexer might overwrite its copy.
365 name = bc_vm_strdup(p->l.str.v);
366
367 BC_SETJMP_LOCKED(err);
368
369 BC_SIG_UNLOCK;
370
371 // We need the next token to see if it's just a variable or something more.
372 bc_lex_next(&p->l);
373
374 // Array element or array.
375 if (p->l.t == BC_LEX_LBRACKET) {
376
377 bc_lex_next(&p->l);
378
379 // Array only. This has to be a function parameter.
380 if (p->l.t == BC_LEX_RBRACKET) {
381
382 // Error if arrays are not allowed.
383 if (BC_ERR(!(flags & BC_PARSE_ARRAY)))
384 bc_parse_err(p, BC_ERR_PARSE_EXPR);
385
386 *type = BC_INST_ARRAY;
387 *can_assign = false;
388 }
389 else {
390
391 // If we are here, we have an array element. We need to set the
392 // expression parsing flags.
393 uint8_t flags2 = (flags & ~(BC_PARSE_PRINT | BC_PARSE_REL)) |
394 BC_PARSE_NEEDVAL;
395
396 bc_parse_expr_status(p, flags2, bc_parse_next_elem);
397
398 // The next token *must* be a right bracket.
399 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
400 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
401
402 *type = BC_INST_ARRAY_ELEM;
403 *can_assign = true;
404 }
405
406 // Make sure to get the next token.
407 bc_lex_next(&p->l);
408
409 // Push the instruction and the name of the identifier.
410 bc_parse_push(p, *type);
411 bc_parse_pushName(p, name, false);
412 }
413 else if (p->l.t == BC_LEX_LPAREN) {
414
415 // We are parsing a function call; error if not allowed.
416 if (BC_ERR(flags & BC_PARSE_NOCALL))
417 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
418
419 *type = BC_INST_CALL;
420 *can_assign = false;
421
422 bc_parse_call(p, name, flags);
423 }
424 else {
425 // Just a variable.
426 *type = BC_INST_VAR;
427 *can_assign = true;
428 bc_parse_push(p, BC_INST_VAR);
429 bc_parse_pushName(p, name, true);
430 }
431
432 err:
433 // Need to make sure to unallocate the name.
434 BC_SIG_MAYLOCK;
435 free(name);
436 BC_LONGJMP_CONT;
437 }
438
439 /**
440 * Parses a builtin function that takes no arguments. This includes read(),
441 * rand(), maxibase(), maxobase(), maxscale(), and maxrand().
442 * @param p The parser.
443 * @param inst The instruction corresponding to the builtin.
444 */
bc_parse_noArgBuiltin(BcParse * p,BcInst inst)445 static void bc_parse_noArgBuiltin(BcParse *p, BcInst inst) {
446
447 // Must have a left paren.
448 bc_lex_next(&p->l);
449 if (BC_ERR(p->l.t != BC_LEX_LPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
450
451 // Must have a right paren.
452 bc_lex_next(&p->l);
453 if ((p->l.t != BC_LEX_RPAREN)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
454
455 bc_parse_push(p, inst);
456
457 bc_lex_next(&p->l);
458 }
459
460 /**
461 * Parses a builtin function that takes 1 argument. This includes length(),
462 * sqrt(), abs(), scale(), and irand().
463 * @param p The parser.
464 * @param type The lex token.
465 * @param flags The expression parsing flags for parsing the argument.
466 * @param prev An out parameter; the previous instruction pointer.
467 */
bc_parse_builtin(BcParse * p,BcLexType type,uint8_t flags,BcInst * prev)468 static void bc_parse_builtin(BcParse *p, BcLexType type,
469 uint8_t flags, BcInst *prev)
470 {
471 // Must have a left paren.
472 bc_lex_next(&p->l);
473 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
474 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
475
476 bc_lex_next(&p->l);
477
478 // Change the flags as needed for parsing the argument.
479 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
480 flags |= BC_PARSE_NEEDVAL;
481
482 // Since length can take arrays, we need to specially add that flag.
483 if (type == BC_LEX_KW_LENGTH) flags |= BC_PARSE_ARRAY;
484
485 bc_parse_expr_status(p, flags, bc_parse_next_rel);
486
487 // Must have a right paren.
488 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
489 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
490
491 // Adjust previous based on the token and push it.
492 *prev = type - BC_LEX_KW_LENGTH + BC_INST_LENGTH;
493 bc_parse_push(p, *prev);
494
495 bc_lex_next(&p->l);
496 }
497
498 /**
499 * Parses a builtin function that takes 3 arguments. This includes modexp() and
500 * divmod().
501 */
bc_parse_builtin3(BcParse * p,BcLexType type,uint8_t flags,BcInst * prev)502 static void bc_parse_builtin3(BcParse *p, BcLexType type,
503 uint8_t flags, BcInst *prev)
504 {
505 assert(type == BC_LEX_KW_MODEXP || type == BC_LEX_KW_DIVMOD);
506
507 // Must have a left paren.
508 bc_lex_next(&p->l);
509 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
510 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
511
512 bc_lex_next(&p->l);
513
514 // Change the flags as needed for parsing the argument.
515 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
516 flags |= BC_PARSE_NEEDVAL;
517
518 bc_parse_expr_status(p, flags, bc_parse_next_builtin);
519
520 // Must have a comma.
521 if (BC_ERR(p->l.t != BC_LEX_COMMA))
522 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
523
524 bc_lex_next(&p->l);
525
526 bc_parse_expr_status(p, flags, bc_parse_next_builtin);
527
528 // Must have a comma.
529 if (BC_ERR(p->l.t != BC_LEX_COMMA))
530 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
531
532 bc_lex_next(&p->l);
533
534 // If it is a divmod, parse an array name. Otherwise, just parse another
535 // expression.
536 if (type == BC_LEX_KW_DIVMOD) {
537
538 // Must have a name.
539 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
540
541 // This is safe because the next token should not overwrite the name.
542 bc_lex_next(&p->l);
543
544 // Must have a left bracket.
545 if (BC_ERR(p->l.t != BC_LEX_LBRACKET))
546 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
547
548 // This is safe because the next token should not overwrite the name.
549 bc_lex_next(&p->l);
550
551 // Must have a right bracket.
552 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
553 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
554
555 // This is safe because the next token should not overwrite the name.
556 bc_lex_next(&p->l);
557 }
558 else bc_parse_expr_status(p, flags, bc_parse_next_rel);
559
560 // Must have a right paren.
561 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
562 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
563
564 // Adjust previous based on the token and push it.
565 *prev = type - BC_LEX_KW_MODEXP + BC_INST_MODEXP;
566 bc_parse_push(p, *prev);
567
568 // If we have divmod, we need to assign the modulus to the array element, so
569 // we need to push the instructions for doing so.
570 if (type == BC_LEX_KW_DIVMOD) {
571
572 // The zeroth element.
573 bc_parse_push(p, BC_INST_ZERO);
574 bc_parse_push(p, BC_INST_ARRAY_ELEM);
575
576 // Push the array.
577 bc_parse_pushName(p, p->l.str.v, false);
578
579 // Swap them and assign. After this, the top item on the stack should
580 // be the quotient.
581 bc_parse_push(p, BC_INST_SWAP);
582 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
583 }
584
585 bc_lex_next(&p->l);
586 }
587
588 /**
589 * Parses the scale keyword. This is special because scale can be a value or a
590 * builtin function.
591 * @param p The parser.
592 * @param type An out parameter; the instruction for the parse.
593 * @param can_assign An out parameter; whether the expression can be assigned
594 * to.
595 * @param flags The expression parsing flags for parsing a scale() arg.
596 */
bc_parse_scale(BcParse * p,BcInst * type,bool * can_assign,uint8_t flags)597 static void bc_parse_scale(BcParse *p, BcInst *type,
598 bool *can_assign, uint8_t flags)
599 {
600 bc_lex_next(&p->l);
601
602 // Without the left paren, it's just the keyword.
603 if (p->l.t != BC_LEX_LPAREN) {
604
605 // Set, push, and return.
606 *type = BC_INST_SCALE;
607 *can_assign = true;
608 bc_parse_push(p, BC_INST_SCALE);
609 return;
610 }
611
612 // Handle the scale function.
613 *type = BC_INST_SCALE_FUNC;
614 *can_assign = false;
615
616 // Once again, adjust the flags.
617 flags &= ~(BC_PARSE_PRINT | BC_PARSE_REL);
618 flags |= BC_PARSE_NEEDVAL;
619
620 bc_lex_next(&p->l);
621
622 bc_parse_expr_status(p, flags, bc_parse_next_rel);
623
624 // Must have a right paren.
625 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
626 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
627
628 bc_parse_push(p, BC_INST_SCALE_FUNC);
629
630 bc_lex_next(&p->l);
631 }
632
633 /**
634 * Parses and increment or decrement operator. This is a bit complex.
635 * @param p The parser.
636 * @param prev An out parameter; the previous instruction pointer.
637 * @param can_assign An out parameter; whether the expression can be assigned
638 * to.
639 * @param nexs An in/out parameter; the number of expressions in the
640 * parse tree that are not used.
641 * @param flags The expression parsing flags for parsing a scale() arg.
642 */
bc_parse_incdec(BcParse * p,BcInst * prev,bool * can_assign,size_t * nexs,uint8_t flags)643 static void bc_parse_incdec(BcParse *p, BcInst *prev, bool *can_assign,
644 size_t *nexs, uint8_t flags)
645 {
646 BcLexType type;
647 uchar inst;
648 BcInst etype = *prev;
649 BcLexType last = p->l.last;
650
651 assert(prev != NULL && can_assign != NULL);
652
653 // If we can't assign to the previous token, then we have an error.
654 if (BC_ERR(last == BC_LEX_OP_INC || last == BC_LEX_OP_DEC ||
655 last == BC_LEX_RPAREN))
656 {
657 bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
658 }
659
660 // Is the previous instruction for a variable?
661 if (BC_PARSE_INST_VAR(etype)) {
662
663 // If so, this is a postfix operator.
664 if (!*can_assign) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
665
666 // Only postfix uses BC_INST_INC and BC_INST_DEC.
667 *prev = inst = BC_INST_INC + (p->l.t != BC_LEX_OP_INC);
668 bc_parse_push(p, inst);
669 bc_lex_next(&p->l);
670 *can_assign = false;
671 }
672 else {
673
674 // This is a prefix operator. In that case, we just convert it to
675 // an assignment instruction.
676 *prev = inst = BC_INST_ASSIGN_PLUS + (p->l.t != BC_LEX_OP_INC);
677
678 bc_lex_next(&p->l);
679 type = p->l.t;
680
681 // Because we parse the next part of the expression
682 // right here, we need to increment this.
683 *nexs = *nexs + 1;
684
685 // Is the next token a normal identifier?
686 if (type == BC_LEX_NAME) {
687
688 // Parse the name.
689 uint8_t flags2 = flags & ~BC_PARSE_ARRAY;
690 bc_parse_name(p, prev, can_assign, flags2 | BC_PARSE_NOCALL);
691 }
692 // Is the next token a global?
693 else if (type >= BC_LEX_KW_LAST && type <= BC_LEX_KW_OBASE) {
694 bc_parse_push(p, type - BC_LEX_KW_LAST + BC_INST_LAST);
695 bc_lex_next(&p->l);
696 }
697 // Is the next token specifically scale, which needs special treatment?
698 else if (BC_NO_ERR(type == BC_LEX_KW_SCALE)) {
699
700 bc_lex_next(&p->l);
701
702 // Check that scale() was not used.
703 if (BC_ERR(p->l.t == BC_LEX_LPAREN))
704 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
705 else bc_parse_push(p, BC_INST_SCALE);
706 }
707 // Now we know we have an error.
708 else bc_parse_err(p, BC_ERR_PARSE_TOKEN);
709
710 *can_assign = false;
711
712 bc_parse_push(p, BC_INST_ONE);
713 bc_parse_push(p, inst);
714 }
715 }
716
717 /**
718 * Parses the minus operator. This needs special treatment because it is either
719 * subtract or negation.
720 * @param p The parser.
721 * @param prev An in/out parameter; the previous instruction.
722 * @param ops_bgn The size of the operator stack.
723 * @param rparen True if the last token was a right paren.
724 * @param binlast True if the last token was a binary operator.
725 * @param nexprs An in/out parameter; the number of unused expressions.
726 */
bc_parse_minus(BcParse * p,BcInst * prev,size_t ops_bgn,bool rparen,bool binlast,size_t * nexprs)727 static void bc_parse_minus(BcParse *p, BcInst *prev, size_t ops_bgn,
728 bool rparen, bool binlast, size_t *nexprs)
729 {
730 BcLexType type;
731
732 bc_lex_next(&p->l);
733
734 // Figure out if it's a minus or a negation.
735 type = BC_PARSE_LEAF(*prev, binlast, rparen) ? BC_LEX_OP_MINUS : BC_LEX_NEG;
736 *prev = BC_PARSE_TOKEN_INST(type);
737
738 // We can just push onto the op stack because this is the largest
739 // precedence operator that gets pushed. Inc/dec does not.
740 if (type != BC_LEX_OP_MINUS) bc_vec_push(&p->ops, &type);
741 else bc_parse_operator(p, type, ops_bgn, nexprs);
742 }
743
744 /**
745 * Parses a string.
746 * @param p The parser.
747 * @param inst The instruction corresponding to how the string was found and
748 * how it should be printed.
749 */
bc_parse_str(BcParse * p,BcInst inst)750 static void bc_parse_str(BcParse *p, BcInst inst) {
751 bc_parse_addString(p);
752 bc_parse_push(p, inst);
753 bc_lex_next(&p->l);
754 }
755
756 /**
757 * Parses a print statement.
758 * @param p The parser.
759 */
bc_parse_print(BcParse * p,BcLexType type)760 static void bc_parse_print(BcParse *p, BcLexType type) {
761
762 BcLexType t;
763 bool comma = false;
764 BcInst inst = type == BC_LEX_KW_STREAM ?
765 BC_INST_PRINT_STREAM : BC_INST_PRINT_POP;
766
767 bc_lex_next(&p->l);
768
769 t = p->l.t;
770
771 // A print or stream statement has to have *something*.
772 if (bc_parse_isDelimiter(p)) bc_parse_err(p, BC_ERR_PARSE_PRINT);
773
774 do {
775
776 // If the token is a string, then print it with escapes.
777 // BC_INST_PRINT_POP plays that role for bc.
778 if (t == BC_LEX_STR) bc_parse_str(p, inst);
779 else {
780 // We have an actual number; parse and add a print instruction.
781 bc_parse_expr_status(p, BC_PARSE_NEEDVAL, bc_parse_next_print);
782 bc_parse_push(p, inst);
783 }
784
785 // Is the next token a comma?
786 comma = (p->l.t == BC_LEX_COMMA);
787
788 // Get the next token if we have a comma.
789 if (comma) bc_lex_next(&p->l);
790 else {
791
792 // If we don't have a comma, the statement needs to end.
793 if (!bc_parse_isDelimiter(p))
794 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
795 else break;
796 }
797
798 t = p->l.t;
799
800 } while (true);
801
802 // If we have a comma but no token, that's bad.
803 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
804 }
805
806 /**
807 * Parses a return statement.
808 * @param p The parser.
809 */
bc_parse_return(BcParse * p)810 static void bc_parse_return(BcParse *p) {
811
812 BcLexType t;
813 bool paren;
814 uchar inst = BC_INST_RET0;
815
816 // If we are not in a function, that's an error.
817 if (BC_ERR(!BC_PARSE_FUNC(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
818
819 // If we are in a void function, make sure to return void.
820 if (p->func->voidfn) inst = BC_INST_RET_VOID;
821
822 bc_lex_next(&p->l);
823
824 t = p->l.t;
825 paren = (t == BC_LEX_LPAREN);
826
827 // An empty return statement just needs to push the selected instruction.
828 if (bc_parse_isDelimiter(p)) bc_parse_push(p, inst);
829 else {
830
831 BcParseStatus s;
832
833 // Need to parse the expression whose value will be returned.
834 s = bc_parse_expr_err(p, BC_PARSE_NEEDVAL, bc_parse_next_expr);
835
836 // If the expression was empty, just push the selected instruction.
837 if (s == BC_PARSE_STATUS_EMPTY_EXPR) {
838 bc_parse_push(p, inst);
839 bc_lex_next(&p->l);
840 }
841
842 // POSIX requires parentheses.
843 if (!paren || p->l.last != BC_LEX_RPAREN) {
844 bc_parse_err(p, BC_ERR_POSIX_RET);
845 }
846
847 // Void functions require an empty expression.
848 if (BC_ERR(p->func->voidfn)) {
849 if (s != BC_PARSE_STATUS_EMPTY_EXPR)
850 bc_parse_verr(p, BC_ERR_PARSE_RET_VOID, p->func->name);
851 }
852 // If we got here, we want to be sure to end the function with a real
853 // return instruction, just in case.
854 else bc_parse_push(p, BC_INST_RET);
855 }
856 }
857
858 /**
859 * Clears flags that indicate the end of an if statement and its block and sets
860 * the jump location.
861 * @param p The parser.
862 */
bc_parse_noElse(BcParse * p)863 static void bc_parse_noElse(BcParse *p) {
864 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p);
865 *flag_ptr = (*flag_ptr & ~(BC_PARSE_FLAG_IF_END));
866 bc_parse_setLabel(p);
867 }
868
869 /**
870 * Ends (finishes parsing) the body of a control statement or a function.
871 * @param p The parser.
872 * @param brace True if the body was ended by a brace, false otherwise.
873 */
bc_parse_endBody(BcParse * p,bool brace)874 static void bc_parse_endBody(BcParse *p, bool brace) {
875
876 bool has_brace, new_else = false;
877
878 // We cannot be ending a body if there are no bodies to end.
879 if (BC_ERR(p->flags.len <= 1)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
880
881 if (brace) {
882
883 // The brace was already gotten; make sure that the caller did not lie.
884 // We check for the requirement of braces later.
885 assert(p->l.t == BC_LEX_RBRACE);
886
887 bc_lex_next(&p->l);
888
889 // If the next token is not a delimiter, that is a problem.
890 if (BC_ERR(!bc_parse_isDelimiter(p)))
891 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
892 }
893
894 // Do we have a brace flag?
895 has_brace = (BC_PARSE_BRACE(p) != 0);
896
897 do {
898 size_t len = p->flags.len;
899 bool loop;
900
901 // If we have a brace flag but not a brace, that's a problem.
902 if (has_brace && !brace) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
903
904 // Are we inside a loop?
905 loop = (BC_PARSE_LOOP_INNER(p) != 0);
906
907 // If we are ending a loop or an else...
908 if (loop || BC_PARSE_ELSE(p)) {
909
910 // Loops have condition labels that we have to take care of as well.
911 if (loop) {
912
913 size_t *label = bc_vec_top(&p->conds);
914
915 bc_parse_push(p, BC_INST_JUMP);
916 bc_parse_pushIndex(p, *label);
917
918 bc_vec_pop(&p->conds);
919 }
920
921 bc_parse_setLabel(p);
922 bc_vec_pop(&p->flags);
923 }
924 // If we are ending a function...
925 else if (BC_PARSE_FUNC_INNER(p)) {
926 BcInst inst = (p->func->voidfn ? BC_INST_RET_VOID : BC_INST_RET0);
927 bc_parse_push(p, inst);
928 bc_parse_updateFunc(p, BC_PROG_MAIN);
929 bc_vec_pop(&p->flags);
930 }
931 // If we have a brace flag and not an if statement, we can pop the top
932 // of the flags stack because they have been taken care of above.
933 else if (has_brace && !BC_PARSE_IF(p)) bc_vec_pop(&p->flags);
934
935 // This needs to be last to parse nested if's properly.
936 if (BC_PARSE_IF(p) && (len == p->flags.len || !BC_PARSE_BRACE(p))) {
937
938 // Eat newlines.
939 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
940
941 // *Now* we can pop the flags.
942 bc_vec_pop(&p->flags);
943
944 // If we are allowed non-POSIX stuff...
945 if (!BC_S) {
946
947 // Have we found yet another dangling else?
948 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_IF_END;
949 new_else = (p->l.t == BC_LEX_KW_ELSE);
950
951 // Parse the else or end the if statement body.
952 if (new_else) bc_parse_else(p);
953 else if (!has_brace && (!BC_PARSE_IF_END(p) || brace))
954 bc_parse_noElse(p);
955 }
956 // POSIX requires us to do the bare minimum only.
957 else bc_parse_noElse(p);
958 }
959
960 // If these are both true, we have "used" the braces that we found.
961 if (brace && has_brace) brace = false;
962
963 // This condition was perhaps the hardest single part of the parser. If the
964 // flags stack does not have enough, we should stop. If we have a new else
965 // statement, we should stop. If we do have the end of an if statement and
966 // we have eaten the brace, we should stop. If we do have a brace flag, we
967 // should stop.
968 } while (p->flags.len > 1 && !new_else && (!BC_PARSE_IF_END(p) || brace) &&
969 !(has_brace = (BC_PARSE_BRACE(p) != 0)));
970
971 // If we have a brace, yet no body for it, that's a problem.
972 if (BC_ERR(p->flags.len == 1 && brace))
973 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
974 else if (brace && BC_PARSE_BRACE(p)) {
975
976 // If we make it here, we have a brace and a flag for it.
977 uint16_t flags = BC_PARSE_TOP_FLAG(p);
978
979 // This condition ensure that the *last* body is correctly finished by
980 // popping its flags.
981 if (!(flags & (BC_PARSE_FLAG_FUNC_INNER | BC_PARSE_FLAG_LOOP_INNER)) &&
982 !(flags & (BC_PARSE_FLAG_IF | BC_PARSE_FLAG_ELSE)) &&
983 !(flags & (BC_PARSE_FLAG_IF_END)))
984 {
985 bc_vec_pop(&p->flags);
986 }
987 }
988 }
989
990 /**
991 * Starts the body of a control statement or function.
992 * @param p The parser.
993 * @param flags The current flags (will be edited).
994 */
bc_parse_startBody(BcParse * p,uint16_t flags)995 static void bc_parse_startBody(BcParse *p, uint16_t flags) {
996 assert(flags);
997 flags |= (BC_PARSE_TOP_FLAG(p) & (BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_LOOP));
998 flags |= BC_PARSE_FLAG_BODY;
999 bc_vec_push(&p->flags, &flags);
1000 }
1001
1002 /**
1003 * Parses an if statement.
1004 * @param p The parser.
1005 */
bc_parse_if(BcParse * p)1006 static void bc_parse_if(BcParse *p) {
1007
1008 // We are allowed relational operators, and we must have a value.
1009 size_t idx;
1010 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1011
1012 // Get the left paren and barf if necessary.
1013 bc_lex_next(&p->l);
1014 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1015 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1016
1017 // Parse the condition.
1018 bc_lex_next(&p->l);
1019 bc_parse_expr_status(p, flags, bc_parse_next_rel);
1020
1021 // Must have a right paren.
1022 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1023 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1024
1025 bc_lex_next(&p->l);
1026
1027 // Insert the conditional jump instruction.
1028 bc_parse_push(p, BC_INST_JUMP_ZERO);
1029
1030 idx = p->func->labels.len;
1031
1032 // Push the index for the instruction and create an exit label for an else
1033 // statement.
1034 bc_parse_pushIndex(p, idx);
1035 bc_parse_createExitLabel(p, idx, false);
1036
1037 bc_parse_startBody(p, BC_PARSE_FLAG_IF);
1038 }
1039
1040 /**
1041 * Parses an else statement.
1042 * @param p The parser.
1043 */
bc_parse_else(BcParse * p)1044 static void bc_parse_else(BcParse *p) {
1045
1046 size_t idx = p->func->labels.len;
1047
1048 // We must be at the end of an if statement.
1049 if (BC_ERR(!BC_PARSE_IF_END(p)))
1050 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1051
1052 // Push an unconditional jump to make bc jump over the else statement if it
1053 // executed the original if statement.
1054 bc_parse_push(p, BC_INST_JUMP);
1055 bc_parse_pushIndex(p, idx);
1056
1057 // Clear the else stuff. Yes, that function is misnamed for its use here,
1058 // but deal with it.
1059 bc_parse_noElse(p);
1060
1061 // Create the exit label and parse the body.
1062 bc_parse_createExitLabel(p, idx, false);
1063 bc_parse_startBody(p, BC_PARSE_FLAG_ELSE);
1064
1065 bc_lex_next(&p->l);
1066 }
1067
1068 /**
1069 * Parse a while loop.
1070 * @param p The parser.
1071 */
bc_parse_while(BcParse * p)1072 static void bc_parse_while(BcParse *p) {
1073
1074 // We are allowed relational operators, and we must have a value.
1075 size_t idx;
1076 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1077
1078 // Get the left paren and barf if necessary.
1079 bc_lex_next(&p->l);
1080 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1081 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1082 bc_lex_next(&p->l);
1083
1084 // Create the labels. Loops need both.
1085 bc_parse_createCondLabel(p, p->func->labels.len);
1086 idx = p->func->labels.len;
1087 bc_parse_createExitLabel(p, idx, true);
1088
1089 // Parse the actual condition and barf on non-right paren.
1090 bc_parse_expr_status(p, flags, bc_parse_next_rel);
1091 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1092 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1093 bc_lex_next(&p->l);
1094
1095 // Now we can push the conditional jump and start the body.
1096 bc_parse_push(p, BC_INST_JUMP_ZERO);
1097 bc_parse_pushIndex(p, idx);
1098 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER);
1099 }
1100
1101 /**
1102 * Parse a for loop.
1103 * @param p The parser.
1104 */
bc_parse_for(BcParse * p)1105 static void bc_parse_for(BcParse *p) {
1106
1107 size_t cond_idx, exit_idx, body_idx, update_idx;
1108
1109 // Barf on the missing left paren.
1110 bc_lex_next(&p->l);
1111 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1112 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1113 bc_lex_next(&p->l);
1114
1115 // The first statement can be empty, but if it is, check for error in POSIX
1116 // mode. Otherwise, parse it.
1117 if (p->l.t != BC_LEX_SCOLON)
1118 bc_parse_expr_status(p, 0, bc_parse_next_for);
1119 else bc_parse_err(p, BC_ERR_POSIX_FOR);
1120
1121 // Must have a semicolon.
1122 if (BC_ERR(p->l.t != BC_LEX_SCOLON)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1123 bc_lex_next(&p->l);
1124
1125 // These are indices for labels. There are so many of them because the end
1126 // of the loop must unconditionally jump to the update code. Then the update
1127 // code must unconditionally jump to the condition code. Then the condition
1128 // code must *conditionally* jump to the exit.
1129 cond_idx = p->func->labels.len;
1130 update_idx = cond_idx + 1;
1131 body_idx = update_idx + 1;
1132 exit_idx = body_idx + 1;
1133
1134 // This creates the condition label.
1135 bc_parse_createLabel(p, p->func->code.len);
1136
1137 // Parse an expression if it exists.
1138 if (p->l.t != BC_LEX_SCOLON) {
1139 uint8_t flags = (BC_PARSE_REL | BC_PARSE_NEEDVAL);
1140 bc_parse_expr_status(p, flags, bc_parse_next_for);
1141 }
1142 else {
1143
1144 // Set this for the next call to bc_parse_number because an empty
1145 // condition means that it is an infinite loop, so the condition must be
1146 // non-zero. This is safe to set because the current token is a
1147 // semicolon, which has no string requirement.
1148 bc_vec_string(&p->l.str, sizeof(bc_parse_one) - 1, bc_parse_one);
1149 bc_parse_number(p);
1150
1151 // An empty condition makes POSIX mad.
1152 bc_parse_err(p, BC_ERR_POSIX_FOR);
1153 }
1154
1155 // Must have a semicolon.
1156 if (BC_ERR(p->l.t != BC_LEX_SCOLON))
1157 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1158 bc_lex_next(&p->l);
1159
1160 // Now we can set up the conditional jump to the exit and an unconditional
1161 // jump to the body right after. The unconditional jump to the body is
1162 // because there is update code coming right after the condition, so we need
1163 // to skip it to get to the body.
1164 bc_parse_push(p, BC_INST_JUMP_ZERO);
1165 bc_parse_pushIndex(p, exit_idx);
1166 bc_parse_push(p, BC_INST_JUMP);
1167 bc_parse_pushIndex(p, body_idx);
1168
1169 // Now create the label for the update code.
1170 bc_parse_createCondLabel(p, update_idx);
1171
1172 // Parse if not empty, and if it is, let POSIX yell if necessary.
1173 if (p->l.t != BC_LEX_RPAREN)
1174 bc_parse_expr_status(p, 0, bc_parse_next_rel);
1175 else bc_parse_err(p, BC_ERR_POSIX_FOR);
1176
1177 // Must have a right paren.
1178 if (BC_ERR(p->l.t != BC_LEX_RPAREN))
1179 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1180
1181 // Set up a jump to the condition right after the update code.
1182 bc_parse_push(p, BC_INST_JUMP);
1183 bc_parse_pushIndex(p, cond_idx);
1184 bc_parse_createLabel(p, p->func->code.len);
1185
1186 // Create an exit label for the body and start the body.
1187 bc_parse_createExitLabel(p, exit_idx, true);
1188 bc_lex_next(&p->l);
1189 bc_parse_startBody(p, BC_PARSE_FLAG_LOOP | BC_PARSE_FLAG_LOOP_INNER);
1190 }
1191
1192 /**
1193 * Parse a statement or token that indicates a loop exit. This includes an
1194 * actual loop exit, the break keyword, or the continue keyword.
1195 * @param p The parser.
1196 * @param type The type of exit.
1197 */
bc_parse_loopExit(BcParse * p,BcLexType type)1198 static void bc_parse_loopExit(BcParse *p, BcLexType type) {
1199
1200 size_t i;
1201 BcInstPtr *ip;
1202
1203 // Must have a loop. If we don't, that's an error.
1204 if (BC_ERR(!BC_PARSE_LOOP(p))) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1205
1206 // If we have a break statement...
1207 if (type == BC_LEX_KW_BREAK) {
1208
1209 // If there are no exits, something went wrong somewhere.
1210 if (BC_ERR(!p->exits.len)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1211
1212 // Get the exit.
1213 i = p->exits.len - 1;
1214 ip = bc_vec_item(&p->exits, i);
1215
1216 // The condition !ip->func is true if the exit is not for a loop, so we
1217 // need to find the first actual loop exit.
1218 while (!ip->func && i < p->exits.len) ip = bc_vec_item(&p->exits, i--);
1219
1220 // Make sure everything is hunky dory.
1221 assert(ip != NULL && (i < p->exits.len || ip->func));
1222
1223 // Set the index for the exit.
1224 i = ip->idx;
1225 }
1226 // If we have a continue statement or just the loop end, jump to the
1227 // condition (or update for a foor loop).
1228 else i = *((size_t*) bc_vec_top(&p->conds));
1229
1230 // Add the unconditional jump.
1231 bc_parse_push(p, BC_INST_JUMP);
1232 bc_parse_pushIndex(p, i);
1233
1234 bc_lex_next(&p->l);
1235 }
1236
1237 /**
1238 * Parse a function (header).
1239 * @param p The parser.
1240 */
bc_parse_func(BcParse * p)1241 static void bc_parse_func(BcParse *p) {
1242
1243 bool comma = false, voidfn;
1244 uint16_t flags;
1245 size_t idx;
1246
1247 bc_lex_next(&p->l);
1248
1249 // Must have a name.
1250 if (BC_ERR(p->l.t != BC_LEX_NAME)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1251
1252 // If the name is "void", and POSIX is not on, mark as void.
1253 voidfn = (!BC_IS_POSIX && p->l.t == BC_LEX_NAME &&
1254 !strcmp(p->l.str.v, "void"));
1255
1256 // We can safely do this because the expected token should not overwrite the
1257 // function name.
1258 bc_lex_next(&p->l);
1259
1260 // If we *don't* have another name, then void is the name of the function.
1261 voidfn = (voidfn && p->l.t == BC_LEX_NAME);
1262
1263 // With a void function, allow POSIX to complain and get a new token.
1264 if (voidfn) {
1265
1266 bc_parse_err(p, BC_ERR_POSIX_VOID);
1267
1268 // We can safely do this because the expected token should not overwrite
1269 // the function name.
1270 bc_lex_next(&p->l);
1271 }
1272
1273 // Must have a left paren.
1274 if (BC_ERR(p->l.t != BC_LEX_LPAREN))
1275 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1276
1277 // Make sure the functions map and vector are synchronized.
1278 assert(p->prog->fns.len == p->prog->fn_map.len);
1279
1280 // Must lock signals because vectors are changed, and the vector functions
1281 // expect signals to be locked.
1282 BC_SIG_LOCK;
1283
1284 // Insert the function by name into the map and vector.
1285 idx = bc_program_insertFunc(p->prog, p->l.str.v);
1286
1287 BC_SIG_UNLOCK;
1288
1289 // Make sure the insert worked.
1290 assert(idx);
1291
1292 // Update the function pointer and stuff in the parser and set its void.
1293 bc_parse_updateFunc(p, idx);
1294 p->func->voidfn = voidfn;
1295
1296 bc_lex_next(&p->l);
1297
1298 // While we do not have a right paren, we are still parsing arguments.
1299 while (p->l.t != BC_LEX_RPAREN) {
1300
1301 BcType t = BC_TYPE_VAR;
1302
1303 // If we have an asterisk, we are parsing a reference argument.
1304 if (p->l.t == BC_LEX_OP_MULTIPLY) {
1305
1306 t = BC_TYPE_REF;
1307 bc_lex_next(&p->l);
1308
1309 // Let POSIX complain if necessary.
1310 bc_parse_err(p, BC_ERR_POSIX_REF);
1311 }
1312
1313 // If we don't have a name, the argument will not have a name. Barf.
1314 if (BC_ERR(p->l.t != BC_LEX_NAME))
1315 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1316
1317 // Increment the number of parameters.
1318 p->func->nparams += 1;
1319
1320 // Copy the string in the lexer so that we can use the lexer again.
1321 bc_vec_string(&p->buf, p->l.str.len, p->l.str.v);
1322
1323 bc_lex_next(&p->l);
1324
1325 // We are parsing an array parameter if this is true.
1326 if (p->l.t == BC_LEX_LBRACKET) {
1327
1328 // Set the array type, unless we are already parsing a reference.
1329 if (t == BC_TYPE_VAR) t = BC_TYPE_ARRAY;
1330
1331 bc_lex_next(&p->l);
1332
1333 // The brackets *must* be empty.
1334 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
1335 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1336
1337 bc_lex_next(&p->l);
1338 }
1339 // If we did *not* get a bracket, but we are expecting a reference, we
1340 // have a problem.
1341 else if (BC_ERR(t == BC_TYPE_REF))
1342 bc_parse_verr(p, BC_ERR_PARSE_REF_VAR, p->buf.v);
1343
1344 // Test for comma and get the next token if it exists.
1345 comma = (p->l.t == BC_LEX_COMMA);
1346 if (comma) bc_lex_next(&p->l);
1347
1348 // Insert the parameter into the function.
1349 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line);
1350 }
1351
1352 // If we have a comma, but no parameter, barf.
1353 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1354
1355 // Start the body.
1356 flags = BC_PARSE_FLAG_FUNC | BC_PARSE_FLAG_FUNC_INNER;
1357 bc_parse_startBody(p, flags);
1358
1359 bc_lex_next(&p->l);
1360
1361 // POSIX requires that a brace be on the same line as the function header.
1362 // If we don't have a brace, let POSIX throw an error.
1363 if (p->l.t != BC_LEX_LBRACE) bc_parse_err(p, BC_ERR_POSIX_BRACE);
1364 }
1365
1366 /**
1367 * Parse an auto list.
1368 * @param p The parser.
1369 */
bc_parse_auto(BcParse * p)1370 static void bc_parse_auto(BcParse *p) {
1371
1372 bool comma, one;
1373
1374 // Error if the auto keyword appeared in the wrong place.
1375 if (BC_ERR(!p->auto_part)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1376 bc_lex_next(&p->l);
1377
1378 p->auto_part = comma = false;
1379
1380 // We need at least one variable or array.
1381 one = (p->l.t == BC_LEX_NAME);
1382
1383 // While we have a variable or array.
1384 while (p->l.t == BC_LEX_NAME) {
1385
1386 BcType t;
1387
1388 // Copy the name from the lexer, so we can use it again.
1389 bc_vec_string(&p->buf, p->l.str.len - 1, p->l.str.v);
1390
1391 bc_lex_next(&p->l);
1392
1393 // If we are parsing an array...
1394 if (p->l.t == BC_LEX_LBRACKET) {
1395
1396 t = BC_TYPE_ARRAY;
1397
1398 bc_lex_next(&p->l);
1399
1400 // The brackets *must* be empty.
1401 if (BC_ERR(p->l.t != BC_LEX_RBRACKET))
1402 bc_parse_err(p, BC_ERR_PARSE_FUNC);
1403
1404 bc_lex_next(&p->l);
1405 }
1406 else t = BC_TYPE_VAR;
1407
1408 // Test for comma and get the next token if it exists.
1409 comma = (p->l.t == BC_LEX_COMMA);
1410 if (comma) bc_lex_next(&p->l);
1411
1412 // Insert the auto into the function.
1413 bc_func_insert(p->func, p->prog, p->buf.v, t, p->l.line);
1414 }
1415
1416 // If we have a comma, but no auto, barf.
1417 if (BC_ERR(comma)) bc_parse_err(p, BC_ERR_PARSE_FUNC);
1418
1419 // If we don't have any variables or arrays, barf.
1420 if (BC_ERR(!one)) bc_parse_err(p, BC_ERR_PARSE_NO_AUTO);
1421
1422 // The auto statement should be all that's in the statement.
1423 if (BC_ERR(!bc_parse_isDelimiter(p)))
1424 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1425 }
1426
1427 /**
1428 * Parses a body.
1429 * @param p The parser.
1430 * @param brace True if a brace was encountered, false otherwise.
1431 */
bc_parse_body(BcParse * p,bool brace)1432 static void bc_parse_body(BcParse *p, bool brace) {
1433
1434 uint16_t *flag_ptr = BC_PARSE_TOP_FLAG_PTR(p);
1435
1436 assert(flag_ptr != NULL);
1437 assert(p->flags.len >= 2);
1438
1439 // The body flag is for when we expect a body. We got a body, so clear the
1440 // flag.
1441 *flag_ptr &= ~(BC_PARSE_FLAG_BODY);
1442
1443 // If we are inside a function, that means we just barely entered it, and
1444 // we can expect an auto list.
1445 if (*flag_ptr & BC_PARSE_FLAG_FUNC_INNER) {
1446
1447 // We *must* have a brace in this case.
1448 if (BC_ERR(!brace)) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1449
1450 p->auto_part = (p->l.t != BC_LEX_KW_AUTO);
1451
1452 if (!p->auto_part) {
1453
1454 // Make sure this is true to not get a parse error.
1455 p->auto_part = true;
1456
1457 // Since we already have the auto keyword, parse.
1458 bc_parse_auto(p);
1459 }
1460
1461 // Eat a newline.
1462 if (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
1463 }
1464 else {
1465
1466 // This is the easy part.
1467 size_t len = p->flags.len;
1468
1469 assert(*flag_ptr);
1470
1471 // Parse a statement.
1472 bc_parse_stmt(p);
1473
1474 // This is a very important condition to get right. If there is no
1475 // brace, and no body flag, and the flags len hasn't shrunk, then we
1476 // have a body that was not delimited by braces, so we need to end it
1477 // now, after just one statement.
1478 if (!brace && !BC_PARSE_BODY(p) && len <= p->flags.len)
1479 bc_parse_endBody(p, false);
1480 }
1481 }
1482
1483 /**
1484 * Parses a statement. This is the entry point for just about everything, except
1485 * function definitions.
1486 * @param p The parser.
1487 */
bc_parse_stmt(BcParse * p)1488 static void bc_parse_stmt(BcParse *p) {
1489
1490 size_t len;
1491 uint16_t flags;
1492 BcLexType type = p->l.t;
1493
1494 // Eat newline.
1495 if (type == BC_LEX_NLINE) {
1496 bc_lex_next(&p->l);
1497 return;
1498 }
1499
1500 // Eat auto list.
1501 if (type == BC_LEX_KW_AUTO) {
1502 bc_parse_auto(p);
1503 return;
1504 }
1505
1506 // If we reach this point, no auto list is allowed.
1507 p->auto_part = false;
1508
1509 // Everything but an else needs to be taken care of here, but else is
1510 // special.
1511 if (type != BC_LEX_KW_ELSE) {
1512
1513 // After an if, no else found.
1514 if (BC_PARSE_IF_END(p)) {
1515
1516 // Clear the expectation for else, end body, and return. Returning
1517 // gives us a clean slate for parsing again.
1518 bc_parse_noElse(p);
1519 if (p->flags.len > 1 && !BC_PARSE_BRACE(p))
1520 bc_parse_endBody(p, false);
1521 return;
1522 }
1523 // With a left brace, we are parsing a body.
1524 else if (type == BC_LEX_LBRACE) {
1525
1526 // We need to start a body if we are not expecting one yet.
1527 if (!BC_PARSE_BODY(p)) {
1528 bc_parse_startBody(p, BC_PARSE_FLAG_BRACE);
1529 bc_lex_next(&p->l);
1530 }
1531 // If we *are* expecting a body, that body should get a brace. This
1532 // takes care of braces being on a different line than if and loop
1533 // headers.
1534 else {
1535 *(BC_PARSE_TOP_FLAG_PTR(p)) |= BC_PARSE_FLAG_BRACE;
1536 bc_lex_next(&p->l);
1537 bc_parse_body(p, true);
1538 }
1539
1540 // If we have reached this point, we need to return for a clean
1541 // slate.
1542 return;
1543 }
1544 // This happens when we are expecting a body and get a single statement,
1545 // i.e., a body with no braces surrounding it. Returns after for a clean
1546 // slate.
1547 else if (BC_PARSE_BODY(p) && !BC_PARSE_BRACE(p)) {
1548 bc_parse_body(p, false);
1549 return;
1550 }
1551 }
1552
1553 len = p->flags.len;
1554 flags = BC_PARSE_TOP_FLAG(p);
1555
1556 switch (type) {
1557
1558 // All of these are valid for expressions.
1559 case BC_LEX_OP_INC:
1560 case BC_LEX_OP_DEC:
1561 case BC_LEX_OP_MINUS:
1562 case BC_LEX_OP_BOOL_NOT:
1563 case BC_LEX_LPAREN:
1564 case BC_LEX_NAME:
1565 case BC_LEX_NUMBER:
1566 case BC_LEX_KW_IBASE:
1567 case BC_LEX_KW_LAST:
1568 case BC_LEX_KW_LENGTH:
1569 case BC_LEX_KW_OBASE:
1570 case BC_LEX_KW_SCALE:
1571 #if BC_ENABLE_EXTRA_MATH
1572 case BC_LEX_KW_SEED:
1573 #endif // BC_ENABLE_EXTRA_MATH
1574 case BC_LEX_KW_SQRT:
1575 case BC_LEX_KW_ABS:
1576 #if BC_ENABLE_EXTRA_MATH
1577 case BC_LEX_KW_IRAND:
1578 #endif // BC_ENABLE_EXTRA_MATH
1579 case BC_LEX_KW_ASCIIFY:
1580 case BC_LEX_KW_MODEXP:
1581 case BC_LEX_KW_DIVMOD:
1582 case BC_LEX_KW_READ:
1583 #if BC_ENABLE_EXTRA_MATH
1584 case BC_LEX_KW_RAND:
1585 #endif // BC_ENABLE_EXTRA_MATH
1586 case BC_LEX_KW_MAXIBASE:
1587 case BC_LEX_KW_MAXOBASE:
1588 case BC_LEX_KW_MAXSCALE:
1589 #if BC_ENABLE_EXTRA_MATH
1590 case BC_LEX_KW_MAXRAND:
1591 #endif // BC_ENABLE_EXTRA_MATH
1592 {
1593 bc_parse_expr_status(p, BC_PARSE_PRINT, bc_parse_next_expr);
1594 break;
1595 }
1596
1597 case BC_LEX_KW_ELSE:
1598 {
1599 bc_parse_else(p);
1600 break;
1601 }
1602
1603 // Just eat.
1604 case BC_LEX_SCOLON:
1605 {
1606 // Do nothing.
1607 break;
1608 }
1609
1610 case BC_LEX_RBRACE:
1611 {
1612 bc_parse_endBody(p, true);
1613 break;
1614 }
1615
1616 case BC_LEX_STR:
1617 {
1618 bc_parse_str(p, BC_INST_PRINT_STR);
1619 break;
1620 }
1621
1622 case BC_LEX_KW_BREAK:
1623 case BC_LEX_KW_CONTINUE:
1624 {
1625 bc_parse_loopExit(p, p->l.t);
1626 break;
1627 }
1628
1629 case BC_LEX_KW_FOR:
1630 {
1631 bc_parse_for(p);
1632 break;
1633 }
1634
1635 case BC_LEX_KW_HALT:
1636 {
1637 bc_parse_push(p, BC_INST_HALT);
1638 bc_lex_next(&p->l);
1639 break;
1640 }
1641
1642 case BC_LEX_KW_IF:
1643 {
1644 bc_parse_if(p);
1645 break;
1646 }
1647
1648 case BC_LEX_KW_LIMITS:
1649 {
1650 // `limits` is a compile-time command, so execute it right away.
1651 bc_vm_printf("BC_LONG_BIT = %lu\n", (ulong) BC_LONG_BIT);
1652 bc_vm_printf("BC_BASE_DIGS = %lu\n", (ulong) BC_BASE_DIGS);
1653 bc_vm_printf("BC_BASE_POW = %lu\n", (ulong) BC_BASE_POW);
1654 bc_vm_printf("BC_OVERFLOW_MAX = %lu\n", (ulong) BC_NUM_BIGDIG_MAX);
1655 bc_vm_printf("\n");
1656 bc_vm_printf("BC_BASE_MAX = %lu\n", BC_MAX_OBASE);
1657 bc_vm_printf("BC_DIM_MAX = %lu\n", BC_MAX_DIM);
1658 bc_vm_printf("BC_SCALE_MAX = %lu\n", BC_MAX_SCALE);
1659 bc_vm_printf("BC_STRING_MAX = %lu\n", BC_MAX_STRING);
1660 bc_vm_printf("BC_NAME_MAX = %lu\n", BC_MAX_NAME);
1661 bc_vm_printf("BC_NUM_MAX = %lu\n", BC_MAX_NUM);
1662 #if BC_ENABLE_EXTRA_MATH
1663 bc_vm_printf("BC_RAND_MAX = %lu\n", BC_MAX_RAND);
1664 #endif // BC_ENABLE_EXTRA_MATH
1665 bc_vm_printf("MAX Exponent = %lu\n", BC_MAX_EXP);
1666 bc_vm_printf("Number of vars = %lu\n", BC_MAX_VARS);
1667
1668 bc_lex_next(&p->l);
1669
1670 break;
1671 }
1672
1673 case BC_LEX_KW_STREAM:
1674 case BC_LEX_KW_PRINT:
1675 {
1676 bc_parse_print(p, type);
1677 break;
1678 }
1679
1680 case BC_LEX_KW_QUIT:
1681 {
1682 // Quit is a compile-time command. We don't exit directly, so the vm
1683 // can clean up.
1684 vm.status = BC_STATUS_QUIT;
1685 BC_JMP;
1686 break;
1687 }
1688
1689 case BC_LEX_KW_RETURN:
1690 {
1691 bc_parse_return(p);
1692 break;
1693 }
1694
1695 case BC_LEX_KW_WHILE:
1696 {
1697 bc_parse_while(p);
1698 break;
1699 }
1700
1701 default:
1702 {
1703 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1704 }
1705 }
1706
1707 // If the flags did not change, we expect a delimiter.
1708 if (len == p->flags.len && flags == BC_PARSE_TOP_FLAG(p)) {
1709 if (BC_ERR(!bc_parse_isDelimiter(p)))
1710 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1711 }
1712
1713 // Make sure semicolons are eaten.
1714 while (p->l.t == BC_LEX_SCOLON) bc_lex_next(&p->l);
1715 }
1716
bc_parse_parse(BcParse * p)1717 void bc_parse_parse(BcParse *p) {
1718
1719 assert(p);
1720
1721 BC_SETJMP(exit);
1722
1723 // We should not let an EOF get here unless some partial parse was not
1724 // completed, in which case, it's the user's fault.
1725 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
1726
1727 // Functions need special parsing.
1728 else if (p->l.t == BC_LEX_KW_DEFINE) {
1729 if (BC_ERR(BC_PARSE_NO_EXEC(p)))
1730 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1731 bc_parse_func(p);
1732 }
1733
1734 // Otherwise, parse a normal statement.
1735 else bc_parse_stmt(p);
1736
1737 exit:
1738
1739 BC_SIG_MAYLOCK;
1740
1741 // We need to reset on error.
1742 if (BC_ERR(((vm.status && vm.status != BC_STATUS_QUIT) || vm.sig)))
1743 bc_parse_reset(p);
1744
1745 BC_LONGJMP_CONT;
1746 }
1747
1748 /**
1749 * Parse an expression. This is the actual implementation of the Shunting-Yard
1750 * Algorithm.
1751 * @param p The parser.
1752 * @param flags The flags for what is valid in the expression.
1753 * @param next A set of tokens for what is valid *after* the expression.
1754 * @return A parse status. In some places, an empty expression is an
1755 * error, and sometimes, it is required. This allows this function
1756 * to tell the caller if the expression was empty and let the
1757 * caller handle it.
1758 */
bc_parse_expr_err(BcParse * p,uint8_t flags,BcParseNext next)1759 static BcParseStatus bc_parse_expr_err(BcParse *p, uint8_t flags,
1760 BcParseNext next)
1761 {
1762 BcInst prev = BC_INST_PRINT;
1763 uchar inst = BC_INST_INVALID;
1764 BcLexType top, t;
1765 size_t nexprs, ops_bgn;
1766 uint32_t i, nparens, nrelops;
1767 bool pfirst, rprn, done, get_token, assign, bin_last, incdec, can_assign;
1768
1769 // One of these *must* be true.
1770 assert(!(flags & BC_PARSE_PRINT) || !(flags & BC_PARSE_NEEDVAL));
1771
1772 // These are set very carefully. In fact, controlling the values of these
1773 // locals is the biggest part of making this work. ops_bgn especially is
1774 // important because it marks where the operator stack begins for *this*
1775 // invocation of this function. That's because bc_parse_expr_err() is
1776 // recursive (the Shunting-Yard Algorithm is most easily expressed
1777 // recursively when parsing subexpressions), and each invocation needs to
1778 // know where to stop.
1779 //
1780 // - nparens is the number of left parens without matches.
1781 // - nrelops is the number of relational operators that appear in the expr.
1782 // - nexprs is the number of unused expressions.
1783 // - rprn is a right paren encountered last.
1784 // - done means the expression has been fully parsed.
1785 // - get_token is true when a token is needed at the end of an iteration.
1786 // - assign is true when an assignment statement was parsed last.
1787 // - incdec is true when the previous operator was an inc or dec operator.
1788 // - can_assign is true when an assignemnt is valid.
1789 // - bin_last is true when the previous instruction was a binary operator.
1790 t = p->l.t;
1791 pfirst = (p->l.t == BC_LEX_LPAREN);
1792 nparens = nrelops = 0;
1793 nexprs = 0;
1794 ops_bgn = p->ops.len;
1795 rprn = done = get_token = assign = incdec = can_assign = false;
1796 bin_last = true;
1797
1798 // We want to eat newlines if newlines are not a valid ending token.
1799 // This is for spacing in things like for loop headers.
1800 if (!(flags & BC_PARSE_NOREAD)) {
1801 while ((t = p->l.t) == BC_LEX_NLINE) bc_lex_next(&p->l);
1802 }
1803
1804 // This is the Shunting-Yard algorithm loop.
1805 for (; !done && BC_PARSE_EXPR(t); t = p->l.t)
1806 {
1807 switch (t) {
1808
1809 case BC_LEX_OP_INC:
1810 case BC_LEX_OP_DEC:
1811 {
1812 // These operators can only be used with items that can be
1813 // assigned to.
1814 if (BC_ERR(incdec)) bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
1815
1816 bc_parse_incdec(p, &prev, &can_assign, &nexprs, flags);
1817
1818 rprn = get_token = bin_last = false;
1819 incdec = true;
1820 flags &= ~(BC_PARSE_ARRAY);
1821
1822 break;
1823 }
1824
1825 #if BC_ENABLE_EXTRA_MATH
1826 case BC_LEX_OP_TRUNC:
1827 {
1828 // The previous token must have been a leaf expression, or the
1829 // operator is in the wrong place.
1830 if (BC_ERR(!BC_PARSE_LEAF(prev, bin_last, rprn)))
1831 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
1832
1833 // I can just add the instruction because
1834 // negative will already be taken care of.
1835 bc_parse_push(p, BC_INST_TRUNC);
1836
1837 rprn = can_assign = incdec = false;
1838 get_token = true;
1839 flags &= ~(BC_PARSE_ARRAY);
1840
1841 break;
1842 }
1843 #endif // BC_ENABLE_EXTRA_MATH
1844
1845 case BC_LEX_OP_MINUS:
1846 {
1847 bc_parse_minus(p, &prev, ops_bgn, rprn, bin_last, &nexprs);
1848
1849 rprn = get_token = can_assign = false;
1850
1851 // This is true if it was a binary operator last.
1852 bin_last = (prev == BC_INST_MINUS);
1853 if (bin_last) incdec = false;
1854
1855 flags &= ~(BC_PARSE_ARRAY);
1856
1857 break;
1858 }
1859
1860 // All of this group, including the fallthrough, is to parse binary
1861 // operators.
1862 case BC_LEX_OP_ASSIGN_POWER:
1863 case BC_LEX_OP_ASSIGN_MULTIPLY:
1864 case BC_LEX_OP_ASSIGN_DIVIDE:
1865 case BC_LEX_OP_ASSIGN_MODULUS:
1866 case BC_LEX_OP_ASSIGN_PLUS:
1867 case BC_LEX_OP_ASSIGN_MINUS:
1868 #if BC_ENABLE_EXTRA_MATH
1869 case BC_LEX_OP_ASSIGN_PLACES:
1870 case BC_LEX_OP_ASSIGN_LSHIFT:
1871 case BC_LEX_OP_ASSIGN_RSHIFT:
1872 #endif // BC_ENABLE_EXTRA_MATH
1873 case BC_LEX_OP_ASSIGN:
1874 {
1875 // We need to make sure the assignment is valid.
1876 if (!BC_PARSE_INST_VAR(prev))
1877 bc_parse_err(p, BC_ERR_PARSE_ASSIGN);
1878 }
1879 // Fallthrough.
1880 BC_FALLTHROUGH
1881
1882 case BC_LEX_OP_POWER:
1883 case BC_LEX_OP_MULTIPLY:
1884 case BC_LEX_OP_DIVIDE:
1885 case BC_LEX_OP_MODULUS:
1886 case BC_LEX_OP_PLUS:
1887 #if BC_ENABLE_EXTRA_MATH
1888 case BC_LEX_OP_PLACES:
1889 case BC_LEX_OP_LSHIFT:
1890 case BC_LEX_OP_RSHIFT:
1891 #endif // BC_ENABLE_EXTRA_MATH
1892 case BC_LEX_OP_REL_EQ:
1893 case BC_LEX_OP_REL_LE:
1894 case BC_LEX_OP_REL_GE:
1895 case BC_LEX_OP_REL_NE:
1896 case BC_LEX_OP_REL_LT:
1897 case BC_LEX_OP_REL_GT:
1898 case BC_LEX_OP_BOOL_NOT:
1899 case BC_LEX_OP_BOOL_OR:
1900 case BC_LEX_OP_BOOL_AND:
1901 {
1902 // This is true if the operator if the token is a prefix
1903 // operator. This is only for boolean not.
1904 if (BC_PARSE_OP_PREFIX(t)) {
1905
1906 // Prefix operators are only allowed after binary operators
1907 // or prefix operators.
1908 if (BC_ERR(!bin_last && !BC_PARSE_OP_PREFIX(p->l.last)))
1909 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1910 }
1911 // If we execute the else, that means we have a binary operator.
1912 // If the previous operator was a prefix or a binary operator,
1913 // then a binary operator is not allowed.
1914 else if (BC_ERR(BC_PARSE_PREV_PREFIX(prev) || bin_last))
1915 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1916
1917 nrelops += (t >= BC_LEX_OP_REL_EQ && t <= BC_LEX_OP_REL_GT);
1918 prev = BC_PARSE_TOKEN_INST(t);
1919
1920 bc_parse_operator(p, t, ops_bgn, &nexprs);
1921
1922 rprn = incdec = can_assign = false;
1923 get_token = true;
1924 bin_last = !BC_PARSE_OP_PREFIX(t);
1925 flags &= ~(BC_PARSE_ARRAY);
1926
1927 break;
1928 }
1929
1930 case BC_LEX_LPAREN:
1931 {
1932 // A left paren is *not* allowed right after a leaf expr.
1933 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
1934 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1935
1936 nparens += 1;
1937 rprn = incdec = can_assign = false;
1938 get_token = true;
1939
1940 // Push the paren onto the operator stack.
1941 bc_vec_push(&p->ops, &t);
1942
1943 break;
1944 }
1945
1946 case BC_LEX_RPAREN:
1947 {
1948 // This needs to be a status. The error is handled in
1949 // bc_parse_expr_status().
1950 if (BC_ERR(p->l.last == BC_LEX_LPAREN))
1951 return BC_PARSE_STATUS_EMPTY_EXPR;
1952
1953 // The right paren must not come after a prefix or binary
1954 // operator.
1955 if (BC_ERR(bin_last || BC_PARSE_PREV_PREFIX(prev)))
1956 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1957
1958 // If there are no parens left, we are done, but we need another
1959 // token.
1960 if (!nparens) {
1961 done = true;
1962 get_token = false;
1963 break;
1964 }
1965
1966 nparens -= 1;
1967 rprn = true;
1968 get_token = bin_last = incdec = false;
1969
1970 bc_parse_rightParen(p, &nexprs);
1971
1972 break;
1973 }
1974
1975 case BC_LEX_STR:
1976 {
1977 // POSIX only allows strings alone.
1978 if (BC_IS_POSIX) bc_parse_err(p, BC_ERR_POSIX_EXPR_STRING);
1979
1980 // A string is a leaf and cannot come right after a leaf.
1981 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
1982 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1983
1984 bc_parse_addString(p);
1985
1986 get_token = true;
1987 bin_last = rprn = false;
1988 nexprs += 1;
1989
1990 break;
1991 }
1992
1993 case BC_LEX_NAME:
1994 {
1995 // A name is a leaf and cannot come right after a leaf.
1996 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
1997 bc_parse_err(p, BC_ERR_PARSE_EXPR);
1998
1999 get_token = bin_last = false;
2000
2001 bc_parse_name(p, &prev, &can_assign, flags & ~BC_PARSE_NOCALL);
2002
2003 rprn = (prev == BC_INST_CALL);
2004 nexprs += 1;
2005 flags &= ~(BC_PARSE_ARRAY);
2006
2007 break;
2008 }
2009
2010 case BC_LEX_NUMBER:
2011 {
2012 // A number is a leaf and cannot come right after a leaf.
2013 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2014 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2015
2016 // The number instruction is pushed in here.
2017 bc_parse_number(p);
2018
2019 nexprs += 1;
2020 prev = BC_INST_NUM;
2021 get_token = true;
2022 rprn = bin_last = can_assign = false;
2023 flags &= ~(BC_PARSE_ARRAY);
2024
2025 break;
2026 }
2027
2028 case BC_LEX_KW_IBASE:
2029 case BC_LEX_KW_LAST:
2030 case BC_LEX_KW_OBASE:
2031 #if BC_ENABLE_EXTRA_MATH
2032 case BC_LEX_KW_SEED:
2033 #endif // BC_ENABLE_EXTRA_MATH
2034 {
2035 // All of these are leaves and cannot come right after a leaf.
2036 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2037 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2038
2039 prev = t - BC_LEX_KW_LAST + BC_INST_LAST;
2040 bc_parse_push(p, prev);
2041
2042 get_token = can_assign = true;
2043 rprn = bin_last = false;
2044 nexprs += 1;
2045 flags &= ~(BC_PARSE_ARRAY);
2046
2047 break;
2048 }
2049
2050 case BC_LEX_KW_LENGTH:
2051 case BC_LEX_KW_SQRT:
2052 case BC_LEX_KW_ABS:
2053 #if BC_ENABLE_EXTRA_MATH
2054 case BC_LEX_KW_IRAND:
2055 #endif // BC_ENABLE_EXTRA_MATH
2056 case BC_LEX_KW_ASCIIFY:
2057 {
2058 // All of these are leaves and cannot come right after a leaf.
2059 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2060 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2061
2062 bc_parse_builtin(p, t, flags, &prev);
2063
2064 rprn = get_token = bin_last = incdec = can_assign = false;
2065 nexprs += 1;
2066 flags &= ~(BC_PARSE_ARRAY);
2067
2068 break;
2069 }
2070
2071 case BC_LEX_KW_READ:
2072 #if BC_ENABLE_EXTRA_MATH
2073 case BC_LEX_KW_RAND:
2074 #endif // BC_ENABLE_EXTRA_MATH
2075 case BC_LEX_KW_MAXIBASE:
2076 case BC_LEX_KW_MAXOBASE:
2077 case BC_LEX_KW_MAXSCALE:
2078 #if BC_ENABLE_EXTRA_MATH
2079 case BC_LEX_KW_MAXRAND:
2080 #endif // BC_ENABLE_EXTRA_MATH
2081 {
2082 // All of these are leaves and cannot come right after a leaf.
2083 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2084 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2085
2086 // Error if we have read and it's not allowed.
2087 else if (t == BC_LEX_KW_READ && BC_ERR(flags & BC_PARSE_NOREAD))
2088 bc_parse_err(p, BC_ERR_EXEC_REC_READ);
2089
2090 prev = t - BC_LEX_KW_READ + BC_INST_READ;
2091 bc_parse_noArgBuiltin(p, prev);
2092
2093 rprn = get_token = bin_last = incdec = can_assign = false;
2094 nexprs += 1;
2095 flags &= ~(BC_PARSE_ARRAY);
2096
2097 break;
2098 }
2099
2100 case BC_LEX_KW_SCALE:
2101 {
2102 // This is a leaf and cannot come right after a leaf.
2103 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2104 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2105
2106 // Scale needs special work because it can be a variable *or* a
2107 // function.
2108 bc_parse_scale(p, &prev, &can_assign, flags);
2109
2110 rprn = get_token = bin_last = false;
2111 nexprs += 1;
2112 flags &= ~(BC_PARSE_ARRAY);
2113
2114 break;
2115 }
2116
2117 case BC_LEX_KW_MODEXP:
2118 case BC_LEX_KW_DIVMOD:
2119 {
2120 // This is a leaf and cannot come right after a leaf.
2121 if (BC_ERR(BC_PARSE_LEAF(prev, bin_last, rprn)))
2122 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2123
2124 bc_parse_builtin3(p, t, flags, &prev);
2125
2126 rprn = get_token = bin_last = incdec = can_assign = false;
2127 nexprs += 1;
2128 flags &= ~(BC_PARSE_ARRAY);
2129
2130 break;
2131 }
2132
2133 default:
2134 {
2135 #ifndef NDEBUG
2136 // We should never get here, even in debug builds.
2137 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
2138 break;
2139 #endif // NDEBUG
2140 }
2141 }
2142
2143 if (get_token) bc_lex_next(&p->l);
2144 }
2145
2146 // Now that we have parsed the expression, we need to empty the operator
2147 // stack.
2148 while (p->ops.len > ops_bgn) {
2149
2150 top = BC_PARSE_TOP_OP(p);
2151 assign = top >= BC_LEX_OP_ASSIGN_POWER && top <= BC_LEX_OP_ASSIGN;
2152
2153 // There should not be *any* parens on the stack anymore.
2154 if (BC_ERR(top == BC_LEX_LPAREN || top == BC_LEX_RPAREN))
2155 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2156
2157 bc_parse_push(p, BC_PARSE_TOKEN_INST(top));
2158
2159 // Adjust the number of unused expressions.
2160 nexprs -= !BC_PARSE_OP_PREFIX(top);
2161 bc_vec_pop(&p->ops);
2162
2163 incdec = false;
2164 }
2165
2166 // There must be only one expression at the top.
2167 if (BC_ERR(nexprs != 1)) bc_parse_err(p, BC_ERR_PARSE_EXPR);
2168
2169 // Check that the next token is correct.
2170 for (i = 0; i < next.len && t != next.tokens[i]; ++i);
2171 if (BC_ERR(i == next.len && !bc_parse_isDelimiter(p)))
2172 bc_parse_err(p, BC_ERR_PARSE_EXPR);
2173
2174 // Check that POSIX would be happy with the number of relational operators.
2175 if (!(flags & BC_PARSE_REL) && nrelops)
2176 bc_parse_err(p, BC_ERR_POSIX_REL_POS);
2177 else if ((flags & BC_PARSE_REL) && nrelops > 1)
2178 bc_parse_err(p, BC_ERR_POSIX_MULTIREL);
2179
2180 // If this is true, then we might be in a situation where we don't print.
2181 // We would want to have the increment/decrement operator not make an extra
2182 // copy if it's not necessary.
2183 if (!(flags & BC_PARSE_NEEDVAL) && !pfirst) {
2184
2185 // We have the easy case if the last operator was an assignment
2186 // operator.
2187 if (assign) {
2188 inst = *((uchar*) bc_vec_top(&p->func->code));
2189 inst += (BC_INST_ASSIGN_POWER_NO_VAL - BC_INST_ASSIGN_POWER);
2190 incdec = false;
2191 }
2192 // If we have an inc/dec operator and we are *not* printing, implement
2193 // the optimization to get rid of the extra copy.
2194 else if (incdec && !(flags & BC_PARSE_PRINT)) {
2195 inst = *((uchar*) bc_vec_top(&p->func->code));
2196 incdec = (inst <= BC_INST_DEC);
2197 inst = BC_INST_ASSIGN_PLUS_NO_VAL + (inst != BC_INST_INC &&
2198 inst != BC_INST_ASSIGN_PLUS);
2199 }
2200
2201 // This condition allows us to change the previous assignment
2202 // instruction (which does a copy) for a NO_VAL version, which does not.
2203 // This condition is set if either of the above if statements ends up
2204 // being true.
2205 if (inst >= BC_INST_ASSIGN_POWER_NO_VAL &&
2206 inst <= BC_INST_ASSIGN_NO_VAL)
2207 {
2208 // Pop the previous assignment instruction and push a new one.
2209 // Inc/dec needs the extra instruction because it is now a binary
2210 // operator and needs a second operand.
2211 bc_vec_pop(&p->func->code);
2212 if (incdec) bc_parse_push(p, BC_INST_ONE);
2213 bc_parse_push(p, inst);
2214 }
2215 }
2216
2217 // If we might have to print...
2218 if ((flags & BC_PARSE_PRINT)) {
2219
2220 // With a paren first or the last operator not being an assignment, we
2221 // *do* want to print.
2222 if (pfirst || !assign) bc_parse_push(p, BC_INST_PRINT);
2223 }
2224 // We need to make sure to push a pop instruction for assignment statements
2225 // that will not print. The print will pop, but without it, we need to pop.
2226 else if (!(flags & BC_PARSE_NEEDVAL) &&
2227 (inst < BC_INST_ASSIGN_POWER_NO_VAL ||
2228 inst > BC_INST_ASSIGN_NO_VAL))
2229 {
2230 bc_parse_push(p, BC_INST_POP);
2231 }
2232
2233 // We want to eat newlines if newlines are not a valid ending token.
2234 // This is for spacing in things like for loop headers.
2235 //
2236 // Yes, this is one case where I reuse a variable for a different purpose;
2237 // in this case, incdec being true now means that newlines are not valid.
2238 for (incdec = true, i = 0; i < next.len && incdec; ++i)
2239 incdec = (next.tokens[i] != BC_LEX_NLINE);
2240 if (incdec) {
2241 while (p->l.t == BC_LEX_NLINE) bc_lex_next(&p->l);
2242 }
2243
2244 return BC_PARSE_STATUS_SUCCESS;
2245 }
2246
2247 /**
2248 * Parses an expression with bc_parse_expr_err(), but throws an error if it gets
2249 * an empty expression.
2250 * @param p The parser.
2251 * @param flags The flags for what is valid in the expression.
2252 * @param next A set of tokens for what is valid *after* the expression.
2253 */
bc_parse_expr_status(BcParse * p,uint8_t flags,BcParseNext next)2254 static void bc_parse_expr_status(BcParse *p, uint8_t flags, BcParseNext next) {
2255
2256 BcParseStatus s = bc_parse_expr_err(p, flags, next);
2257
2258 if (BC_ERR(s == BC_PARSE_STATUS_EMPTY_EXPR))
2259 bc_parse_err(p, BC_ERR_PARSE_EMPTY_EXPR);
2260 }
2261
bc_parse_expr(BcParse * p,uint8_t flags)2262 void bc_parse_expr(BcParse *p, uint8_t flags) {
2263 assert(p);
2264 bc_parse_expr_status(p, flags, bc_parse_next_read);
2265 }
2266 #endif // BC_ENABLED
2267