1 /* $OpenBSD: syn.c,v 1.30 2015/09/01 13:12:31 tedu Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009,
5 * 2011, 2012, 2013, 2014, 2015, 2016, 2017,
6 * 2018, 2020
7 * mirabilos <m@mirbsd.org>
8 *
9 * Provided that these terms and disclaimer and all copyright notices
10 * are retained or reproduced in an accompanying document, permission
11 * is granted to deal in this work without restriction, including un-
12 * limited rights to use, publicly perform, distribute, sell, modify,
13 * merge, give away, or sublicence.
14 *
15 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
16 * the utmost extent permitted by applicable law, neither express nor
17 * implied; without malicious intent or gross negligence. In no event
18 * may a licensor, author or contributor be held liable for indirect,
19 * direct, other damage, loss, or other issues arising in any way out
20 * of dealing in the work, even if advised of the possibility of such
21 * damage or existence of a defect, except proven that it results out
22 * of said person's immediate fault when using the work as intended.
23 */
24
25 #include "sh.h"
26
27 __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.129 2020/10/31 01:21:58 tg Exp $");
28
/* the compound construct currently being parsed, for "unmatched" errors */
struct nesting_state {
	int start_token;	/* token that began nesting (e.g. FOR) */
	int start_line;		/* line nesting began on */
};
33
/*
 * Parser/lexer state saved by yyrecursive() before it parses recursively
 * and restored by yyrecursive_pop(); entries are chained through next
 */
struct yyrecursive_state {
	struct ioword *old_heres[HERES];	/* copy of heres[] */
	struct yyrecursive_state *next;		/* entry saved before this one */
	struct ioword **old_herep;		/* saved herep */
	int old_symbol;				/* saved symbol */
	unsigned int old_nesting_type;		/* saved subshell_nesting_type */
	bool old_reject;			/* saved reject */
};
42
/* forward declarations of the file-local parser routines */
static void yyparse(bool);
static struct op *pipeline(int, int);
static struct op *andor(int);
static struct op *c_list(int, bool);
static struct ioword *synio(int);
static struct op *nested(int, int, int, int);
static struct op *get_command(int, int);
static struct op *dogroup(int);
static struct op *thenpart(int);
static struct op *elsepart(int);
static struct op *caselist(int);
static struct op *casepart(int, int);
static struct op *function_body(char *, int, bool);
static char **wordlist(int);
static struct op *block(int, struct op *, struct op *);
static struct op *newtp(int);
/* reports the error through yyerror(); does not return */
static void syntaxerr(const char *) MKSH_A_NORETURN;
static void nesting_push(struct nesting_state *, int);
static void nesting_pop(struct nesting_state *);
static int inalias(struct source *) MKSH_A_PURE;
/* callbacks installed into the Test_env used for parsing [[ ... ]] */
static Test_op dbtestp_isa(Test_env *, Test_meta);
static const char *dbtestp_getopnd(Test_env *, Test_op, bool);
static int dbtestp_eval(Test_env *, Test_op, const char *,
    const char *, bool);
static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
68
static struct op *outtree;		/* yyparse output */
/* construct currently open; read by syntaxerr() when input ends early */
static struct nesting_state nesting;

static bool reject;			/* token(cf) gets symbol again */
static int symbol;			/* yylex value */
74
/* push the current symbol back: the next token()/tpeek() returns it again */
#define REJECT		(reject = true)
/* consume the current symbol */
#define ACCEPT		(reject = false)
/* next token: the pushed-back symbol if there is one, else a fresh yylex(cf) */
#define token(cf)	((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
/* like token(cf), but the symbol stays pushed back afterwards */
#define tpeek(cf)	((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
/* raise a syntax error unless the next token, read with flags cf, is c */
#define musthave(c,cf)	do {					\
	if ((unsigned int)token(cf) != (unsigned int)(c))	\
		syntaxerr(NULL);				\
} while (/* CONSTCOND */ 0)

/* spellings shared between tokentab and casepart() */
static const char Tcbrace[] = "}";
static const char Tesac[] = "esac";
86
87 static void
yyparse(bool doalias)88 yyparse(bool doalias)
89 {
90 int c;
91
92 ACCEPT;
93
94 outtree = c_list(doalias ? ALIAS : 0, source->type == SSTRING);
95 c = tpeek(0);
96 if (c == 0 && !outtree)
97 outtree = newtp(TEOF);
98 else if (!cinttype(c, C_LF | C_NUL))
99 syntaxerr(NULL);
100 }
101
102 static struct op *
pipeline(int cf,int sALIAS)103 pipeline(int cf, int sALIAS)
104 {
105 struct op *t, *p, *tl = NULL;
106
107 t = get_command(cf, sALIAS);
108 if (t != NULL) {
109 while (token(0) == '|') {
110 if ((p = get_command(CONTIN, sALIAS)) == NULL)
111 syntaxerr(NULL);
112 if (tl == NULL)
113 t = tl = block(TPIPE, t, p);
114 else
115 tl = tl->right = block(TPIPE, tl->right, p);
116 }
117 REJECT;
118 }
119 return (t);
120 }
121
122 static struct op *
andor(int sALIAS)123 andor(int sALIAS)
124 {
125 struct op *t, *p;
126 int c;
127
128 t = pipeline(0, sALIAS);
129 if (t != NULL) {
130 while ((c = token(0)) == LOGAND || c == LOGOR) {
131 if ((p = pipeline(CONTIN, sALIAS)) == NULL)
132 syntaxerr(NULL);
133 t = block(c == LOGAND? TAND: TOR, t, p);
134 }
135 REJECT;
136 }
137 return (t);
138 }
139
140 static struct op *
c_list(int sALIAS,bool multi)141 c_list(int sALIAS, bool multi)
142 {
143 struct op *t = NULL, *p, *tl = NULL;
144 int c;
145 bool have_sep;
146
147 while (/* CONSTCOND */ 1) {
148 p = andor(sALIAS);
149 /*
150 * Token has always been read/rejected at this point, so
151 * we don't worry about what flags to pass token()
152 */
153 c = token(0);
154 have_sep = true;
155 if (c == '\n' && (multi || inalias(source))) {
156 if (!p)
157 /* ignore blank lines */
158 continue;
159 } else if (!p)
160 break;
161 else if (c == '&' || c == COPROC)
162 p = block(c == '&' ? TASYNC : TCOPROC, p, NULL);
163 else if (c != ';')
164 have_sep = false;
165 if (!t)
166 t = p;
167 else if (!tl)
168 t = tl = block(TLIST, t, p);
169 else
170 tl = tl->right = block(TLIST, tl->right, p);
171 if (!have_sep)
172 break;
173 }
174 REJECT;
175 return (t);
176 }
177
178 static const char IONDELIM_delim[] = { CHAR, '<', CHAR, '<', EOS };
179
180 static struct ioword *
synio(int cf)181 synio(int cf)
182 {
183 struct ioword *iop;
184 static struct ioword *nextiop;
185 bool ishere;
186
187 if (nextiop != NULL) {
188 iop = nextiop;
189 nextiop = NULL;
190 return (iop);
191 }
192
193 if (tpeek(cf) != REDIR)
194 return (NULL);
195 ACCEPT;
196 iop = yylval.iop;
197 ishere = (iop->ioflag & IOTYPE) == IOHERE;
198 if (iop->ioflag & IOHERESTR) {
199 musthave(LWORD, 0);
200 } else if (ishere && tpeek(HEREDELIM) == '\n') {
201 ACCEPT;
202 yylval.cp = wdcopy(IONDELIM_delim, ATEMP);
203 iop->ioflag |= IOEVAL | IONDELIM;
204 } else
205 musthave(LWORD, ishere ? HEREDELIM : 0);
206 if (ishere) {
207 iop->delim = yylval.cp;
208 if (*ident != 0 && !(iop->ioflag & IOHERESTR)) {
209 /* unquoted */
210 iop->ioflag |= IOEVAL;
211 }
212 if (herep > &heres[HERES - 1])
213 yyerror(Tf_toomany, "<<");
214 *herep++ = iop;
215 } else
216 iop->ioname = yylval.cp;
217
218 if (iop->ioflag & IOBASH) {
219 char *cp;
220
221 nextiop = alloc(sizeof(*iop), ATEMP);
222 nextiop->ioname = cp = alloc(3, ATEMP);
223 *cp++ = CHAR;
224 *cp++ = digits_lc[iop->unit % 10];
225 *cp = EOS;
226
227 iop->ioflag &= ~IOBASH;
228 nextiop->unit = 2;
229 nextiop->ioflag = IODUP;
230 nextiop->delim = NULL;
231 nextiop->heredoc = NULL;
232 }
233 return (iop);
234 }
235
236 static struct op *
nested(int type,int smark,int emark,int sALIAS)237 nested(int type, int smark, int emark, int sALIAS)
238 {
239 struct op *t;
240 struct nesting_state old_nesting;
241
242 nesting_push(&old_nesting, smark);
243 t = c_list(sALIAS, true);
244 musthave(emark, KEYWORD|sALIAS);
245 nesting_pop(&old_nesting);
246 return (block(type, t, NULL));
247 }
248
/*
 * Word-encoded (CHAR/QCHAR-tagged, EOS-terminated) strings that
 * get_command() inserts when it rewrites (( ... )) and foo=(...)
 */
/* "\builtin", the backslash tagged QCHAR */
static const char builtin_cmd[] = {
	QCHAR, '\\', CHAR, 'b', CHAR, 'u', CHAR, 'i',
	CHAR, 'l', CHAR, 't', CHAR, 'i', CHAR, 'n', EOS
};
/* "let" */
static const char let_cmd[] = {
	CHAR, 'l', CHAR, 'e', CHAR, 't', EOS
};
/* "set" */
static const char setA_cmd0[] = {
	CHAR, 's', CHAR, 'e', CHAR, 't', EOS
};
/* "-A" */
static const char setA_cmd1[] = {
	CHAR, '-', CHAR, 'A', EOS
};
/* "--" */
static const char setA_cmd2[] = {
	CHAR, '-', CHAR, '-', EOS
};
265
/*
 * Parse a single command: a simple command, a compound command, a
 * function definition, or a command prefixed by ! or time, together
 * with any redirections following it.
 *
 * cf holds extra lexer flags for the first token, sALIAS is either
 * ALIAS or 0 and is passed on to the lexer and to the sub-parsers.
 *
 * Returns NULL, with the token left rejected, if the next token does
 * not start a command; otherwise the tree node of the command, whose
 * ioact is the NULL-terminated redirection vector (or NULL if none).
 */
static struct op *
get_command(int cf, int sALIAS)
{
	struct op *t;
	int c, iopn = 0, syniocf, lno;
	struct ioword *iop;
	/* words and variable assignments of a simple command */
	XPtrV args, vars;
	struct nesting_state old_nesting;
	bool check_decl_utility;
	/* scratch list of redirections; copied into ATEMP storage below */
	static struct ioword *iops[NUFILE + 1];

	XPinit(args, 16);
	XPinit(vars, 16);

	/* lexer flags for the redirections parsed after the command */
	syniocf = KEYWORD|sALIAS;
	switch (c = token(cf|KEYWORD|sALIAS|CMDASN)) {
	default:
		REJECT;
		XPfree(args);
		XPfree(vars);
		/* empty line */
		return (NULL);

	case LWORD:
	case REDIR:
		/* simple command: re-read the token in the loop below */
		REJECT;
		syniocf &= ~(KEYWORD|sALIAS);
		t = newtp(TCOM);
		t->lineno = source->line;
		goto get_command_start;

 get_command_loop:
		/* choose the lexer flags for the next word */
		if (XPsize(args) == 0) {
 get_command_start:
			check_decl_utility = true;
			cf = sALIAS | CMDASN;
		} else if (t->u.evalflags)
			cf = CMDWORD | CMDASN;
		else
			cf = CMDWORD;

		switch (tpeek(cf)) {
		case REDIR:
			while ((iop = synio(cf)) != NULL) {
				if (iopn >= NUFILE)
					yyerror(Tf_toomany, Tredirection);
				iops[iopn++] = iop;
			}
			goto get_command_loop;

		case LWORD:
			ACCEPT;
			if (check_decl_utility) {
				struct tbl *tt = get_builtin(ident);
				uint32_t flag;

				flag = tt ? tt->flag : 0;
				if (flag & DECL_UTIL)
					t->u.evalflags = DOVACHECK;
				/* keep checking only past a DECL_FWDR builtin */
				if (!(flag & DECL_FWDR))
					check_decl_utility = false;
			}
			/* assignments go to vars, everything else to args */
			if ((XPsize(args) == 0 || Flag(FKEYWORD)) &&
			    is_wdvarassign(yylval.cp))
				XPput(vars, yylval.cp);
			else
				XPput(args, yylval.cp);
			goto get_command_loop;

		case ORD('(' /*)*/):
			if (XPsize(args) == 0 && XPsize(vars) == 1 &&
			    is_wdvarassign(yylval.cp)) {
				char *tcp;

				/* wdarrassign: foo=(bar) */
				ACCEPT;

				/* manipulate the vars string */
				tcp = XPptrv(vars)[(vars.len = 0)];
				/* 'varname=' -> 'varname' */
				tcp[wdscan(tcp, EOS) - tcp - 3] = EOS;

				/*
				 * construct new args strings, yielding
				 * \builtin set -A varname -- words...
				 */
				XPput(args, wdcopy(builtin_cmd, ATEMP));
				XPput(args, wdcopy(setA_cmd0, ATEMP));
				XPput(args, wdcopy(setA_cmd1, ATEMP));
				XPput(args, tcp);
				XPput(args, wdcopy(setA_cmd2, ATEMP));

				/* slurp in words till closing paren */
				while (token(CONTIN) == LWORD)
					XPput(args, yylval.cp);
				if (symbol != /*(*/ ')')
					syntaxerr(NULL);
				break;
			}

			/* not a simple command after all; t is replaced below */
			afree(t, ATEMP);

			/*
			 * Check for "> foo (echo hi)" which AT&T ksh allows
			 * (not POSIX, but not disallowed)
			 */
			if (XPsize(args) == 0 && XPsize(vars) == 0) {
				ACCEPT;
				goto Subshell;
			}

			/* must be a function */
			if (iopn != 0 || XPsize(args) != 1 || XPsize(vars) != 0)
				syntaxerr(NULL);
			ACCEPT;
			musthave(/*(*/ ')', 0);
			t = function_body(XPptrv(args)[0],
			    sALIAS, false);
			break;
		}
		break;

	case ORD('(' /*)*/): {
		unsigned int subshell_nesting_type_saved;
 Subshell:
		/* set subshell_nesting_type to ')' while parsing the body */
		subshell_nesting_type_saved = subshell_nesting_type;
		subshell_nesting_type = ORD(')');
		t = nested(TPAREN, ORD('('), ORD(')'), sALIAS);
		subshell_nesting_type = subshell_nesting_type_saved;
		break;
	    }

	case ORD('{' /*}*/):
		t = nested(TBRACE, ORD('{'), ORD('}'), sALIAS);
		break;

	case MDPAREN:
		/* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
		lno = source->line;
		ACCEPT;
		switch (token(LETEXPR)) {
		case LWORD:
			break;
		case ORD('(' /*)*/):
			/*
			 * parse as a subshell instead; c is changed so that
			 * the MDPAREN post-processing at the end is skipped
			 */
			c = ORD('(');
			goto Subshell;
		default:
			syntaxerr(NULL);
		}
		/* becomes: \builtin let <expression word> */
		t = newtp(TCOM);
		t->lineno = lno;
		XPput(args, wdcopy(builtin_cmd, ATEMP));
		XPput(args, wdcopy(let_cmd, ATEMP));
		XPput(args, yylval.cp);
		break;

	case DBRACKET: /* [[ .. ]] */
		/* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
		t = newtp(TDBRACKET);
		ACCEPT;
		{
			Test_env te;

			/* the dbtestp_* callbacks store the tokens in args */
			te.flags = TEF_DBRACKET;
			te.pos.av = &args;
			te.isa = dbtestp_isa;
			te.getopnd = dbtestp_getopnd;
			te.eval = dbtestp_eval;
			te.error = dbtestp_error;

			test_parse(&te);
		}
		break;

	case FOR:
	case SELECT:
		t = newtp((c == FOR) ? TFOR : TSELECT);
		musthave(LWORD, CMDASN);
		if (!is_wdvarname(yylval.cp, true))
			yyerror("%s: bad identifier",
			    c == FOR ? "for" : Tselect);
		strdupx(t->str, ident, ATEMP);
		nesting_push(&old_nesting, c);
		t->vars = wordlist(sALIAS);
		t->left = dogroup(sALIAS);
		nesting_pop(&old_nesting);
		break;

	case WHILE:
	case UNTIL:
		nesting_push(&old_nesting, c);
		t = newtp((c == WHILE) ? TWHILE : TUNTIL);
		t->left = c_list(sALIAS, true);
		t->right = dogroup(sALIAS);
		nesting_pop(&old_nesting);
		break;

	case CASE:
		t = newtp(TCASE);
		musthave(LWORD, 0);
		t->str = yylval.cp;
		nesting_push(&old_nesting, c);
		t->left = caselist(sALIAS);
		nesting_pop(&old_nesting);
		break;

	case IF:
		nesting_push(&old_nesting, c);
		t = newtp(TIF);
		t->left = c_list(sALIAS, true);
		t->right = thenpart(sALIAS);
		musthave(FI, KEYWORD|sALIAS);
		nesting_pop(&old_nesting);
		break;

	case BANG:
		syniocf &= ~(KEYWORD|sALIAS);
		t = pipeline(0, sALIAS);
		if (t == NULL)
			syntaxerr(NULL);
		t = block(TBANG, NULL, t);
		break;

	case TIME:
		syniocf &= ~(KEYWORD|sALIAS);
		/* unlike for BANG, an empty pipeline is accepted here */
		t = pipeline(0, sALIAS);
		if (t && t->type == TCOM) {
			t->str = alloc(2, ATEMP);
			/* TF_* flags */
			t->str[0] = '\0';
			t->str[1] = '\0';
		}
		t = block(TTIME, t, NULL);
		break;

	case FUNCTION:
		musthave(LWORD, 0);
		t = function_body(yylval.cp, sALIAS, true);
		break;
	}

	/* redirections following the command */
	while ((iop = synio(syniocf)) != NULL) {
		if (iopn >= NUFILE)
			yyerror(Tf_toomany, Tredirection);
		iops[iopn++] = iop;
	}

	if (iopn == 0) {
		t->ioact = NULL;
	} else {
		/* copy the NULL-terminated list out of the static scratch */
		iops[iopn++] = NULL;
		t->ioact = alloc2(iopn, sizeof(struct ioword *), ATEMP);
		memcpy(t->ioact, iops, iopn * sizeof(struct ioword *));
	}

	/* only TCOM and TDBRACKET nodes keep the collected words */
	if (t->type == TCOM || t->type == TDBRACKET) {
		XPput(args, NULL);
		t->args = (const char **)XPclose(args);
		XPput(vars, NULL);
		t->vars = (char **)XPclose(vars);
	} else {
		XPfree(args);
		XPfree(vars);
	}

	if (c == MDPAREN) {
		/* wrap (( )) in a TBRACE and move the redirections onto it */
		t = block(TBRACE, t, NULL);
		t->ioact = t->left->ioact;
		t->left->ioact = NULL;
	}

	return (t);
}
536
537 static struct op *
dogroup(int sALIAS)538 dogroup(int sALIAS)
539 {
540 int c;
541 struct op *list;
542
543 c = token(CONTIN|KEYWORD|sALIAS);
544 /*
545 * A {...} can be used instead of do...done for for/select loops
546 * but not for while/until loops - we don't need to check if it
547 * is a while loop because it would have been parsed as part of
548 * the conditional command list...
549 */
550 if (c == DO)
551 c = DONE;
552 else if ((unsigned int)c == ORD('{'))
553 c = ORD('}');
554 else
555 syntaxerr(NULL);
556 list = c_list(sALIAS, true);
557 musthave(c, KEYWORD|sALIAS);
558 return (list);
559 }
560
561 static struct op *
thenpart(int sALIAS)562 thenpart(int sALIAS)
563 {
564 struct op *t;
565
566 musthave(THEN, KEYWORD|sALIAS);
567 t = newtp(0);
568 t->left = c_list(sALIAS, true);
569 if (t->left == NULL)
570 syntaxerr(NULL);
571 t->right = elsepart(sALIAS);
572 return (t);
573 }
574
575 static struct op *
elsepart(int sALIAS)576 elsepart(int sALIAS)
577 {
578 struct op *t;
579
580 switch (token(KEYWORD|sALIAS|CMDASN)) {
581 case ELSE:
582 if ((t = c_list(sALIAS, true)) == NULL)
583 syntaxerr(NULL);
584 return (t);
585
586 case ELIF:
587 t = newtp(TELIF);
588 t->left = c_list(sALIAS, true);
589 t->right = thenpart(sALIAS);
590 return (t);
591
592 default:
593 REJECT;
594 }
595 return (NULL);
596 }
597
598 static struct op *
caselist(int sALIAS)599 caselist(int sALIAS)
600 {
601 struct op *t, *tl;
602 int c;
603
604 c = token(CONTIN|KEYWORD|sALIAS);
605 /* A {...} can be used instead of in...esac for case statements */
606 if (c == IN)
607 c = ESAC;
608 else if ((unsigned int)c == ORD('{'))
609 c = ORD('}');
610 else
611 syntaxerr(NULL);
612 t = tl = NULL;
613 /* no ALIAS here */
614 while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
615 struct op *tc = casepart(c, sALIAS);
616 if (tl == NULL)
617 t = tl = tc, tl->right = NULL;
618 else
619 tl->right = tc, tl = tc;
620 }
621 musthave(c, KEYWORD|sALIAS);
622 return (t);
623 }
624
625 static struct op *
casepart(int endtok,int sALIAS)626 casepart(int endtok, int sALIAS)
627 {
628 struct op *t;
629 XPtrV ptns;
630
631 XPinit(ptns, 16);
632 t = newtp(TPAT);
633 /* no ALIAS here */
634 if ((unsigned int)token(CONTIN | KEYWORD) != ORD('('))
635 REJECT;
636 do {
637 switch (token(0)) {
638 case LWORD:
639 break;
640 case ORD('}'):
641 case ESAC:
642 if (symbol != endtok) {
643 strdupx(yylval.cp, (unsigned int)symbol ==
644 ORD('}') ? Tcbrace : Tesac, ATEMP);
645 break;
646 }
647 /* FALLTHROUGH */
648 default:
649 syntaxerr(NULL);
650 }
651 XPput(ptns, yylval.cp);
652 } while (token(0) == '|');
653 REJECT;
654 XPput(ptns, NULL);
655 t->vars = (char **)XPclose(ptns);
656 musthave(ORD(')'), 0);
657
658 t->left = c_list(sALIAS, true);
659
660 /* initialise to default for ;; or omitted */
661 t->u.charflag = ORD(';');
662 /* SUSv4 requires the ;; except in the last casepart */
663 if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
664 switch (symbol) {
665 default:
666 syntaxerr(NULL);
667 case BRKEV:
668 t->u.charflag = ORD('|');
669 if (0)
670 /* FALLTHROUGH */
671 case BRKFT:
672 t->u.charflag = ORD('&');
673 /* FALLTHROUGH */
674 case BREAK:
675 /* initialised above, but we need to eat the token */
676 ACCEPT;
677 }
678 return (t);
679 }
680
681 static struct op *
function_body(char * name,int sALIAS,bool ksh_func)682 function_body(char *name, int sALIAS,
683 /* function foo { ... } vs foo() { .. } */
684 bool ksh_func)
685 {
686 char *sname, *p;
687 struct op *t;
688
689 sname = wdstrip(name, 0);
690 /*-
691 * Check for valid characters in name. POSIX and AT&T ksh93 say
692 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
693 * have allowed more; the following were never allowed:
694 * NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
695 * C_QUOTE|C_SPC covers all but adds # * ? [ ]
696 */
697 for (p = sname; *p; p++)
698 if (ctype(*p, C_QUOTE | C_SPC))
699 yyerror(Tinvname, sname, Tfunction);
700
701 /*
702 * Note that POSIX allows only compound statements after foo(),
703 * sh and AT&T ksh allow any command, go with the later since it
704 * shouldn't break anything. However, for function foo, AT&T ksh
705 * only accepts an open-brace.
706 */
707 if (ksh_func) {
708 if ((unsigned int)tpeek(CONTIN|KEYWORD|sALIAS) == ORD('(' /*)*/)) {
709 /* function foo () { //}*/
710 ACCEPT;
711 musthave(ORD(/*(*/ ')'), 0);
712 /* degrade to POSIX function */
713 ksh_func = false;
714 }
715 musthave(ORD('{' /*}*/), CONTIN|KEYWORD|sALIAS);
716 REJECT;
717 }
718
719 t = newtp(TFUNCT);
720 t->str = sname;
721 t->u.ksh_func = tobool(ksh_func);
722 t->lineno = source->line;
723
724 if ((t->left = get_command(CONTIN, sALIAS)) == NULL) {
725 char *tv;
726 /*
727 * Probably something like foo() followed by EOF or ';'.
728 * This is accepted by sh and ksh88.
729 * To make "typeset -f foo" work reliably (so its output can
730 * be used as input), we pretend there is a colon here.
731 */
732 t->left = newtp(TCOM);
733 /* (2 * sizeof(char *)) is small enough */
734 t->left->args = alloc(2 * sizeof(char *), ATEMP);
735 t->left->args[0] = tv = alloc(3, ATEMP);
736 tv[0] = QCHAR;
737 tv[1] = ':';
738 tv[2] = EOS;
739 t->left->args[1] = NULL;
740 t->left->vars = alloc(sizeof(char *), ATEMP);
741 t->left->vars[0] = NULL;
742 t->left->lineno = 1;
743 }
744
745 return (t);
746 }
747
748 static char **
wordlist(int sALIAS)749 wordlist(int sALIAS)
750 {
751 int c;
752 XPtrV args;
753
754 XPinit(args, 16);
755 /* POSIX does not do alias expansion here... */
756 if ((c = token(CONTIN|KEYWORD|sALIAS)) != IN) {
757 if (c != ';')
758 /* non-POSIX, but AT&T ksh accepts a ; here */
759 REJECT;
760 return (NULL);
761 }
762 while ((c = token(0)) == LWORD)
763 XPput(args, yylval.cp);
764 if (c != '\n' && c != ';')
765 syntaxerr(NULL);
766 XPput(args, NULL);
767 return ((char **)XPclose(args));
768 }
769
770 /*
771 * supporting functions
772 */
773
774 static struct op *
block(int type,struct op * t1,struct op * t2)775 block(int type, struct op *t1, struct op *t2)
776 {
777 struct op *t;
778
779 t = newtp(type);
780 t->left = t1;
781 t->right = t2;
782 return (t);
783 }
784
/*
 * Table of token spellings, terminated by an entry with a NULL name.
 * initkeywords() enters the reserved ones into the keyword table and
 * syntaxerr() uses the table to name the offending token.
 */
static const struct tokeninfo {
	const char *name;	/* spelling of the token */
	short val;		/* token value */
	short reserved;		/* true if this is a reserved word */
} tokentab[] = {
	/* Reserved words */
	{ "if",		IF,		true },
	{ "then",	THEN,		true },
	{ "else",	ELSE,		true },
	{ "elif",	ELIF,		true },
	{ "fi",		FI,		true },
	{ "case",	CASE,		true },
	{ Tesac,	ESAC,		true },
	{ "for",	FOR,		true },
	{ Tselect,	SELECT,		true },
	{ "while",	WHILE,		true },
	{ "until",	UNTIL,		true },
	{ "do",		DO,		true },
	{ "done",	DONE,		true },
	{ "in",		IN,		true },
	{ Tfunction,	FUNCTION,	true },
	{ Ttime,	TIME,		true },
	{ "{",		ORD('{'),	true },
	{ Tcbrace,	ORD('}'),	true },
	{ "!",		BANG,		true },
	{ "[[",		DBRACKET,	true },
	/* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
	{ "&&",		LOGAND,		false },
	{ "||",		LOGOR,		false },
	{ ";;",		BREAK,		false },
	{ ";|",		BRKEV,		false },
	{ ";&",		BRKFT,		false },
	{ "((",		MDPAREN,	false },
	{ "|&",		COPROC,		false },
	/* and some special cases... */
	{ "newline",	ORD('\n'),	false },
	{ NULL,		0,		false }
};
823
824 void
initkeywords(void)825 initkeywords(void)
826 {
827 struct tokeninfo const *tt;
828 struct tbl *p;
829
830 ktinit(APERM, &keywords,
831 /* currently 28 keywords: 75% of 64 = 2^6 */
832 6);
833 for (tt = tokentab; tt->name; tt++) {
834 if (tt->reserved) {
835 p = ktenter(&keywords, tt->name, hash(tt->name));
836 p->flag |= DEFINED|ISSET;
837 p->type = CKEYWD;
838 p->val.i = tt->val;
839 }
840 }
841 }
842
843 static void
syntaxerr(const char * what)844 syntaxerr(const char *what)
845 {
846 /* 23<<- is the longest redirection, I think */
847 char redir[8];
848 const char *s;
849 struct tokeninfo const *tt;
850 int c;
851
852 if (!what)
853 what = Tunexpected;
854 REJECT;
855 c = token(0);
856 Again:
857 switch (c) {
858 case 0:
859 if (nesting.start_token) {
860 c = nesting.start_token;
861 source->errline = nesting.start_line;
862 what = "unmatched";
863 goto Again;
864 }
865 /* don't quote the EOF */
866 yyerror("%s: unexpected EOF", Tsynerr);
867 /* NOTREACHED */
868
869 case LWORD:
870 s = snptreef(NULL, 32, Tf_S, yylval.cp);
871 break;
872
873 case REDIR:
874 s = snptreef(redir, sizeof(redir), Tft_R, yylval.iop);
875 break;
876
877 default:
878 for (tt = tokentab; tt->name; tt++)
879 if (tt->val == c)
880 break;
881 if (tt->name)
882 s = tt->name;
883 else {
884 if (c > 0 && c < 256) {
885 redir[0] = c;
886 redir[1] = '\0';
887 } else
888 shf_snprintf(redir, sizeof(redir),
889 "?%d", c);
890 s = redir;
891 }
892 }
893 yyerror(Tf_sD_s_qs, Tsynerr, what, s);
894 }
895
896 static void
nesting_push(struct nesting_state * save,int tok)897 nesting_push(struct nesting_state *save, int tok)
898 {
899 *save = nesting;
900 nesting.start_token = tok;
901 nesting.start_line = source->line;
902 }
903
904 static void
nesting_pop(struct nesting_state * saved)905 nesting_pop(struct nesting_state *saved)
906 {
907 nesting = *saved;
908 }
909
910 static struct op *
newtp(int type)911 newtp(int type)
912 {
913 struct op *t;
914
915 t = alloc(sizeof(struct op), ATEMP);
916 t->type = type;
917 t->u.evalflags = 0;
918 t->args = NULL;
919 t->vars = NULL;
920 t->ioact = NULL;
921 t->left = t->right = NULL;
922 t->str = NULL;
923 return (t);
924 }
925
926 struct op *
compile(Source * s,bool skiputf8bom,bool doalias)927 compile(Source *s, bool skiputf8bom, bool doalias)
928 {
929 nesting.start_token = 0;
930 nesting.start_line = 0;
931 herep = heres;
932 source = s;
933 if (skiputf8bom)
934 yyskiputf8bom();
935 yyparse(doalias);
936 return (outtree);
937 }
938
939 /* Check if we are in the middle of reading an alias */
940 static int
inalias(struct source * s)941 inalias(struct source *s)
942 {
943 while (s && s->type == SALIAS) {
944 if (!(s->flags & SF_ALIASEND))
945 return (1);
946 s = s->next;
947 }
948 return (0);
949 }
950
951
/*
 * Order important - indexed by Test_meta values
 * Note that ||, &&, ( and ) can't appear as unquoted strings
 * in normal shell input, so these can be interpreted unambiguously
 * in the evaluation pass.
 */
static const char dbtest_or[] = { CHAR, '|', CHAR, '|', EOS };
static const char dbtest_and[] = { CHAR, '&', CHAR, '&', EOS };
static const char dbtest_not[] = { CHAR, '!', EOS };
static const char dbtest_oparen[] = { CHAR, '(', EOS };
static const char dbtest_cparen[] = { CHAR, ')', EOS };
const char * const dbtest_tokens[] = {
	dbtest_or, dbtest_and, dbtest_not,
	dbtest_oparen, dbtest_cparen
};
/* word-encoded "]]", "<" and ">" as used by dbtestp_isa() */
static const char db_close[] = { CHAR, ']', CHAR, ']', EOS };
static const char db_lthan[] = { CHAR, '<', EOS };
static const char db_gthan[] = { CHAR, '>', EOS };
970
971 /*
972 * Test if the current token is a whatever. Accepts the current token if
973 * it is. Returns 0 if it is not, non-zero if it is (in the case of
974 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
975 */
976 static Test_op
dbtestp_isa(Test_env * te,Test_meta meta)977 dbtestp_isa(Test_env *te, Test_meta meta)
978 {
979 int c = tpeek(CMDASN | (meta == TM_BINOP ? 0 : CONTIN));
980 bool uqword;
981 char *save = NULL;
982 Test_op ret = TO_NONOP;
983
984 /* unquoted word? */
985 uqword = c == LWORD && *ident;
986
987 if (meta == TM_OR)
988 ret = c == LOGOR ? TO_NONNULL : TO_NONOP;
989 else if (meta == TM_AND)
990 ret = c == LOGAND ? TO_NONNULL : TO_NONOP;
991 else if (meta == TM_NOT)
992 ret = (uqword && !strcmp(yylval.cp,
993 dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
994 else if (meta == TM_OPAREN)
995 ret = (unsigned int)c == ORD('(') /*)*/ ? TO_NONNULL : TO_NONOP;
996 else if (meta == TM_CPAREN)
997 ret = (unsigned int)c == /*(*/ ORD(')') ? TO_NONNULL : TO_NONOP;
998 else if (meta == TM_UNOP || meta == TM_BINOP) {
999 if (meta == TM_BINOP && c == REDIR &&
1000 (yylval.iop->ioflag == IOREAD ||
1001 yylval.iop->ioflag == IOWRITE)) {
1002 ret = TO_NONNULL;
1003 save = wdcopy(yylval.iop->ioflag == IOREAD ?
1004 db_lthan : db_gthan, ATEMP);
1005 } else if (uqword && (ret = test_isop(meta, ident)))
1006 save = yylval.cp;
1007 } else
1008 /* meta == TM_END */
1009 ret = (uqword && !strcmp(yylval.cp,
1010 db_close)) ? TO_NONNULL : TO_NONOP;
1011 if (ret != TO_NONOP) {
1012 ACCEPT;
1013 if ((unsigned int)meta < NELEM(dbtest_tokens))
1014 save = wdcopy(dbtest_tokens[(int)meta], ATEMP);
1015 if (save)
1016 XPput(*te->pos.av, save);
1017 }
1018 return (ret);
1019 }
1020
1021 static const char *
dbtestp_getopnd(Test_env * te,Test_op op MKSH_A_UNUSED,bool do_eval MKSH_A_UNUSED)1022 dbtestp_getopnd(Test_env *te, Test_op op MKSH_A_UNUSED,
1023 bool do_eval MKSH_A_UNUSED)
1024 {
1025 int c = tpeek(CMDASN);
1026
1027 if (c != LWORD)
1028 return (NULL);
1029
1030 ACCEPT;
1031 XPput(*te->pos.av, yylval.cp);
1032
1033 return (null);
1034 }
1035
/*
 * Evaluation hook of the [[ ]] parser: nothing is evaluated here,
 * all arguments are ignored and 1 is returned
 */
static int
dbtestp_eval(Test_env *te MKSH_A_UNUSED, Test_op op MKSH_A_UNUSED,
    const char *opnd1 MKSH_A_UNUSED, const char *opnd2 MKSH_A_UNUSED,
    bool do_eval MKSH_A_UNUSED)
{
	return (1);
}
1043
1044 static void
dbtestp_error(Test_env * te,int offset,const char * msg)1045 dbtestp_error(Test_env *te, int offset, const char *msg)
1046 {
1047 te->flags |= TEF_ERROR;
1048
1049 if (offset < 0) {
1050 REJECT;
1051 /* Kludgy to say the least... */
1052 symbol = LWORD;
1053 yylval.cp = *(XPptrv(*te->pos.av) + XPsize(*te->pos.av) +
1054 offset);
1055 }
1056 syntaxerr(msg);
1057 }
1058
1059 #if HAVE_SELECT
1060
1061 #ifndef EOVERFLOW
1062 #ifdef ERANGE
1063 #define EOVERFLOW ERANGE
1064 #else
1065 #define EOVERFLOW EINVAL
1066 #endif
1067 #endif
1068
1069 bool
parse_usec(const char * s,struct timeval * tv)1070 parse_usec(const char *s, struct timeval *tv)
1071 {
1072 struct timeval tt;
1073 int i;
1074
1075 tv->tv_sec = 0;
1076 /* parse integral part */
1077 while (ctype(*s, C_DIGIT)) {
1078 tt.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s++);
1079 /*XXX this overflow check maybe UB */
1080 if (tt.tv_sec / 10 != tv->tv_sec) {
1081 errno = EOVERFLOW;
1082 return (true);
1083 }
1084 tv->tv_sec = tt.tv_sec;
1085 }
1086
1087 tv->tv_usec = 0;
1088 if (!*s)
1089 /* no decimal fraction */
1090 return (false);
1091 else if (*s++ != '.') {
1092 /* junk after integral part */
1093 errno = EINVAL;
1094 return (true);
1095 }
1096
1097 /* parse decimal fraction */
1098 i = 100000;
1099 while (ctype(*s, C_DIGIT)) {
1100 tv->tv_usec += i * ksh_numdig(*s++);
1101 if (i == 1)
1102 break;
1103 i /= 10;
1104 }
1105 /* check for junk after fractional part */
1106 while (ctype(*s, C_DIGIT))
1107 ++s;
1108 if (*s) {
1109 errno = EINVAL;
1110 return (true);
1111 }
1112
1113 /* end of input string reached, no errors */
1114 return (false);
1115 }
1116 #endif
1117
1118 /*
1119 * Helper function called from within lex.c:yylex() to parse
1120 * a COMSUB recursively using the main shell parser and lexer
1121 */
1122 char *
yyrecursive(int subtype)1123 yyrecursive(int subtype)
1124 {
1125 struct op *t;
1126 char *cp;
1127 struct yyrecursive_state *ys;
1128 unsigned int stok, etok;
1129
1130 if (subtype != COMSUB) {
1131 stok = ORD('{');
1132 etok = ORD('}');
1133 } else {
1134 stok = ORD('(');
1135 etok = ORD(')');
1136 }
1137
1138 ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
1139
1140 /* tell the lexer to accept a closing parenthesis as EOD */
1141 ys->old_nesting_type = subshell_nesting_type;
1142 subshell_nesting_type = etok;
1143
1144 /* push reject state, parse recursively, pop reject state */
1145 ys->old_reject = reject;
1146 ys->old_symbol = symbol;
1147 ACCEPT;
1148 memcpy(ys->old_heres, heres, sizeof(heres));
1149 ys->old_herep = herep;
1150 herep = heres;
1151 ys->next = e->yyrecursive_statep;
1152 e->yyrecursive_statep = ys;
1153 /* we use TPAREN as a helper container here */
1154 t = nested(TPAREN, stok, etok, ALIAS);
1155 yyrecursive_pop(false);
1156
1157 /* t->left because nested(TPAREN, ...) hides our goodies there */
1158 cp = snptreef(NULL, 0, Tf_T, t->left);
1159 tfree(t, ATEMP);
1160
1161 return (cp);
1162 }
1163
1164 void
yyrecursive_pop(bool popall)1165 yyrecursive_pop(bool popall)
1166 {
1167 struct yyrecursive_state *ys;
1168
1169 popnext:
1170 if (!(ys = e->yyrecursive_statep))
1171 return;
1172 e->yyrecursive_statep = ys->next;
1173
1174 memcpy(heres, ys->old_heres, sizeof(heres));
1175 herep = ys->old_herep;
1176 reject = ys->old_reject;
1177 symbol = ys->old_symbol;
1178
1179 subshell_nesting_type = ys->old_nesting_type;
1180
1181 afree(ys, ATEMP);
1182 if (popall)
1183 goto popnext;
1184 }
1185