1 /* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
6 * mirabilos <m@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.251 2020/03/10 23:48:40 tg Exp $");
27
/*
 * states while lexing word
 *
 * The current state is stored in Lex_state.type (a uint8_t) and mirrored
 * in the local variable "state" of yylex(); nested constructs push and
 * pop these values on the state stack.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
/* invalid state; yylex() stores it in slot 0 of its state array */
#define SINVALID	255	/* invalid state */
48
/*
 * One entry per active input recording ("retrace"): while an entry is
 * linked into the retrace_info list, o_getsc_r() appends every character
 * it returns to the entry's string.
 */
struct sretrace_info {
	/* older recording; PUSH_SRETRACE links new entries in front */
	struct sretrace_info *next;
	/* growable buffer collecting the characters read */
	XString xs;
	/* current write position inside xs */
	char *xp;
};
54
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The members of the union share storage; which one is in use depends on
 * the state (or, for "base", on being the first slot of a state block).
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		/* (set by PUSH_SRETRACE, consumed by POP_SRETRACE) */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
	/* extra flags */
	uint8_t ls_flags;
} Lex_state;
/* shorthand accessors for the union members above */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
87
/* ls_flags */
/*
 * LS_HEREDOC is set on the initial state when yylex() is called with
 * HEREDOC in cf; PUSH_STATE copies ls_flags into every newly pushed state.
 */
#define LS_HEREDOC	BIT(0)

/* bounds of the state block currently in use by yylex() */
typedef struct {
	/* first slot of the block; POP_STATE calls pop_state_i() on reaching it */
	Lex_state *base;
	/* one past the last slot; PUSH_STATE calls push_state_i() on reaching it */
	Lex_state *end;
} State_info;
95
/* file-local helpers */
static void readhere(struct ioword *);
static void ungetsc(int);
static void ungetsc_i(int);
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int getsc_i(void);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
static void gethere(void);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);

/*
 * While non-zero, o_getsc() skips its fast path and always goes through
 * getsc_bn(); reset to 0 at the start of every yylex() run.
 */
static int backslash_skip;
/*
 * Nesting counter, reset to 0 at the start of every yylex() run; raised
 * by yylex() while reading a comment and while inside '...' or $'...',
 * and by readhere() for here documents without IOEVAL.
 * NOTE(review): presumably consulted by getsc_bn() to suppress
 * backslash-newline removal -- confirm against its definition.
 */
static int ignore_backslash_newline;
111
/* optimised getsc_bn() */
/*
 * Fast path: take the next byte straight from source->str unless it is
 * NUL or a backslash, or backslash_skip is set; in those cases defer to
 * getsc_bn().
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			!backslash_skip ? *source->str++ : getsc_bn())
/* optimised getsc_uu() */
/* fast path: only a NUL at the read position needs the full getsc_uu() */
#define o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/* retrace helper */
/*
 * Expands to declarations, a loop and a return statement and is used as
 * the complete body of getsc_i() and getsc_r(): evaluates carg once,
 * appends the result to every entry on the retrace_info list (growing
 * the buffers as needed) and returns it.
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);
130
131 /* callback */
132 static int
getsc_i(void)133 getsc_i(void)
134 {
135 o_getsc_r((unsigned int)(unsigned char)o_getsc());
136 }
137
138 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
139 #define getsc() getsc_i()
140 #else
141 static int getsc_r(int);
142
143 static int
getsc_r(int c)144 getsc_r(int c)
145 {
146 o_getsc_r(c);
147 }
148
149 #define getsc() getsc_r((unsigned int)(unsigned char)o_getsc())
150 #endif
151
/* number of Lex_state slots per block; yylex() keeps the first block on its stack */
#define STATE_BSIZE	8

/*
 * The macros below operate on locals of yylex(): statep, state,
 * state_info, ws, wp, sp and dp.
 */

/*
 * Enter state s: advance statep to the next slot (obtained through
 * push_state_i() when the current block is used up), set its type and
 * the local "state", and inherit ls_flags from the previous top.
 */
#define PUSH_STATE(s)	do {					\
	uint8_t state_flags = statep->ls_flags;			\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
	statep->ls_flags = state_flags;				\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current state: step back one slot (through pop_state_i()
 * when the base of the block is reached) and reload "state" from it.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * PUSH_STATE(s), remember the current output position in ls_start and
 * start a new input recording at the head of the retrace_info list.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * Undo PUSH_SRETRACE: rewind wp to the saved output position,
 * NUL-terminate the newest recording and leave its string in sp, unlink
 * and free the bookkeeping struct (dp is used as scratch), then
 * POP_STATE().  Only the struct is freed here; the users in yylex()
 * either afree() sp or hand it to a Source as u.freeme.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
188
/**
 * Lexical analyser
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack. Note "$(...)" are now parsed recursively.
 *
 * cf is a bit mask of lexer flags; the ones tested in this function are
 * ONEWORD, LETEXPR, HEREDELIM, HEREDOC, CMDASN, CMDWORD, LQCHAR, CONTIN,
 * KEYWORD, ESACONLY and ALIAS (ALIAS may also be switched on internally
 * when the previous alias expansion ended in a space).
 *
 * Returns the token type: REDIR (with yylval.iop set), LWORD (with
 * yylval.cp set to the encoded word), the value of a matched keyword,
 * an operator token such as LOGOR, LOGAND, BREAK, BRKEV, BRKFT, COPROC
 * or MDPAREN, or otherwise the terminating character itself (0 at end
 * of input).
 */

int
yylex(int cf)
{
	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
	State_info state_info;
	int c, c2, state;
	size_t cz;
	XString ws;				/* expandable output word */
	char *wp;				/* output word pointer */
	char *sp, *dp;

	/*
	 * restart point: used for continuation after a newline (CONTIN)
	 * and after an alias expansion has been pushed as new input
	 */
 Again:
	/* slot 0 is a sentinel, the initial state lives in slot 1 */
	states[0].type = SINVALID;
	states[0].ls_base = NULL;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &state_info.base[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf & ONEWORD)
		state = SWORD;
	else if (cf & LETEXPR) {
		/* enclose arguments in (double) quotes */
		*wp++ = OQUOTE;
		state = SLETPAREN;
		statep->nparen = 0;
	} else {
		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		do {
			c = getsc();
		} while (ctype(c, C_BLANK));
		/* skip a comment up to, not including, newline or NUL */
		if (c == '#') {
			ignore_backslash_newline++;
			do {
				c = getsc();
			} while (!ctype(c, C_NUL | C_LF));
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {
		/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* POSIX: trailing space only counts if parsing simple cmd */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
	statep->type = state;
	statep->ls_flags = (cf & HEREDOC) ? LS_HEREDOC : 0;

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0 ||
	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
		if (state == SBASE &&
		    subshell_nesting_type == ORD(/*{*/ '}') &&
		    (unsigned int)c == ORD(/*{*/ '}'))
			/* possibly end ${ :;} */
			break;
		Xcheck(ws, wp);
		switch (state) {
		case SADELIM:
			if ((unsigned int)c == ORD('('))
				statep->nparen++;
			else if ((unsigned int)c == ORD(')'))
				statep->nparen--;
			else if (statep->nparen == 0 &&
			    ((unsigned int)c == ORD(/*{*/ '}') ||
			    c == (int)statep->ls_adelim.delimiter)) {
				*wp++ = ADELIM;
				*wp++ = c;
				if ((unsigned int)c == ORD(/*{*/ '}') ||
				    --statep->ls_adelim.num == 0)
					POP_STATE();
				/* a closing brace pops a second state as well */
				if ((unsigned int)c == ORD(/*{*/ '}'))
					POP_STATE();
				break;
			}
			/* FALLTHROUGH */
		case SBASE:
			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
				/* temporary */
				*wp = EOS;
				if (is_wdvarname(Xstring(ws, wp), false)) {
					char *p, *tmp;

					if (arraysub(&tmp)) {
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* FALLTHROUGH */
 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
			if (ctype(c, C_PATMO)) {
				c2 = getsc();
				if ((unsigned int)c2 == ORD('(' /*)*/)) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
			/* FALLTHROUGH */
 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			case ORD('\\'):
 getsc_qchar:
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case ORD('\''):
 open_ssquote_unless_heredoc:
				if ((statep->ls_flags & LS_HEREDOC))
					goto store_char;
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			case ORD('"'):
 open_sdquote:
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			case ORD('$'):
				/*
				 * processing of dollar sign belongs into
				 * Subst, except for those which can open
				 * a string: $'…' and $"…"
				 */
 subst_dollar_ex:
				c = getsc();
				switch (c) {
				case ORD('"'):
					goto open_sdquote;
				case ORD('\''):
					goto open_sequote;
				default:
					goto SubstS;
				}
			default:
				goto Subst;
			}
			break;

 Subst:
			switch (c) {
			case ORD('\\'):
				c = getsc();
				switch (c) {
				case ORD('"'):
					if ((statep->ls_flags & LS_HEREDOC))
						goto heredocquote;
					/* FALLTHROUGH */
				case ORD('\\'):
				case ORD('$'):
				case ORD('`'):
 store_qchar:
					*wp++ = QCHAR;
					*wp++ = c;
					break;
				default:
 heredocquote:
					Xcheck(ws, wp);
					if (c) {
						/* trailing \ is lost */
						*wp++ = CHAR;
						*wp++ = '\\';
						*wp++ = CHAR;
						*wp++ = c;
					}
					break;
				}
				break;
			case ORD('$'):
				c = getsc();
 SubstS:
				if ((unsigned int)c == ORD('(' /*)*/)) {
					c = getsc();
					if ((unsigned int)c == ORD('(' /*)*/)) {
						*wp++ = EXPRSUB;
						PUSH_SRETRACE(SASPAREN);
						/* unneeded? */
						/*statep->ls_flags &= ~LS_HEREDOC;*/
						statep->nparen = 2;
						*retrace_info->xp++ = '(';
					} else {
						ungetsc(c);
 subst_command:
						c = COMSUB;
 subst_command2:
						/* substitution body is lexed recursively */
						sp = yyrecursive(c);
						cz = strlen(sp) + 1;
						XcheckN(ws, wp, cz);
						*wp++ = c;
						memcpy(wp, sp, cz);
						wp += cz;
					}
				} else if ((unsigned int)c == ORD('{' /*}*/)) {
					if ((unsigned int)(c = getsc()) == ORD('|')) {
						/*
						 * non-subenvironment
						 * value substitution
						 */
						c = VALSUB;
						goto subst_command2;
					} else if (ctype(c, C_IFSWS)) {
						/*
						 * non-subenvironment
						 * "command" substitution
						 */
						c = FUNSUB;
						goto subst_command2;
					}
					ungetsc(c);
					*wp++ = OSUBST;
					*wp++ = '{' /*}*/;
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if ((unsigned int)c == ORD(':')) {
						*wp++ = CHAR;
						*wp++ = c;
						c = getsc();
						if ((unsigned int)c == ORD(':')) {
							*wp++ = CHAR;
							*wp++ = '0';
							*wp++ = ADELIM;
							*wp++ = ':';
							PUSH_STATE(SBRACE);
							/* perhaps unneeded? */
							statep->ls_flags &= ~LS_HEREDOC;
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 1;
							statep->nparen = 0;
							break;
						} else if (ctype(c, C_ALNUX | C_DOLAR | C_SPC) ||
						    c == '(' /*)*/) {
							/* substring subst. */
							if (c != ' ') {
								*wp++ = CHAR;
								*wp++ = ' ';
							}
							ungetsc(c);
							PUSH_STATE(SBRACE);
							/* perhaps unneeded? */
							statep->ls_flags &= ~LS_HEREDOC;
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 2;
							statep->nparen = 0;
							break;
						}
					} else if (c == '/') {
						c2 = ADELIM;
 parse_adelim_slash:
						*wp++ = CHAR;
						*wp++ = c;
						if ((unsigned int)(c = getsc()) == ORD('/')) {
							*wp++ = c2;
							*wp++ = c;
						} else
							ungetsc(c);
						PUSH_STATE(SBRACE);
						/* perhaps unneeded? */
						statep->ls_flags &= ~LS_HEREDOC;
						PUSH_STATE(SADELIM);
						statep->ls_adelim.delimiter = '/';
						statep->ls_adelim.num = 1;
						statep->nparen = 0;
						break;
					} else if (c == '@') {
						/* peek: "@/" is handled like "/" */
						c2 = getsc();
						ungetsc(c2);
						if ((unsigned int)c2 == ORD('/')) {
							c2 = CHAR;
							goto parse_adelim_slash;
						}
					}
					/*
					 * If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (ctype(c, C_SUB2)) {
						ungetsc(c);
						if (Flag(FSH))
							PUSH_STATE(STBRACEBOURNE);
						else
							PUSH_STATE(STBRACEKORN);
						/* single-quotes-in-heredoc-trim */
						statep->ls_flags &= ~LS_HEREDOC;
					} else {
						ungetsc(c);
						if (state == SDQUOTE ||
						    state == SQBRACE)
							PUSH_STATE(SQBRACE);
						else
							PUSH_STATE(SBRACE);
						/* here no LS_HEREDOC removal */
						/* single-quotes-in-heredoc-braces */
					}
				} else if (ctype(c, C_ALPHX)) {
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALNUX));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* lone dollar sign, stored literally */
					*wp++ = CHAR;
					*wp++ = '$';
					ungetsc(c);
				}
				break;
			case ORD('`'):
 subst_gravis:
				PUSH_STATE(SBQUOTE);
				*wp++ = COMASUB;
				/*
				 * We need to know whether we are within double
				 * quotes in order to translate \" to " within
				 * "…`…\"…`…" because, unlike for COMSUBs, the
				 * outer double quoteing changes the backslash
				 * meaning for the inside. For more details:
				 * http://austingroupbugs.net/view.php?id=1015
				 */
				statep->ls_bool = false;
				s2 = statep;
				base = state_info.base;
				/* walk down the state stack, block by block */
				while (/* CONSTCOND */ 1) {
					for (; s2 != base; s2--) {
						if (s2->type == SDQUOTE) {
							statep->ls_bool = true;
							break;
						}
					}
					if (s2 != base)
						break;
					if (!(s2 = s2->ls_base))
						break;
					base = s2-- - STATE_BSIZE;
				}
				break;
			case QCHAR:
				if (cf & LQCHAR) {
					*wp++ = QCHAR;
					*wp++ = getsc();
					break;
				}
				/* FALLTHROUGH */
			default:
 store_char:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SEQUOTE:
			if ((unsigned int)c == ORD('\'')) {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else if ((unsigned int)c == ORD('\\')) {
				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
					c2 = getsc();
				/* after a NUL, drop the rest of the string */
				if (c2 == 0)
					statep->ls_bool = true;
				if (!statep->ls_bool) {
					char ts[4];

					if ((unsigned int)c2 < 0x100) {
						*wp++ = QCHAR;
						*wp++ = c2;
					} else {
						cz = utf_wctomb(ts, c2 - 0x100);
						ts[cz] = 0;
						cz = 0;
						do {
							*wp++ = QCHAR;
							*wp++ = ts[cz];
						} while (ts[++cz]);
					}
				}
			} else if (!statep->ls_bool) {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SSQUOTE:
			if ((unsigned int)c == ORD('\'')) {
				POP_STATE();
				if ((statep->ls_flags & LS_HEREDOC) ||
				    state == SQBRACE)
					goto store_char;
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SDQUOTE:
			if ((unsigned int)c == ORD('"')) {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		/* $(( ... )) */
		case SASPAREN:
			if ((unsigned int)c == ORD('('))
				statep->nparen++;
			else if ((unsigned int)c == ORD(')')) {
				statep->nparen--;
				if (statep->nparen == 1) {
					/* end of EXPRSUB */
					POP_SRETRACE();

					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
						cz = strlen(sp) - 2;
						XcheckN(ws, wp, cz);
						memcpy(wp, sp + 1, cz);
						wp += cz;
						afree(sp, ATEMP);
						*wp++ = '\0';
						break;
					} else {
						Source *s;

						ungetsc(c2);
						/*
						 * mismatched parenthesis -
						 * assume we were really
						 * parsing a $(...) expression
						 */
						--wp;
						/* re-read the recorded text */
						s = pushs(SREREAD,
						    source->areap);
						s->start = s->str =
						    s->u.freeme = sp;
						s->next = source;
						source = s;
						goto subst_command;
					}
				}
			}
			/* reuse existing state machine */
			goto Sbase2;

		case SQBRACE:
			if ((unsigned int)c == ORD('\\')) {
				/*
				 * perform POSIX "quote removal" if the back-
				 * slash is "special", i.e. same cases as the
				 * {case '\\':} in Subst: plus closing brace;
				 * in mksh code "quote removal" on '\c' means
				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
				 * emitted (in heredocquote:)
				 */
				if ((unsigned int)(c = getsc()) == ORD('"') ||
				    (unsigned int)c == ORD('\\') ||
				    ctype(c, C_DOLAR | C_GRAVE) ||
				    (unsigned int)c == ORD(/*{*/ '}'))
					goto store_qchar;
				goto heredocquote;
			}
			goto common_SQBRACE;

		case SBRACE:
			if ((unsigned int)c == ORD('\''))
				goto open_ssquote_unless_heredoc;
			else if ((unsigned int)c == ORD('\\'))
				goto getsc_qchar;
 common_SQBRACE:
			if ((unsigned int)c == ORD('"'))
				goto open_sdquote;
			else if ((unsigned int)c == ORD('$'))
				goto subst_dollar_ex;
			else if ((unsigned int)c == ORD('`'))
				goto subst_gravis;
			else if ((unsigned int)c != ORD(/*{*/ '}'))
				goto store_char;
			POP_STATE();
			*wp++ = CSUBST;
			*wp++ = /*{*/ '}';
			break;

		/* Same as SBASE, except (,|,) treated specially */
		case STBRACEKORN:
			if ((unsigned int)c == ORD('|'))
				*wp++ = SPAT;
			else if ((unsigned int)c == ORD('(')) {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else /* FALLTHROUGH */
		case STBRACEBOURNE:
			if ((unsigned int)c == ORD(/*{*/ '}')) {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		case SBQUOTE:
			if ((unsigned int)c == ORD('`')) {
				*wp++ = 0;
				POP_STATE();
			} else if ((unsigned int)c == ORD('\\')) {
				switch (c = getsc()) {
				case 0:
					/* trailing \ is lost */
					break;
				case ORD('$'):
				case ORD('`'):
				case ORD('\\'):
					*wp++ = c;
					break;
				case ORD('"'):
					/* \" becomes " only inside "`...`" */
					if (statep->ls_bool) {
						*wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				default:
					*wp++ = '\\';
					*wp++ = c;
					break;
				}
			} else
				*wp++ = c;
			break;

		/* ONEWORD */
		case SWORD:
			goto Subst;

		/* LETEXPR: (( ... )) */
		case SLETPAREN:
			if ((unsigned int)c == ORD(/*(*/ ')')) {
				if (statep->nparen > 0)
					--statep->nparen;
				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else {
					Source *s;

					ungetsc(c2);
					ungetsc(c);
					/*
					 * mismatched parenthesis -
					 * assume we were really
					 * parsing a (...) expression
					 */
					*wp = EOS;
					sp = Xstring(ws, wp);
					dp = wdstrip(sp + 1, WDS_TPUTS);
					s = pushs(SREREAD, source->areap);
					s->start = s->str = s->u.freeme = dp;
					s->next = source;
					source = s;
					ungetsc('(' /*)*/);
					return (ORD('(' /*)*/));
				}
			} else if ((unsigned int)c == ORD('('))
				/*
				 * parentheses inside quotes and
				 * backslashes are lost, but AT&T ksh
				 * doesn't count them either
				 */
				++statep->nparen;
			goto Sbase2;

		/* << or <<- delimiter */
		case SHEREDELIM:
			/*
			 * here delimiters need a special case since
			 * $ and `...` are not to be treated specially
			 */
			switch (c) {
			case ORD('\\'):
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case ORD('\''):
				goto open_ssquote_unless_heredoc;
			case ORD('$'):
				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
 open_sequote:
					*wp++ = OQUOTE;
					ignore_backslash_newline++;
					PUSH_STATE(SEQUOTE);
					statep->ls_bool = false;
					break;
				} else if ((unsigned int)c2 == ORD('"')) {
					/* FALLTHROUGH */
			case ORD('"'):
					PUSH_SRETRACE(SHEREDQUOTE);
					break;
				}
				ungetsc(c2);
				/* FALLTHROUGH */
			default:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		/* " in << or <<- delimiter */
		case SHEREDQUOTE:
			if ((unsigned int)c != ORD('"'))
				goto Subst;
			POP_SRETRACE();
			dp = strnul(sp) - 1;
			/* remove the trailing double quote */
			*dp = '\0';
			/* store the quoted string */
			*wp++ = OQUOTE;
			XcheckN(ws, wp, (dp - sp) * 2);
			dp = sp;
			while ((c = *dp++)) {
				if (c == '\\') {
					switch ((c = *dp++)) {
					case ORD('\\'):
					case ORD('"'):
					case ORD('$'):
					case ORD('`'):
						break;
					default:
						*wp++ = CHAR;
						*wp++ = '\\';
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			afree(sp, ATEMP);
			*wp++ = CQUOTE;
			state = statep->type = SHEREDELIM;
			break;

		/* in *(...|...) pattern (*+?@!) */
		case SPATTERN:
			if ((unsigned int)c == ORD(/*(*/ ')')) {
				*wp++ = CPAT;
				POP_STATE();
			} else if ((unsigned int)c == ORD('|')) {
				*wp++ = SPAT;
			} else if ((unsigned int)c == ORD('(')) {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
 Done:
	Xcheck(ws, wp);
	/* anything but the initial state left open means unbalanced input */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote");

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	/*
	 * redirection: the terminator is < or > (or & outside sh/POSIX
	 * mode) and the word so far is empty or one unquoted digit
	 */
	dp = Xstring(ws, wp);
	if (state == SBASE && (
	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

		/* explicit digit, else 0 for '<' and 1 otherwise */
		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;

		if (c == '&') {
			/* only "&>" is a redirection, plain & is not */
			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
				ungetsc(c2);
				goto no_iop;
			}
			c = c2;
			iop->ioflag = IOBASH;
		} else
			iop->ioflag = 0;

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || ((unsigned int)c == ORD('<') &&
		    (unsigned int)c2 == ORD('>'))) {
			iop->ioflag |= c == c2 ?
			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
			if (iop->ioflag == IOHERE) {
				if ((unsigned int)(c2 = getsc()) == ORD('-'))
					iop->ioflag |= IOSKIP;
				else if ((unsigned int)c2 == ORD('<'))
					iop->ioflag |= IOHERESTR;
				else
					ungetsc(c2);
			}
		} else if ((unsigned int)c2 == ORD('&'))
			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
		else {
			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
				iop->ioflag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->ioname = NULL;
		iop->delim = NULL;
		iop->heredoc = NULL;
		/* free word */
		Xfree(ws, wp);
		yylval.iop = iop;
		return (REDIR);
 no_iop:
		afree(iop, ATEMP);
	}

	if (wp == dp && state == SBASE) {
		/* free word */
		Xfree(ws, wp);
		/* no word, process LEX1 character */
		if (((unsigned int)c == ORD('|')) ||
		    ((unsigned int)c == ORD('&')) ||
		    ((unsigned int)c == ORD(';')) ||
		    ((unsigned int)c == ORD('(' /*)*/))) {
			/* doubled and two-character operators */
			if ((c2 = getsc()) == c)
				c = ((unsigned int)c == ORD(';')) ? BREAK :
				    ((unsigned int)c == ORD('|')) ? LOGOR :
				    ((unsigned int)c == ORD('&')) ? LOGAND :
				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
				c = COPROC;
			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
				c = BRKEV;
			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
				c = BRKFT;
			else
				ungetsc(c2);
#ifndef MKSH_SMALL
			/* ";;&" is accepted as another spelling of BRKEV */
			if (c == BREAK) {
				if ((unsigned int)(c2 = getsc()) == ORD('&'))
					c = BRKEV;
				else
					ungetsc(c2);
			}
#endif
		} else if ((unsigned int)c == ORD('\n')) {
			if (cf & HEREDELIM)
				ungetsc(c);
			else {
				/* read pending here documents after the newline */
				gethere();
				if (cf & CONTIN)
					goto Again;
			}
		} else if (c == '\0' && !(cf & HEREDELIM)) {
			struct ioword **p = heres;

			/* at EOF only here strings may still be pending */
			while (p < herep)
				if ((*p)->ioflag & IOHERESTR)
					++p;
				else
					/* ksh -c 'cat <<EOF' can cause this */
					yyerror(Tf_heredoc,
					    evalstr((*p)->delim, 0));
		}
		return (c);
	}

	/* terminate word */
	*wp++ = EOS;
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD || state == SLETPAREN
	    /* XXX ONEWORD? */)
		return (LWORD);

	/* unget terminator */
	ungetsc(c);

	/*
	 * note: the alias-vs-function code below depends on several
	 * interna: starting from here, source->str is not modified;
	 * the way getsc() and ungetsc() operate; etc.
	 */

	/* copy word to unprefixed string ident */
	sp = yylval.cp;
	dp = ident;
	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
		*dp++ = *sp++;
	if (c != EOS)
		/* word is not unquoted, or space ran out */
		dp = ident;
	/* make sure the ident array stays NUL padded */
	memset(dp, 0, (ident + IDENT) - dp + 1);

	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
		struct tbl *p;
		uint32_t h = hash(ident);

		/* keyword token replaces the word */
		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
		    (!(cf & ESACONLY) || p->val.i == ESAC ||
		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
			afree(yylval.cp, ATEMP);
			return (p->val.i);
		}
		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
		    (p->flag & ISSET)) {
			/*
			 * this still points to the same character as the
			 * ungetsc'd terminator from above
			 */
			const char *cp = source->str;

			/* prefer POSIX but not Korn functions over aliases */
			while (ctype(*cp, C_BLANK))
				/*
				 * this is like getsc() without skipping
				 * over Source boundaries (including not
				 * parsing ungetsc'd characters that got
				 * pushed into an SREREAD) which is what
				 * we want here anyway: find out whether
				 * the alias name is followed by a POSIX
				 * function definition
				 */
				++cp;
			/* prefer functions over aliases */
			if (cp[0] != '(' || cp[1] != ')') {
				Source *s = source;

				/* alias already being expanded: plain word */
				while (s && (s->flags & SF_HASALIAS))
					if (s->u.tblp == p)
						return (LWORD);
					else
						s = s->next;
				/* push alias expansion */
				s = pushs(SALIAS, source->areap);
				s->start = s->str = p->val.s;
				s->u.tblp = p;
				s->flags |= SF_HASALIAS;
				s->line = source->line;
				s->next = source;
				if (source->type == SEOF) {
					/* prevent infinite recursion at EOS */
					source->u.tblp = p;
					source->flags |= SF_HASALIAS;
				}
				source = s;
				afree(yylval.cp, ATEMP);
				goto Again;
			}
		}
	} else if (*ident == '\0') {
		/* retain typeset et al. even when quoted */
		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
		uint32_t flag = tt ? tt->flag : 0;

		if (flag & (DECL_UTIL | DECL_FWDR))
			strlcpy(ident, dp, sizeof(ident));
		afree(dp, ATEMP);
	}

	return (LWORD);
}
1110
1111 static void
gethere(void)1112 gethere(void)
1113 {
1114 struct ioword **p;
1115
1116 for (p = heres; p < herep; p++)
1117 if (!((*p)->ioflag & IOHERESTR))
1118 readhere(*p);
1119 herep = heres;
1120 }
1121
/*
 * read "<<word" text: collect the body of a here document from the
 * input, up to but not including the line holding the delimiter, and
 * store it as a NUL-terminated string in iop->heredoc
 *
 * The delimiter is evalstr(iop->delim, 0). With IOSKIP set, leading tabs
 * are dropped from every line; unless IOEVAL is set,
 * ignore_backslash_newline is raised for the duration of the read.
 * Hitting end of input in the middle of a non-delimiter line is reported
 * through yyerror().
 */

static void
readhere(struct ioword *iop)
{
	int c;
	/* eof: the delimiter; eofp: match position within it */
	const char *eof, *eofp;
	XString xs;
	char *xp;
	/* output position at the beginning of the current line */
	size_t xpos;

	eof = evalstr(iop->delim, 0);

	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline++;

	Xinit(xs, xp, 256, ATEMP);

 heredoc_read_line:
	/* beginning of line */
	eofp = eof;
	xpos = Xsavepos(xs, xp);
	if (iop->ioflag & IOSKIP) {
		/* skip over leading tabs */
		while ((c = getsc()) == '\t')
			;	/* nothing */
		goto heredoc_parse_char;
	}
 heredoc_read_char:
	c = getsc();
 heredoc_parse_char:
	/* compare with here document marker */
	if (!*eofp) {
		/* end of here document marker, what to do? */
		switch (c) {
		case ORD(/*(*/ ')'):
			if (!subshell_nesting_type)
				/*-
				 * not allowed outside $(...) or (...)
				 * => mismatch
				 */
				break;
			/* allow $(...) or (...) to close here */
			ungetsc(/*(*/ ')');
			/* FALLTHROUGH */
		case 0:
			/*
			 * Allow EOF here to commands without trailing
			 * newlines (mksh -c '...') will work as well.
			 */
		case ORD('\n'):
			/* Newline terminates here document marker */
			goto heredoc_found_terminator;
		}
	} else if ((unsigned int)c == ord(*eofp++))
		/* store; then read and compare next character */
		goto heredoc_store_and_loop;
	/* nope, mismatch; read until end of line */
	while (c != '\n') {
		if (!c)
			/* oops, reached EOF */
			yyerror(Tf_heredoc, eof);
		/* store character */
		Xcheck(xs, xp);
		Xput(xs, xp, c);
		/* read next character */
		c = getsc();
	}
	/* we read a newline as last character */
 heredoc_store_and_loop:
	/* store character */
	Xcheck(xs, xp);
	Xput(xs, xp, c);
	if (c == '\n')
		goto heredoc_read_line;
	goto heredoc_read_char;

 heredoc_found_terminator:
	/* jump back to saved beginning of line */
	/* (this discards the delimiter characters stored while matching) */
	xp = Xrestpos(xs, xp, xpos);
	/* terminate, close and store */
	Xput(xs, xp, '\0');
	iop->heredoc = Xclose(xs, xp);

	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline--;
}
1211
1212 void
yyerror(const char * fmt,...)1213 yyerror(const char *fmt, ...)
1214 {
1215 va_list va;
1216
1217 /* pop aliases and re-reads */
1218 while (source->type == SALIAS || source->type == SREREAD)
1219 source = source->next;
1220 /* zap pending input */
1221 source->str = null;
1222
1223 error_prefix(true);
1224 va_start(va, fmt);
1225 shf_vfprintf(shl_out, fmt, va);
1226 shf_putc('\n', shl_out);
1227 va_end(va);
1228 errorfz();
1229 }
1230
1231 /*
1232 * input for yylex with alias expansion
1233 */
1234
1235 Source *
pushs(int type,Area * areap)1236 pushs(int type, Area *areap)
1237 {
1238 Source *s;
1239
1240 s = alloc(sizeof(Source), areap);
1241 memset(s, 0, sizeof(Source));
1242 s->type = type;
1243 s->str = null;
1244 s->areap = areap;
1245 if (type == SFILE || type == SSTDIN)
1246 XinitN(s->xs, 256, s->areap);
1247 return (s);
1248 }
1249
/*
 * Return the next character of input, as an int, from the current
 * source.  Whenever the current string is used up, the source is
 * refilled, advanced to its next piece, or popped off the source stack,
 * depending on its type; 0 is returned once no input is left.
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	while ((c = ord(*s->str++)) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* fetch another line */
			getsc_line(s);
			break;

		case SWSTR:
			break;

		case SSTRING:
		case SSTRINGCMDLINE:
			break;

		case SWORDS:
			/* continue with the next string of the vector */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			/* between strings: T1space; after the last one: a newline */
			if (*s->u.strv == NULL) {
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				s->start = s->str = T1space;
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					/* re-serve c from the alias' own ugbuf */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					break;
				}
			}
			continue;

		case SREREAD:
			/* free the re-read text unless it is the built-in ugbuf */
			if (s->start != s->ugbuf)
				/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* the source delivered nothing more: turn it into EOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		/* echo the newly obtained input when SF_ECHO is set */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}
1355
/*
 * Read the next line for a file or standard input source into the
 * line buffer s->xs and point s->start and s->str at it. At end of
 * input s->str is set to NULL instead.
 *
 * A source counts as interactive when the FTALKING flag is set and it
 * is of type SSTDIN. For interactive sources this function also deals
 * with the prompt, the command line editor (if compiled in and
 * enabled), the input timeout and the history.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	/* arm the input timeout while waiting for the terminal */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive) {
		/* flush the history before a new primary prompt */
		if (cur_prompt == PS1)
			histsave(&s->line, NULL, HIST_FLUSH, true);
		change_winsz();
	}
#ifndef MKSH_NO_CMDLINE_EDITING
	/* an editing mode is active: let the line editor read the line */
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		int nread;

		nread = x_read(xp);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else
#endif
	{
		/* plain read: print the prompt or count the line */
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop when nothing was read or a full line is in */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	/* reading is over: disarm the input timeout */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
	rndpush(cp);
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str) {
		/*
		 * history: lines read at a prompt other than PS1 are
		 * appended, lines read at PS1 are queued unless they
		 * begin with an IFS character
		 */
		if (cur_prompt != PS1)
			histsave(&s->line, s->str, HIST_APPEND, true);
		else if (!ctype(*s->str, C_IFS | C_IFSWS))
			histsave(&s->line, s->str, HIST_QUEUE, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
		else
			goto check_for_sole_return;
	} else if (interactive && cur_prompt == PS1) {
 check_for_sole_return:
		/* a line of nothing but IFS whitespace at PS1: flush and sync */
		cp = Xstring(s->xs, xp);
		while (ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp) {
			histsave(&s->line, NULL, HIST_FLUSH, true);
			histsync();
		}
#endif
	}
	/* any further line of this command uses the continuation prompt */
	if (interactive)
		set_prompt(PS2, NULL);
}
1469
1470 void
set_prompt(int to,Source * s)1471 set_prompt(int to, Source *s)
1472 {
1473 cur_prompt = (uint8_t)to;
1474
1475 switch (to) {
1476 /* command */
1477 case PS1:
1478 /*
1479 * Substitute ! and !! here, before substitutions are done
1480 * so ! in expanded variables are not expanded.
1481 * NOTE: this is not what AT&T ksh does (it does it after
1482 * substitutions, POSIX doesn't say which is to be done.
1483 */
1484 {
1485 struct shf *shf;
1486 char * volatile ps1;
1487 Area *saved_atemp;
1488 int saved_lineno;
1489
1490 ps1 = str_val(global("PS1"));
1491 shf = shf_sopen(NULL, strlen(ps1) * 2,
1492 SHF_WR | SHF_DYNAMIC, NULL);
1493 while (*ps1)
1494 if (*ps1 != '!' || *++ps1 == '!')
1495 shf_putchar(*ps1++, shf);
1496 else
1497 shf_fprintf(shf, Tf_lu, s ?
1498 (unsigned long)s->line + 1 : 0UL);
1499 ps1 = shf_sclose(shf);
1500 saved_lineno = current_lineno;
1501 if (s)
1502 current_lineno = s->line + 1;
1503 saved_atemp = ATEMP;
1504 newenv(E_ERRH);
1505 if (kshsetjmp(e->jbuf)) {
1506 prompt = safe_prompt;
1507 /*
1508 * Don't print an error - assume it has already
1509 * been printed. Reason is we may have forked
1510 * to run a command and the child may be
1511 * unwinding its stack through this code as it
1512 * exits.
1513 */
1514 } else {
1515 char *cp = substitute(ps1, 0);
1516 strdupx(prompt, cp, saved_atemp);
1517 }
1518 current_lineno = saved_lineno;
1519 quitenv(NULL);
1520 }
1521 break;
1522 /* command continuation */
1523 case PS2:
1524 prompt = str_val(global("PS2"));
1525 break;
1526 }
1527 }
1528
1529 int
pprompt(const char * cp,int ntruncate)1530 pprompt(const char *cp, int ntruncate)
1531 {
1532 char delimiter = 0;
1533 bool doprint = (ntruncate != -1);
1534 bool indelimit = false;
1535 int columns = 0, lines = 0;
1536
1537 /*
1538 * Undocumented AT&T ksh feature:
1539 * If the second char in the prompt string is \r then the first
1540 * char is taken to be a non-printing delimiter and any chars
1541 * between two instances of the delimiter are not considered to
1542 * be part of the prompt length
1543 */
1544 if (*cp && cp[1] == '\r') {
1545 delimiter = *cp;
1546 cp += 2;
1547 }
1548 for (; *cp; cp++) {
1549 if (indelimit && *cp != delimiter)
1550 ;
1551 else if (ctype(*cp, C_CR | C_LF)) {
1552 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1553 columns = 0;
1554 } else if (*cp == '\t') {
1555 columns = (columns | 7) + 1;
1556 } else if (*cp == '\b') {
1557 if (columns > 0)
1558 columns--;
1559 } else if (*cp == delimiter)
1560 indelimit = !indelimit;
1561 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1562 const char *cp2;
1563 columns += utf_widthadj(cp, &cp2);
1564 if (doprint && (indelimit ||
1565 (ntruncate < (x_cols * lines + columns))))
1566 shf_write(cp, cp2 - cp, shl_out);
1567 cp = cp2 - /* loop increment */ 1;
1568 continue;
1569 } else
1570 columns++;
1571 if (doprint && (*cp != delimiter) &&
1572 (indelimit || (ntruncate < (x_cols * lines + columns))))
1573 shf_putc(*cp, shl_out);
1574 }
1575 if (doprint)
1576 shf_flush(shl_out);
1577 return (x_cols * lines + columns);
1578 }
1579
/*
 * Read the variable part of a ${...} expression (i.e. up to but not
 * including the :[-+?=#%] or close-brace).
 *
 * The characters read are appended at wp, growing *wsp as needed, and
 * terminated with a NUL. The character that ended the variable part
 * is pushed back onto the input. Returns the position just past the
 * terminating NUL.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	char c;
	enum parse_state {
		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
		PS_IDENT, PS_NUMBER, PS_VAR1
	} state = PS_INITIAL;

	while (/* CONSTCOND */ 1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		case PS_SAW_HASH:
			/*
			 * after a leading '#', a special parameter
			 * character is only taken as part of the name
			 * if the close-brace follows it directly
			 */
			if (ctype(c, C_VAR1)) {
				char c2;

				/* peek at the character after c */
				c2 = getsc();
				ungetsc(c2);
				if (ord(c2) != ORD(/*{*/ '}')) {
					/*
					 * NOTE(review): c is pushed back
					 * here and once more at out:, which
					 * rewinds the input by one extra
					 * position; confirm this is intended
					 */
					ungetsc(c);
					goto out;
				}
			}
			goto ps_common;
		case PS_SAW_BANG:
			/* any of these directly after '!' ends the name */
			switch (ord(c)) {
			case ORD('@'):
			case ORD('#'):
			case ORD('-'):
			case ORD('?'):
				goto out;
			}
			goto ps_common;
		case PS_INITIAL:
			/* a leading %, # or ! is stored and remembered */
			switch (ord(c)) {
			case ORD('%'):
				state = PS_SAW_PERCENT;
				goto next;
			case ORD('#'):
				state = PS_SAW_HASH;
				goto next;
			case ORD('!'):
				state = PS_SAW_BANG;
				goto next;
			}
			/* FALLTHROUGH */
		case PS_SAW_PERCENT:
 ps_common:
			/* the first character decides the kind of name */
			if (ctype(c, C_ALPHX))
				state = PS_IDENT;
			else if (ctype(c, C_DIGIT))
				state = PS_NUMBER;
			else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				goto out;
			break;
		case PS_IDENT:
			if (!ctype(c, C_ALNUX)) {
				/* an identifier may carry an array subscript */
				if (ord(c) == ORD('[')) {
					char *tmp, *p;

					if (!arraysub(&tmp))
						yyerror("missing ]");
					/* copy the [ and then the subscript text */
					*wp++ = c;
					p = tmp;
					while (*p) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					/*
					 * the closing ] was stored by arraysub
					 * and copied above; fetch the character
					 * after it for the push-back at out:
					 */
					c = getsc();
				}
				goto out;
			}
 next:
			break;
		case PS_NUMBER:
			/* a number ends at the first non-digit */
			if (!ctype(c, C_DIGIT))
				goto out;
			break;
		case PS_VAR1:
			/* special parameters are a single character */
			goto out;
		}
		/* c belongs to the variable part: store it */
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
 out:
	/* end of variable part */
	*wp++ = '\0';
	ungetsc(c);
	return (wp);
}
1679
1680 /*
1681 * Save an array subscript - returns true if matching bracket found, false
1682 * if eof or newline was found.
1683 * (Returned string double null terminated)
1684 */
1685 static bool
arraysub(char ** strp)1686 arraysub(char **strp)
1687 {
1688 XString ws;
1689 char *wp, c;
1690 /* we are just past the initial [ */
1691 unsigned int depth = 1;
1692
1693 Xinit(ws, wp, 32, ATEMP);
1694
1695 do {
1696 c = getsc();
1697 Xcheck(ws, wp);
1698 *wp++ = c;
1699 if (ord(c) == ORD('['))
1700 depth++;
1701 else if (ord(c) == ORD(']'))
1702 depth--;
1703 } while (depth > 0 && c && c != '\n');
1704
1705 *wp++ = '\0';
1706 *strp = Xclose(ws, wp);
1707
1708 return (tobool(depth == 0));
1709 }
1710
1711 /* Unget a char: handles case when we are already at the start of the buffer */
1712 static void
ungetsc(int c)1713 ungetsc(int c)
1714 {
1715 struct sretrace_info *rp = retrace_info;
1716
1717 if (backslash_skip)
1718 backslash_skip--;
1719 /* Don't unget EOF... */
1720 if (source->str == null && c == '\0')
1721 return;
1722 while (rp) {
1723 if (Xlength(rp->xs, rp->xp))
1724 rp->xp--;
1725 rp = rp->next;
1726 }
1727 ungetsc_i(c);
1728 }
1729 static void
ungetsc_i(int c)1730 ungetsc_i(int c)
1731 {
1732 if (source->str > source->start)
1733 source->str--;
1734 else {
1735 Source *s;
1736
1737 s = pushs(SREREAD, source->areap);
1738 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1739 s->start = s->str = s->ugbuf;
1740 s->next = source;
1741 source = s;
1742 }
1743 }
1744
1745
1746 /* Called to get a char that isn't a \newline sequence. */
1747 static int
getsc_bn(void)1748 getsc_bn(void)
1749 {
1750 int c, c2;
1751
1752 if (ignore_backslash_newline)
1753 return (o_getsc_u());
1754
1755 if (backslash_skip == 1) {
1756 backslash_skip = 2;
1757 return (o_getsc_u());
1758 }
1759
1760 backslash_skip = 0;
1761
1762 while (/* CONSTCOND */ 1) {
1763 c = o_getsc_u();
1764 if (c == '\\') {
1765 if ((c2 = o_getsc_u()) == '\n')
1766 /* ignore the \newline; get the next char... */
1767 continue;
1768 ungetsc_i(c2);
1769 backslash_skip = 1;
1770 }
1771 return (c);
1772 }
1773 }
1774
1775 void
yyskiputf8bom(void)1776 yyskiputf8bom(void)
1777 {
1778 int c;
1779
1780 if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1781 ungetsc_i(c);
1782 return;
1783 }
1784 if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1785 ungetsc_i(c);
1786 ungetsc_i(asc2rtt(0xEF));
1787 return;
1788 }
1789 if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1790 ungetsc_i(c);
1791 ungetsc_i(asc2rtt(0xBB));
1792 ungetsc_i(asc2rtt(0xEF));
1793 return;
1794 }
1795 UTFMODE |= 8;
1796 }
1797
1798 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1799 push_state_i(State_info *si, Lex_state *old_end)
1800 {
1801 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1802
1803 news[0].ls_base = old_end;
1804 si->base = &news[0];
1805 si->end = &news[STATE_BSIZE];
1806 return (&news[1]);
1807 }
1808
1809 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1810 pop_state_i(State_info *si, Lex_state *old_end)
1811 {
1812 Lex_state *old_base = si->base;
1813
1814 si->base = old_end->ls_base - STATE_BSIZE;
1815 si->end = old_end->ls_base;
1816
1817 afree(old_base, ATEMP);
1818
1819 return (si->base + STATE_BSIZE - 1);
1820 }
1821