1 /* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
6 * mirabilos <m@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.247 2018/01/14 01:44:01 tg Exp $");
27
/*
 * states while lexing word
 *
 * yylex() keeps the current state in its local variable "state" and
 * mirrors it in statep->type; PUSH_STATE()/POP_STATE() nest and
 * unnest these states as quoting and substitution constructs open
 * and close.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
/* type of the entry at index 0 of a state block (see yylex) */
#define SINVALID	255	/* invalid state */
48
/*
 * One active "retrace" recording: while an entry is linked into the
 * retrace_info list, every character returned through o_getsc_r() is
 * also appended to its buffer, so the raw input can be recovered later
 * (see PUSH_SRETRACE/POP_SRETRACE).
 */
struct sretrace_info {
	/* next (older) recording; the list head is retrace_info */
	struct sretrace_info *next;
	/* growable buffer holding the recorded characters */
	XString xs;
	/* write position inside xs */
	char *xp;
};
54
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The union members are alternatives: which one is meaningful depends
 * on the type of the entry, as noted on each member.
 */
typedef struct lex_state {
	union {
		/*
		 * point to the next state block; used on the entry at
		 * index 0 of a block (yylex sets it to NULL for its
		 * on-stack block)
		 */
		struct lex_state *base;
		/*
		 * marks start of state output in output string;
		 * set by PUSH_SRETRACE, consumed by POP_SRETRACE
		 */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state (one of the S* constants) */
	uint8_t type;
} Lex_state;
/* shorthands for the union members */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85
/*
 * Bounds of the state block currently in use by PUSH_STATE/POP_STATE.
 */
typedef struct {
	/* first entry of the current block (the SINVALID entry in yylex) */
	Lex_state *base;
	/* one past the last entry of the current block */
	Lex_state *end;
} State_info;
90
/* forward declarations of file-local helpers */
static void readhere(struct ioword *);
static void ungetsc(int);
static void ungetsc_i(int);
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int getsc_i(void);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
static void gethere(void);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);

/*
 * When nonzero, o_getsc() bypasses its fast path and calls getsc_bn();
 * reset to 0 at the start of every yylex() run.
 */
static int backslash_skip;
/*
 * Nesting counter, reset at the start of every yylex() run and raised
 * while reading comments, '...' and $'...' strings and here documents
 * without IOEVAL.
 * NOTE(review): presumably consulted by getsc_bn() to decide whether
 * backslash-newline is removed; confirm there.
 */
static int ignore_backslash_newline;
106
/*
 * optimised getsc_bn(): take the next byte straight from source->str
 * unless it is NUL or a backslash or backslash_skip is set, in which
 * case getsc_bn() does the work
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			!backslash_skip ? *source->str++ : getsc_bn())
/*
 * optimised getsc_uu(): take the next byte straight from source->str
 * unless it is NUL, in which case getsc_uu() does the work
 */
#define o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: expands to the tail of a function body which
 * evaluates carg once, appends the result to the buffer of every
 * active retrace recording and then returns it; since it declares
 * locals and contains a return statement it can only be used as
 * the (end of the) body of a function returning int
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);
125
/*
 * callback: fetch the next input character (as unsigned char value)
 * through o_getsc(), let the retrace helper record it in all active
 * retrace buffers and return it; function form of getsc(), usable
 * where a function pointer is needed
 */
static int
getsc_i(void)
{
	int ch = (unsigned int)(unsigned char)o_getsc();

	/* records ch and returns it */
	o_getsc_r(ch);
}
132
/*
 * getsc(): read the next input character, recording it for retracing.
 * The small build calls getsc_i() for every character; otherwise the
 * o_getsc() fast path is expanded at each call site and only the
 * retrace bookkeeping is done in a function.
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
#define getsc()		getsc_i()
#else
static int getsc_r(int);

/*
 * Append the already fetched character c to every active retrace
 * buffer and return it unchanged (the return statement is part of
 * the o_getsc_r() expansion).
 */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
#endif
146
/* number of Lex_state entries per state block */
#define STATE_BSIZE	8

/*
 * Enter lexer state s: advance statep to the next entry (asking
 * push_state_i() for a new position when the end of the current block
 * is reached) and record s both there and in "state". Relies on the
 * caller's locals statep, state_info and state.
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current lexer state: step statep back one entry (asking
 * pop_state_i() for the position when the first entry of the current
 * block is reached) and reload "state" from it. Relies on the same
 * locals as PUSH_STATE.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)
160
/*
 * Enter lexer state s and start a retrace recording: remember the
 * current output position in statep->ls_start, allocate a new
 * sretrace_info with a 64-byte initial buffer from ATEMP and link it
 * at the head of retrace_info. Relies on the caller's locals ws and
 * wp in addition to those PUSH_STATE needs.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * End the newest retrace recording and leave the current state:
 * rewind wp to the output position saved by PUSH_SRETRACE, NUL-
 * terminate the recorded text and leave it in sp (the buffer itself
 * is not freed here; the users in yylex afree() sp afterwards or hand
 * it to a Source), unlink and free the sretrace_info node, then
 * POP_STATE(). Clobbers the caller's locals sp and dp.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
181
/**
 * Lexical analyser
 *
 * Tokens are not regular expressions, they are LL(1);
 * for example, "${var:-${PWD}}" and "$(size $(whence ksh))",
 * hence the state stack. Note that "$(...)" is now parsed recursively.
 *
 * cf is a set of lexer flags; the ones tested here are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, CMDWORD, CMDASN, LQCHAR, CONTIN,
 * KEYWORD, ESACONLY and ALIAS (ALIAS is additionally switched on
 * locally when the current source carries SF_ALIAS).
 *
 * Returns
 * - REDIR, with yylval.iop set to a newly allocated ioword, for a
 *   redirection operator (optionally preceded by a single digit);
 * - LWORD, with yylval.cp set to the encoded word;
 * - the token value of a keyword (p->val.i) if KEYWORD is set and
 *   the word is an unquoted keyword;
 * - otherwise, when no word was collected, the terminating character
 *   itself (0 at end of input) or one of BREAK, LOGOR, LOGAND,
 *   MDPAREN, COPROC, BRKEV, BRKFT for multi-character operators;
 * - '(' if a (( ... construct under LETEXPR turns out not to be
 *   closed by )), after pushing the text back for re-reading.
 * A word that is an alias (with ALIAS set) is replaced by pushing its
 * expansion as a new source and lexing again. Unterminated constructs
 * are reported through yyerror().
 */

int
yylex(int cf)
{
	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
	State_info state_info;
	int c, c2, state;
	size_t cz;
	XString ws;		/* expandable output word */
	char *wp;		/* output word pointer */
	char *sp, *dp;

 Again:
	/* entry 0 is the bottom marker of the on-stack state block */
	states[0].type = SINVALID;
	states[0].ls_base = NULL;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &state_info.base[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf & ONEWORD)
		state = SWORD;
	else if (cf & LETEXPR) {
		/* enclose arguments in (double) quotes */
		*wp++ = OQUOTE;
		state = SLETPAREN;
		statep->nparen = 0;
	} else {
		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		do {
			c = getsc();
		} while (ctype(c, C_BLANK));
		/* skip a comment up to, not including, the newline or NUL */
		if (c == '#') {
			ignore_backslash_newline++;
			do {
				c = getsc();
			} while (!ctype(c, C_NUL | C_LF));
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {
		/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* POSIX: trailing space only counts if parsing simple cmd */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
	statep->type = state;

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0 ||
	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
		if (state == SBASE &&
		    subshell_nesting_type == ORD(/*{*/ '}') &&
		    (unsigned int)c == ORD(/*{*/ '}'))
			/* possibly end ${ :;} */
			break;
		Xcheck(ws, wp);
		switch (state) {
		case SADELIM:
			if ((unsigned int)c == ORD('('))
				statep->nparen++;
			else if ((unsigned int)c == ORD(')'))
				statep->nparen--;
			else if (statep->nparen == 0 &&
			    ((unsigned int)c == ORD(/*{*/ '}') ||
			    c == (int)statep->ls_adelim.delimiter)) {
				*wp++ = ADELIM;
				*wp++ = c;
				/* leave SADELIM on } or after the last delimiter */
				if ((unsigned int)c == ORD(/*{*/ '}') ||
				    --statep->ls_adelim.num == 0)
					POP_STATE();
				/* } also leaves the state pushed below SADELIM */
				if ((unsigned int)c == ORD(/*{*/ '}'))
					POP_STATE();
				break;
			}
			/* FALLTHROUGH */
		case SBASE:
			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
				/* temporary */
				*wp = EOS;
				if (is_wdvarname(Xstring(ws, wp), false)) {
					char *p, *tmp;

					if (arraysub(&tmp)) {
						*wp++ = CHAR;
						*wp++ = c;
						/* copy the text arraysub() returned */
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* FALLTHROUGH */
 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
			if (ctype(c, C_PATMO)) {
				c2 = getsc();
				if ((unsigned int)c2 == ORD('(' /*)*/)) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
			/* FALLTHROUGH */
 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			case ORD('\\'):
 getsc_qchar:
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case ORD('\''):
 open_ssquote_unless_heredoc:
				if ((cf & HEREDOC))
					goto store_char;
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			case ORD('"'):
 open_sdquote:
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			case ORD('$'):
				/*
				 * processing of dollar sign belongs into
				 * Subst, except for those which can open
				 * a string: $'…' and $"…"
				 */
 subst_dollar_ex:
				c = getsc();
				switch (c) {
				case ORD('"'):
					goto open_sdquote;
				case ORD('\''):
					goto open_sequote;
				default:
					goto SubstS;
				}
			default:
				goto Subst;
			}
			break;

 Subst:
			switch (c) {
			case ORD('\\'):
				c = getsc();
				switch (c) {
				case ORD('"'):
					if ((cf & HEREDOC))
						goto heredocquote;
					/* FALLTHROUGH */
				case ORD('\\'):
				case ORD('$'):
				case ORD('`'):
 store_qchar:
					*wp++ = QCHAR;
					*wp++ = c;
					break;
				default:
 heredocquote:
					Xcheck(ws, wp);
					if (c) {
						/* trailing \ is lost */
						*wp++ = CHAR;
						*wp++ = '\\';
						*wp++ = CHAR;
						*wp++ = c;
					}
					break;
				}
				break;
			case ORD('$'):
				c = getsc();
 SubstS:
				if ((unsigned int)c == ORD('(' /*)*/)) {
					c = getsc();
					if ((unsigned int)c == ORD('(' /*)*/)) {
						/* $(( starts recording the raw input */
						*wp++ = EXPRSUB;
						PUSH_SRETRACE(SASPAREN);
						statep->nparen = 2;
						*retrace_info->xp++ = '(';
					} else {
						ungetsc(c);
 subst_command:
						c = COMSUB;
 subst_command2:
						/* store type byte, then text with its NUL */
						sp = yyrecursive(c);
						cz = strlen(sp) + 1;
						XcheckN(ws, wp, cz);
						*wp++ = c;
						memcpy(wp, sp, cz);
						wp += cz;
					}
				} else if ((unsigned int)c == ORD('{' /*}*/)) {
					if ((unsigned int)(c = getsc()) == ORD('|')) {
						/*
						 * non-subenvironment
						 * value substitution
						 */
						c = VALSUB;
						goto subst_command2;
					} else if (ctype(c, C_IFSWS)) {
						/*
						 * non-subenvironment
						 * "command" substitution
						 */
						c = FUNSUB;
						goto subst_command2;
					}
					ungetsc(c);
					*wp++ = OSUBST;
					*wp++ = '{' /*}*/;
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if ((unsigned int)c == ORD(':')) {
						*wp++ = CHAR;
						*wp++ = c;
						c = getsc();
						if ((unsigned int)c == ORD(':')) {
							/* "::" is stored as ":0" plus ADELIM ':' */
							*wp++ = CHAR;
							*wp++ = '0';
							*wp++ = ADELIM;
							*wp++ = ':';
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 1;
							statep->nparen = 0;
							break;
						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
						    /*XXX what else? */
						    c == '(' /*)*/) {
							/* substring subst. */
							if (c != ' ') {
								*wp++ = CHAR;
								*wp++ = ' ';
							}
							ungetsc(c);
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 2;
							statep->nparen = 0;
							break;
						}
					} else if (c == '/') {
						c2 = ADELIM;
 parse_adelim_slash:
						/* c2: what to store in front of a second slash */
						*wp++ = CHAR;
						*wp++ = c;
						if ((unsigned int)(c = getsc()) == ORD('/')) {
							*wp++ = c2;
							*wp++ = c;
						} else
							ungetsc(c);
						PUSH_STATE(SBRACE);
						PUSH_STATE(SADELIM);
						statep->ls_adelim.delimiter = '/';
						statep->ls_adelim.num = 1;
						statep->nparen = 0;
						break;
					} else if (c == '@') {
						/* peek: "@/" is handled like "/" above */
						c2 = getsc();
						ungetsc(c2);
						if ((unsigned int)c2 == ORD('/')) {
							c2 = CHAR;
							goto parse_adelim_slash;
						}
					}
					/*
					 * If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (ctype(c, C_SUB2)) {
						ungetsc(c);
						if (Flag(FSH))
							PUSH_STATE(STBRACEBOURNE);
						else
							PUSH_STATE(STBRACEKORN);
					} else {
						ungetsc(c);
						if (state == SDQUOTE ||
						    state == SQBRACE)
							PUSH_STATE(SQBRACE);
						else
							PUSH_STATE(SBRACE);
					}
				} else if (ctype(c, C_ALPHX)) {
					/* $name: OSUBST 'X' name NUL CSUBST 'X' */
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALNUX));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
					/* single-character parameter name */
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* plain dollar sign */
					*wp++ = CHAR;
					*wp++ = '$';
					ungetsc(c);
				}
				break;
			case ORD('`'):
 subst_gravis:
				PUSH_STATE(SBQUOTE);
				*wp++ = COMASUB;
				/*
				 * We need to know whether we are within double
				 * quotes in order to translate \" to " within
				 * "…`…\"…`…" because, unlike for COMSUBs, the
				 * outer double quoting changes the backslash
				 * meaning for the inside. For more details:
				 * http://austingroupbugs.net/view.php?id=1015
				 */
				statep->ls_bool = false;
				s2 = statep;
				base = state_info.base;
				/* scan all enclosing states, block by block */
				while (/* CONSTCOND */ 1) {
					for (; s2 != base; s2--) {
						if (s2->type == SDQUOTE) {
							statep->ls_bool = true;
							break;
						}
					}
					if (s2 != base)
						break;
					if (!(s2 = s2->ls_base))
						break;
					base = s2-- - STATE_BSIZE;
				}
				break;
			case QCHAR:
				if (cf & LQCHAR) {
					*wp++ = QCHAR;
					*wp++ = getsc();
					break;
				}
				/* FALLTHROUGH */
			default:
 store_char:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SEQUOTE:
			if ((unsigned int)c == ORD('\'')) {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else if ((unsigned int)c == ORD('\\')) {
				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
					c2 = getsc();
				/* once set, the rest of the string is dropped */
				if (c2 == 0)
					statep->ls_bool = true;
				if (!statep->ls_bool) {
					char ts[4];

					if ((unsigned int)c2 < 0x100) {
						*wp++ = QCHAR;
						*wp++ = c2;
					} else {
						/* store each converted byte as QCHAR */
						cz = utf_wctomb(ts, c2 - 0x100);
						ts[cz] = 0;
						cz = 0;
						do {
							*wp++ = QCHAR;
							*wp++ = ts[cz];
						} while (ts[++cz]);
					}
				}
			} else if (!statep->ls_bool) {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SSQUOTE:
			if ((unsigned int)c == ORD('\'')) {
				POP_STATE();
				if ((cf & HEREDOC) || state == SQBRACE)
					goto store_char;
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SDQUOTE:
			if ((unsigned int)c == ORD('"')) {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		/* $(( ... )) */
		case SASPAREN:
			if ((unsigned int)c == ORD('('))
				statep->nparen++;
			else if ((unsigned int)c == ORD(')')) {
				statep->nparen--;
				if (statep->nparen == 1) {
					/* end of EXPRSUB */
					POP_SRETRACE();

					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
						/*
						 * copy the recorded text minus
						 * its first and last character
						 */
						cz = strlen(sp) - 2;
						XcheckN(ws, wp, cz);
						memcpy(wp, sp + 1, cz);
						wp += cz;
						afree(sp, ATEMP);
						*wp++ = '\0';
						break;
					} else {
						Source *s;

						ungetsc(c2);
						/*
						 * mismatched parenthesis -
						 * assume we were really
						 * parsing a $(...) expression
						 */
						--wp;
						/* re-read the recorded text */
						s = pushs(SREREAD,
						    source->areap);
						s->start = s->str =
						    s->u.freeme = sp;
						s->next = source;
						source = s;
						goto subst_command;
					}
				}
			}
			/* reuse existing state machine */
			goto Sbase2;

		case SQBRACE:
			if ((unsigned int)c == ORD('\\')) {
				/*
				 * perform POSIX "quote removal" if the back-
				 * slash is "special", i.e. same cases as the
				 * {case '\\':} in Subst: plus closing brace;
				 * in mksh code "quote removal" on '\c' means
				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
				 * emitted (in heredocquote:)
				 */
				if ((unsigned int)(c = getsc()) == ORD('"') ||
				    (unsigned int)c == ORD('\\') ||
				    ctype(c, C_DOLAR | C_GRAVE) ||
				    (unsigned int)c == ORD(/*{*/ '}'))
					goto store_qchar;
				goto heredocquote;
			}
			goto common_SQBRACE;

		case SBRACE:
			if ((unsigned int)c == ORD('\''))
				goto open_ssquote_unless_heredoc;
			else if ((unsigned int)c == ORD('\\'))
				goto getsc_qchar;
 common_SQBRACE:
			if ((unsigned int)c == ORD('"'))
				goto open_sdquote;
			else if ((unsigned int)c == ORD('$'))
				goto subst_dollar_ex;
			else if ((unsigned int)c == ORD('`'))
				goto subst_gravis;
			else if ((unsigned int)c != ORD(/*{*/ '}'))
				goto store_char;
			/* closing brace ends the substitution */
			POP_STATE();
			*wp++ = CSUBST;
			*wp++ = /*{*/ '}';
			break;

		/* Same as SBASE, except (,|,) treated specially */
		case STBRACEKORN:
			if ((unsigned int)c == ORD('|'))
				*wp++ = SPAT;
			else if ((unsigned int)c == ORD('(')) {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else /* FALLTHROUGH */
		case STBRACEBOURNE:
			  if ((unsigned int)c == ORD(/*{*/ '}')) {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		case SBQUOTE:
			if ((unsigned int)c == ORD('`')) {
				*wp++ = 0;
				POP_STATE();
			} else if ((unsigned int)c == ORD('\\')) {
				switch (c = getsc()) {
				case 0:
					/* trailing \ is lost */
					break;
				case ORD('$'):
				case ORD('`'):
				case ORD('\\'):
					*wp++ = c;
					break;
				case ORD('"'):
					/* \" becomes " only inside "`...`" */
					if (statep->ls_bool) {
						*wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				default:
					*wp++ = '\\';
					*wp++ = c;
					break;
				}
			} else
				*wp++ = c;
			break;

		/* ONEWORD */
		case SWORD:
			goto Subst;

		/* LETEXPR: (( ... )) */
		case SLETPAREN:
			if ((unsigned int)c == ORD(/*(*/ ')')) {
				if (statep->nparen > 0)
					--statep->nparen;
				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else {
					Source *s;

					ungetsc(c2);
					ungetsc(c);
					/*
					 * mismatched parenthesis -
					 * assume we were really
					 * parsing a (...) expression
					 */
					*wp = EOS;
					sp = Xstring(ws, wp);
					/* sp + 1 skips the initial OQUOTE */
					dp = wdstrip(sp + 1, WDS_TPUTS);
					s = pushs(SREREAD, source->areap);
					s->start = s->str = s->u.freeme = dp;
					s->next = source;
					source = s;
					ungetsc('(' /*)*/);
					return (ORD('(' /*)*/));
				}
			} else if ((unsigned int)c == ORD('('))
				/*
				 * parentheses inside quotes and
				 * backslashes are lost, but AT&T ksh
				 * doesn't count them either
				 */
				++statep->nparen;
			goto Sbase2;

		/* << or <<- delimiter */
		case SHEREDELIM:
			/*
			 * here delimiters need a special case since
			 * $ and `...` are not to be treated specially
			 */
			switch (c) {
			case ORD('\\'):
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case ORD('\''):
				goto open_ssquote_unless_heredoc;
			case ORD('$'):
				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
 open_sequote:
					*wp++ = OQUOTE;
					ignore_backslash_newline++;
					PUSH_STATE(SEQUOTE);
					statep->ls_bool = false;
					break;
				} else if ((unsigned int)c2 == ORD('"')) {
					/* FALLTHROUGH */
			case ORD('"'):
					PUSH_SRETRACE(SHEREDQUOTE);
					break;
				}
				ungetsc(c2);
				/* FALLTHROUGH */
			default:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		/* " in << or <<- delimiter */
		case SHEREDQUOTE:
			if ((unsigned int)c != ORD('"'))
				goto Subst;
			POP_SRETRACE();
			dp = strnul(sp) - 1;
			/* remove the trailing double quote */
			*dp = '\0';
			/* store the quoted string */
			*wp++ = OQUOTE;
			XcheckN(ws, wp, (dp - sp) * 2);
			dp = sp;
			while ((c = *dp++)) {
				if (c == '\\') {
					/*
					 * drop the backslash in front of
					 * \ " $ ` and keep it otherwise
					 */
					switch ((c = *dp++)) {
					case ORD('\\'):
					case ORD('"'):
					case ORD('$'):
					case ORD('`'):
						break;
					default:
						*wp++ = CHAR;
						*wp++ = '\\';
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			afree(sp, ATEMP);
			*wp++ = CQUOTE;
			state = statep->type = SHEREDELIM;
			break;

		/* in *(...|...) pattern (*+?@!) */
		case SPATTERN:
			if ((unsigned int)c == ORD(/*(*/ ')')) {
				*wp++ = CPAT;
				POP_STATE();
			} else if ((unsigned int)c == ORD('|')) {
				*wp++ = SPAT;
			} else if ((unsigned int)c == ORD('(')) {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
 Done:
	Xcheck(ws, wp);
	/* anything still on the state stack was never closed */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote");

	/* This is done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	/*
	 * redirection: the terminator is an angle bracket (or & outside
	 * of sh and POSIX mode) and the word so far is either empty or a
	 * single unquoted digit
	 */
	dp = Xstring(ws, wp);
	if (state == SBASE && (
	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

		/* explicit digit, else 0 for < and 1 otherwise */
		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;

		if (c == '&') {
			/* only &> is a redirection */
			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
				ungetsc(c2);
				goto no_iop;
			}
			c = c2;
			iop->ioflag = IOBASH;
		} else
			iop->ioflag = 0;

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || ((unsigned int)c == ORD('<') &&
		    (unsigned int)c2 == ORD('>'))) {
			iop->ioflag |= c == c2 ?
			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
			if (iop->ioflag == IOHERE) {
				/* <<- and <<< variants */
				if ((unsigned int)(c2 = getsc()) == ORD('-'))
					iop->ioflag |= IOSKIP;
				else if ((unsigned int)c2 == ORD('<'))
					iop->ioflag |= IOHERESTR;
				else
					ungetsc(c2);
			}
		} else if ((unsigned int)c2 == ORD('&'))
			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
		else {
			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
				iop->ioflag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->ioname = NULL;
		iop->delim = NULL;
		iop->heredoc = NULL;
		/* free word */
		Xfree(ws, wp);
		yylval.iop = iop;
		return (REDIR);
 no_iop:
		afree(iop, ATEMP);
	}

	if (wp == dp && state == SBASE) {
		/* free word */
		Xfree(ws, wp);
		/* no word, process LEX1 character */
		if (((unsigned int)c == ORD('|')) ||
		    ((unsigned int)c == ORD('&')) ||
		    ((unsigned int)c == ORD(';')) ||
		    ((unsigned int)c == ORD('(' /*)*/))) {
			if ((c2 = getsc()) == c)
				c = ((unsigned int)c == ORD(';')) ? BREAK :
				    ((unsigned int)c == ORD('|')) ? LOGOR :
				    ((unsigned int)c == ORD('&')) ? LOGAND :
				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
				c = COPROC;
			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
				c = BRKEV;
			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
				c = BRKFT;
			else
				ungetsc(c2);
#ifndef MKSH_SMALL
			/* ;;& is accepted as BRKEV as well */
			if (c == BREAK) {
				if ((unsigned int)(c2 = getsc()) == ORD('&'))
					c = BRKEV;
				else
					ungetsc(c2);
			}
#endif
		} else if ((unsigned int)c == ORD('\n')) {
			if (cf & HEREDELIM)
				ungetsc(c);
			else {
				/* read the bodies of pending here documents */
				gethere();
				if (cf & CONTIN)
					goto Again;
			}
		} else if (c == '\0' && !(cf & HEREDELIM)) {
			struct ioword **p = heres;

			/* at EOF only here strings may still be pending */
			while (p < herep)
				if ((*p)->ioflag & IOHERESTR)
					++p;
				else
					/* ksh -c 'cat <<EOF' can cause this */
					yyerror(Tf_heredoc,
					    evalstr((*p)->delim, 0));
		}
		return (c);
	}

	/* terminate word */
	*wp++ = EOS;
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD || state == SLETPAREN
	    /* XXX ONEWORD? */)
		return (LWORD);

	/* unget terminator */
	ungetsc(c);

	/*
	 * note: the alias-vs-function code below depends on several
	 * interna: starting from here, source->str is not modified;
	 * the way getsc() and ungetsc() operate; etc.
	 */

	/* copy word to unprefixed string ident */
	sp = yylval.cp;
	dp = ident;
	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
		*dp++ = *sp++;
	if (c != EOS)
		/* word is not unquoted, or space ran out */
		dp = ident;
	/* make sure the ident array stays NUL padded */
	memset(dp, 0, (ident + IDENT) - dp + 1);

	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
		struct tbl *p;
		uint32_t h = hash(ident);

		/* with ESACONLY, only esac and } count as keywords */
		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
		    (!(cf & ESACONLY) || p->val.i == ESAC ||
		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
			afree(yylval.cp, ATEMP);
			return (p->val.i);
		}
		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
		    (p->flag & ISSET)) {
			/*
			 * this still points to the same character as the
			 * ungetsc'd terminator from above
			 */
			const char *cp = source->str;

			/* prefer POSIX but not Korn functions over aliases */
			while (ctype(*cp, C_BLANK))
				/*
				 * this is like getsc() without skipping
				 * over Source boundaries (including not
				 * parsing ungetsc'd characters that got
				 * pushed into an SREREAD) which is what
				 * we want here anyway: find out whether
				 * the alias name is followed by a POSIX
				 * function definition
				 */
				++cp;
			/* prefer functions over aliases */
			if (cp[0] != '(' || cp[1] != ')') {
				Source *s = source;

				/* do not expand an alias already being expanded */
				while (s && (s->flags & SF_HASALIAS))
					if (s->u.tblp == p)
						return (LWORD);
					else
						s = s->next;
				/* push alias expansion */
				s = pushs(SALIAS, source->areap);
				s->start = s->str = p->val.s;
				s->u.tblp = p;
				s->flags |= SF_HASALIAS;
				s->line = source->line;
				s->next = source;
				if (source->type == SEOF) {
					/* prevent infinite recursion at EOS */
					source->u.tblp = p;
					source->flags |= SF_HASALIAS;
				}
				source = s;
				afree(yylval.cp, ATEMP);
				goto Again;
			}
		}
	} else if (*ident == '\0') {
		/* retain typeset et al. even when quoted */
		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
		uint32_t flag = tt ? tt->flag : 0;

		if (flag & (DECL_UTIL | DECL_FWDR))
			strlcpy(ident, dp, sizeof(ident));
		afree(dp, ATEMP);
	}

	return (LWORD);
}
1090
1091 static void
gethere(void)1092 gethere(void)
1093 {
1094 struct ioword **p;
1095
1096 for (p = heres; p < herep; p++)
1097 if (!((*p)->ioflag & IOHERESTR))
1098 readhere(*p);
1099 herep = heres;
1100 }
1101
/*
 * read "<<word" text: collect the lines of a here document up to,
 * but not including, the line consisting of the delimiter and store
 * them as a NUL-terminated string in iop->heredoc
 *
 * iop->delim is evaluated with evalstr() to get the delimiter; with
 * IOSKIP (<<-) set in iop->ioflag, leading tabs of every line are
 * dropped. Reaching end of input on a non-delimiter line raises a
 * yyerror().
 */

static void
readhere(struct ioword *iop)
{
	int c;
	/* delimiter string and the position reached in it on this line */
	const char *eof, *eofp;
	XString xs;
	char *xp;
	/* saved output position of the beginning of the current line */
	size_t xpos;

	eof = evalstr(iop->delim, 0);

	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline++;

	Xinit(xs, xp, 256, ATEMP);

 heredoc_read_line:
	/* beginning of line */
	eofp = eof;
	xpos = Xsavepos(xs, xp);
	if (iop->ioflag & IOSKIP) {
		/* skip over leading tabs */
		while ((c = getsc()) == '\t')
			;	/* nothing */
		goto heredoc_parse_char;
	}
 heredoc_read_char:
	c = getsc();
 heredoc_parse_char:
	/* compare with here document marker */
	if (!*eofp) {
		/* end of here document marker, what to do? */
		switch (c) {
		case ORD(/*(*/ ')'):
			if (!subshell_nesting_type)
				/*-
				 * not allowed outside $(...) or (...)
				 * => mismatch
				 */
				break;
			/* allow $(...) or (...) to close here */
			ungetsc(/*(*/ ')');
			/* FALLTHROUGH */
		case 0:
			/*
			 * Allow EOF here so commands without trailing
			 * newlines (mksh -c '...') will work as well.
			 */
		case ORD('\n'):
			/* Newline terminates here document marker */
			goto heredoc_found_terminator;
		}
	} else if (c == *eofp++)
		/* store; then read and compare next character */
		goto heredoc_store_and_loop;
	/* nope, mismatch; read until end of line */
	while (c != '\n') {
		if (!c)
			/* oops, reached EOF */
			yyerror(Tf_heredoc, eof);
		/* store character */
		Xcheck(xs, xp);
		Xput(xs, xp, c);
		/* read next character */
		c = getsc();
	}
	/* we read a newline as last character */
 heredoc_store_and_loop:
	/* store character */
	Xcheck(xs, xp);
	Xput(xs, xp, c);
	if (c == '\n')
		goto heredoc_read_line;
	goto heredoc_read_char;

 heredoc_found_terminator:
	/* jump back to saved beginning of line */
	xp = Xrestpos(xs, xp, xpos);
	/* terminate, close and store */
	Xput(xs, xp, '\0');
	iop->heredoc = Xclose(xs, xp);

	/* undo the increment from the top of the function */
	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline--;
}
1191
1192 void
yyerror(const char * fmt,...)1193 yyerror(const char *fmt, ...)
1194 {
1195 va_list va;
1196
1197 /* pop aliases and re-reads */
1198 while (source->type == SALIAS || source->type == SREREAD)
1199 source = source->next;
1200 /* zap pending input */
1201 source->str = null;
1202
1203 error_prefix(true);
1204 va_start(va, fmt);
1205 shf_vfprintf(shl_out, fmt, va);
1206 shf_putc('\n', shl_out);
1207 va_end(va);
1208 errorfz();
1209 }
1210
1211 /*
1212 * input for yylex with alias expansion
1213 */
1214
1215 Source *
pushs(int type,Area * areap)1216 pushs(int type, Area *areap)
1217 {
1218 Source *s;
1219
1220 s = alloc(sizeof(Source), areap);
1221 memset(s, 0, sizeof(Source));
1222 s->type = type;
1223 s->str = null;
1224 s->areap = areap;
1225 if (type == SFILE || type == SSTDIN)
1226 XinitN(s->xs, 256, s->areap);
1227 return (s);
1228 }
1229
/*
 * Slow path of o_getsc_u(): return the next character of the input,
 * refilling the current Source or moving on to another one once its
 * string is exhausted. Returns 0 when no more input is available, in
 * which case the current Source has been turned into an SEOF one.
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	while ((c = ord(*s->str++)) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* read the next line of input */
			getsc_line(s);
			break;

		case SWSTR:
			break;

		case SSTRING:
		case SSTRINGCMDLINE:
			break;

		case SWORDS:
			/* deliver the next word, then a separator */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			/* newline after the last word, T1space between words */
			if (*s->u.strv == NULL) {
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				s->start = s->str = T1space;
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					/* serve c from the alias' own ugbuf */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					break;
				}
			}
			/* retry with the source that is current now */
			continue;

		case SREREAD:
			/* text exhausted: free it unless it is the ugbuf */
			if (s->start != s->ugbuf)
				/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing was supplied above: this source is at EOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		/* echo the newly obtained input if requested */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}
1335
/*
 * Read the next line of input for source s into its buffer (s->xs).
 *
 * Uses the command line editor (x_read) when an editing mode is
 * enabled on an interactive tty source, otherwise reads from s->u.shf
 * with shf_getse(), growing the buffer until a newline or EOF/error.
 * On return s->start and s->str point at the line read; s->str is
 * NULL if nothing at all was read (EOF). For interactive input the
 * history is updated and the prompt is switched to PS2.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	/* "interactive": FTALKING is set and we read from an SSTDIN source */
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	bool have_tty = tobool(interactive && (s->flags & SF_TTY));

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	/* arm the input timeout while waiting on the tty */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive) {
		if (cur_prompt == PS1)
			histsave(&s->line, NULL, HIST_FLUSH, true);
		change_winsz();
	}
#ifndef MKSH_NO_CMDLINE_EDITING
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		int nread;

		/* an editing mode is active: let the line editor fill xp */
		nread = x_read(xp);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else
#endif
	  {
		/* plain read: print the prompt or just count the line */
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: clear the error, run traps, retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop if nothing was read or the line is complete */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	/* input is complete: disarm the timeout */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
	rndpush(cp);
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str) {
		/*
		 * non-empty interactive line: append it to the history
		 * when at a continuation prompt; at PS1 queue it unless
		 * its first character is in the C_IFS | C_IFSWS class
		 */
		if (cur_prompt != PS1)
			histsave(&s->line, s->str, HIST_APPEND, true);
		else if (!ctype(*s->str, C_IFS | C_IFSWS))
			histsave(&s->line, s->str, HIST_QUEUE, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
		else
			goto check_for_sole_return;
	} else if (interactive && cur_prompt == PS1) {
 check_for_sole_return:
		/*
		 * if the line consists of C_IFSWS characters only,
		 * flush and synchronise the history
		 */
		cp = Xstring(s->xs, xp);
		while (ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp) {
			histsave(&s->line, NULL, HIST_FLUSH, true);
			histsync();
		}
#endif
	}
	/* anything read after this line is a continuation */
	if (interactive)
		set_prompt(PS2, NULL);
}
1449
/*
 * Select the prompt to use next and compute its text.
 *
 * to: PS1 (command) or PS2 (command continuation); it is recorded in
 *     cur_prompt. For any other value only cur_prompt is updated.
 * s:  source whose line counter is used for "!" in PS1 and for
 *     current_lineno during expansion; may be NULL, in which case
 *     "!" expands to 0 and current_lineno is left alone.
 *
 * The result is stored in the global "prompt".
 */
void
set_prompt(int to, Source *s)
{
	cur_prompt = (uint8_t)to;

	switch (to) {
	/* command */
	case PS1:
		/*
		 * Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what AT&T ksh does (it does it after
		 * substitutions); POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;
			int saved_lineno;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen(NULL, strlen(ps1) * 2,
			    SHF_WR | SHF_DYNAMIC, NULL);
			/*
			 * copy PS1 to the string stream: "!!" becomes a
			 * single "!", a lone "!" becomes the line number
			 */
			while (*ps1)
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, Tf_lu, s ?
					    (unsigned long)s->line + 1 : 0UL);
			ps1 = shf_sclose(shf);
			saved_lineno = current_lineno;
			if (s)
				current_lineno = s->line + 1;
			/* remember ATEMP as it is before newenv() is called */
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (kshsetjmp(e->jbuf)) {
				/* expansion failed: fall back to safe_prompt */
				prompt = safe_prompt;
				/*
				 * Don't print an error - assume it has already
				 * been printed. Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else {
				char *cp = substitute(ps1, 0);
				/* keep a copy of the result in the saved area */
				strdupx(prompt, cp, saved_atemp);
			}
			current_lineno = saved_lineno;
			quitenv(NULL);
		}
		break;
	/* command continuation */
	case PS2:
		/* PS2 is used as is, without any expansion here */
		prompt = str_val(global("PS2"));
		break;
	}
}
1508
1509 int
pprompt(const char * cp,int ntruncate)1510 pprompt(const char *cp, int ntruncate)
1511 {
1512 char delimiter = 0;
1513 bool doprint = (ntruncate != -1);
1514 bool indelimit = false;
1515 int columns = 0, lines = 0;
1516
1517 /*
1518 * Undocumented AT&T ksh feature:
1519 * If the second char in the prompt string is \r then the first
1520 * char is taken to be a non-printing delimiter and any chars
1521 * between two instances of the delimiter are not considered to
1522 * be part of the prompt length
1523 */
1524 if (*cp && cp[1] == '\r') {
1525 delimiter = *cp;
1526 cp += 2;
1527 }
1528 for (; *cp; cp++) {
1529 if (indelimit && *cp != delimiter)
1530 ;
1531 else if (ctype(*cp, C_CR | C_LF)) {
1532 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1533 columns = 0;
1534 } else if (*cp == '\t') {
1535 columns = (columns | 7) + 1;
1536 } else if (*cp == '\b') {
1537 if (columns > 0)
1538 columns--;
1539 } else if (*cp == delimiter)
1540 indelimit = !indelimit;
1541 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1542 const char *cp2;
1543 columns += utf_widthadj(cp, &cp2);
1544 if (doprint && (indelimit ||
1545 (ntruncate < (x_cols * lines + columns))))
1546 shf_write(cp, cp2 - cp, shl_out);
1547 cp = cp2 - /* loop increment */ 1;
1548 continue;
1549 } else
1550 columns++;
1551 if (doprint && (*cp != delimiter) &&
1552 (indelimit || (ntruncate < (x_cols * lines + columns))))
1553 shf_putc(*cp, shl_out);
1554 }
1555 if (doprint)
1556 shf_flush(shl_out);
1557 return (x_cols * lines + columns);
1558 }
1559
/*
 * Read the variable part of a ${...} expression (i.e. up to but not
 * including the :[-+?=#%] or close-brace).
 *
 * wsp: the XString being filled; it is grown via Xcheck as needed.
 * wp:  current write position inside wsp.
 *
 * The characters accepted are appended at wp, followed by a NUL; the
 * character that ended the variable part is pushed back with
 * ungetsc(). Returns the write position after the NUL.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	char c;
	enum parse_state {
		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
		PS_IDENT, PS_NUMBER, PS_VAR1
	} state = PS_INITIAL;

	while (/* CONSTCOND */ 1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		case PS_SAW_HASH:
			if (ctype(c, C_VAR1)) {
				char c2;

				/* peek at the character after c */
				c2 = getsc();
				ungetsc(c2);
				if (ord(c2) != ORD(/*{*/ '}')) {
					/*
					 * NOTE(review): c is pushed back here
					 * and once more at "out" - confirm the
					 * double push-back is intended.
					 */
					ungetsc(c);
					goto out;
				}
			}
			goto ps_common;
		case PS_SAW_BANG:
			/* these characters end the variable part right after ! */
			switch (ord(c)) {
			case ORD('@'):
			case ORD('#'):
			case ORD('-'):
			case ORD('?'):
				goto out;
			}
			goto ps_common;
		case PS_INITIAL:
			/* a leading %, # or ! is stored and remembered */
			switch (ord(c)) {
			case ORD('%'):
				state = PS_SAW_PERCENT;
				goto next;
			case ORD('#'):
				state = PS_SAW_HASH;
				goto next;
			case ORD('!'):
				state = PS_SAW_BANG;
				goto next;
			}
			/* FALLTHROUGH */
		case PS_SAW_PERCENT:
 ps_common:
			/* classify the first character of the name proper */
			if (ctype(c, C_ALPHX))
				state = PS_IDENT;
			else if (ctype(c, C_DIGIT))
				state = PS_NUMBER;
			else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				goto out;
			break;
		case PS_IDENT:
			if (!ctype(c, C_ALNUX)) {
				if (ord(c) == ORD('[')) {
					char *tmp, *p;

					/* array subscript: copy "[" plus the saved text */
					if (!arraysub(&tmp))
						yyerror("missing ]");
					*wp++ = c;
					p = tmp;
					while (*p) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					/*
					 * read the character following the
					 * subscript (tmp already ends with the
					 * closing ]); it is pushed back at "out"
					 */
					c = getsc();
				}
				goto out;
			}
 next:
			break;
		case PS_NUMBER:
			if (!ctype(c, C_DIGIT))
				goto out;
			break;
		case PS_VAR1:
			/* a C_VAR1 name is a single character: stop here */
			goto out;
		}
		/* c belongs to the variable part: store it */
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
 out:
	/* end of variable part */
	*wp++ = '\0';
	ungetsc(c);
	return (wp);
}
1659
1660 /*
1661 * Save an array subscript - returns true if matching bracket found, false
1662 * if eof or newline was found.
1663 * (Returned string double null terminated)
1664 */
1665 static bool
arraysub(char ** strp)1666 arraysub(char **strp)
1667 {
1668 XString ws;
1669 char *wp, c;
1670 /* we are just past the initial [ */
1671 unsigned int depth = 1;
1672
1673 Xinit(ws, wp, 32, ATEMP);
1674
1675 do {
1676 c = getsc();
1677 Xcheck(ws, wp);
1678 *wp++ = c;
1679 if (ord(c) == ORD('['))
1680 depth++;
1681 else if (ord(c) == ORD(']'))
1682 depth--;
1683 } while (depth > 0 && c && c != '\n');
1684
1685 *wp++ = '\0';
1686 *strp = Xclose(ws, wp);
1687
1688 return (tobool(depth == 0));
1689 }
1690
1691 /* Unget a char: handles case when we are already at the start of the buffer */
1692 static void
ungetsc(int c)1693 ungetsc(int c)
1694 {
1695 struct sretrace_info *rp = retrace_info;
1696
1697 if (backslash_skip)
1698 backslash_skip--;
1699 /* Don't unget EOF... */
1700 if (source->str == null && c == '\0')
1701 return;
1702 while (rp) {
1703 if (Xlength(rp->xs, rp->xp))
1704 rp->xp--;
1705 rp = rp->next;
1706 }
1707 ungetsc_i(c);
1708 }
1709 static void
ungetsc_i(int c)1710 ungetsc_i(int c)
1711 {
1712 if (source->str > source->start)
1713 source->str--;
1714 else {
1715 Source *s;
1716
1717 s = pushs(SREREAD, source->areap);
1718 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1719 s->start = s->str = s->ugbuf;
1720 s->next = source;
1721 source = s;
1722 }
1723 }
1724
1725
1726 /* Called to get a char that isn't a \newline sequence. */
1727 static int
getsc_bn(void)1728 getsc_bn(void)
1729 {
1730 int c, c2;
1731
1732 if (ignore_backslash_newline)
1733 return (o_getsc_u());
1734
1735 if (backslash_skip == 1) {
1736 backslash_skip = 2;
1737 return (o_getsc_u());
1738 }
1739
1740 backslash_skip = 0;
1741
1742 while (/* CONSTCOND */ 1) {
1743 c = o_getsc_u();
1744 if (c == '\\') {
1745 if ((c2 = o_getsc_u()) == '\n')
1746 /* ignore the \newline; get the next char... */
1747 continue;
1748 ungetsc_i(c2);
1749 backslash_skip = 1;
1750 }
1751 return (c);
1752 }
1753 }
1754
1755 void
yyskiputf8bom(void)1756 yyskiputf8bom(void)
1757 {
1758 int c;
1759
1760 if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1761 ungetsc_i(c);
1762 return;
1763 }
1764 if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1765 ungetsc_i(c);
1766 ungetsc_i(asc2rtt(0xEF));
1767 return;
1768 }
1769 if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1770 ungetsc_i(c);
1771 ungetsc_i(asc2rtt(0xBB));
1772 ungetsc_i(asc2rtt(0xEF));
1773 return;
1774 }
1775 UTFMODE |= 8;
1776 }
1777
1778 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1779 push_state_i(State_info *si, Lex_state *old_end)
1780 {
1781 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1782
1783 news[0].ls_base = old_end;
1784 si->base = &news[0];
1785 si->end = &news[STATE_BSIZE];
1786 return (&news[1]);
1787 }
1788
1789 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1790 pop_state_i(State_info *si, Lex_state *old_end)
1791 {
1792 Lex_state *old_base = si->base;
1793
1794 si->base = old_end->ls_base - STATE_BSIZE;
1795 si->end = old_end->ls_base;
1796
1797 afree(old_base, ATEMP);
1798
1799 return (si->base + STATE_BSIZE - 1);
1800 }
1801