• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/

/*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 *		 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
 *	mirabilos <m@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.247 2018/01/14 01:44:01 tg Exp $");
27 
/*
 * states while lexing word
 *
 * These values are stored in the type member of a lexer state entry
 * and in the local "state" variable of yylex().
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
#define SINVALID	255	/* invalid state */
48 
49 struct sretrace_info {
50 	struct sretrace_info *next;
51 	XString xs;
52 	char *xp;
53 };
54 
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The union members are alternatives; which one is meaningful depends on
 * the state type (see the per-member comments). The ls_* macros below are
 * shorthands for the union members.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
} Lex_state;
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85 
86 typedef struct {
87 	Lex_state *base;
88 	Lex_state *end;
89 } State_info;
90 
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int getsc_i(void);
98 static char *get_brace_var(XString *, char *);
99 static bool arraysub(char **);
100 static void gethere(void);
101 static Lex_state *push_state_i(State_info *, Lex_state *);
102 static Lex_state *pop_state_i(State_info *, Lex_state *);
103 
104 static int backslash_skip;
105 static int ignore_backslash_newline;
106 
/*
 * optimised getsc_bn(): take the next byte straight from source->str
 * unless it is NUL or a backslash or backslash_skip is set, in which
 * case the real getsc_bn() is called
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			    !backslash_skip ? *source->str++ : getsc_bn())
/* optimised getsc_uu(): only NUL forces the call to getsc_uu() */
#define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: forms the complete body of a function returning int;
 * evaluates carg once, appends the result to every entry of the
 * retrace_info list and returns it
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);
125 
/*
 * callback: read one character via o_getsc(), converted through
 * unsigned char, record it in all active retrace buffers and return it;
 * passed to unbksl() by yylex()
 */
static int
getsc_i(void)
{
	o_getsc_r((unsigned int)(unsigned char)o_getsc());
}
132 
/*
 * getsc(): small builds call getsc_i() directly; otherwise the
 * character is fetched inline by o_getsc() at the call site and only
 * the retrace bookkeeping is done in the getsc_r() function
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
#define getsc()		getsc_i()
#else
static int getsc_r(int);

/* record c in all active retrace buffers and return it unchanged */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
#endif
146 
/* number of Lex_state entries in one state block */
#define STATE_BSIZE	8

/*
 * The macros below operate on the yylex() locals statep, state,
 * state_info, ws, wp, sp and dp as well as on retrace_info.
 */

/* enter state s; push_state_i() is called when the block is exhausted */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/* leave the current state; pop_state_i() is called at a block's base */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * enter state s, remember the current output position in ls_start
 * and start recording the raw input in a new retrace entry
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * rewind the output to the position saved by PUSH_SRETRACE, leave the
 * NUL-terminated recorded input in sp (the caller disposes of it),
 * free the retrace entry and leave the state
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
181 
182 /**
183  * Lexical analyser
184  *
185  * tokens are not regular expressions, they are LL(1).
186  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
187  * hence the state stack. Note "$(...)" are now parsed recursively.
188  */
189 
190 int
yylex(int cf)191 yylex(int cf)
192 {
193 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
194 	State_info state_info;
195 	int c, c2, state;
196 	size_t cz;
197 	XString ws;		/* expandable output word */
198 	char *wp;		/* output word pointer */
199 	char *sp, *dp;
200 
201  Again:
202 	states[0].type = SINVALID;
203 	states[0].ls_base = NULL;
204 	statep = &states[1];
205 	state_info.base = states;
206 	state_info.end = &state_info.base[STATE_BSIZE];
207 
208 	Xinit(ws, wp, 64, ATEMP);
209 
210 	backslash_skip = 0;
211 	ignore_backslash_newline = 0;
212 
213 	if (cf & ONEWORD)
214 		state = SWORD;
215 	else if (cf & LETEXPR) {
216 		/* enclose arguments in (double) quotes */
217 		*wp++ = OQUOTE;
218 		state = SLETPAREN;
219 		statep->nparen = 0;
220 	} else {
221 		/* normal lexing */
222 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
223 		do {
224 			c = getsc();
225 		} while (ctype(c, C_BLANK));
226 		if (c == '#') {
227 			ignore_backslash_newline++;
228 			do {
229 				c = getsc();
230 			} while (!ctype(c, C_NUL | C_LF));
231 			ignore_backslash_newline--;
232 		}
233 		ungetsc(c);
234 	}
235 	if (source->flags & SF_ALIAS) {
236 		/* trailing ' ' in alias definition */
237 		source->flags &= ~SF_ALIAS;
238 		/* POSIX: trailing space only counts if parsing simple cmd */
239 		if (!Flag(FPOSIX) || (cf & CMDWORD))
240 			cf |= ALIAS;
241 	}
242 
243 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
244 	statep->type = state;
245 
246 	/* collect non-special or quoted characters to form word */
247 	while (!((c = getsc()) == 0 ||
248 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
249 		if (state == SBASE &&
250 		    subshell_nesting_type == ORD(/*{*/ '}') &&
251 		    (unsigned int)c == ORD(/*{*/ '}'))
252 			/* possibly end ${ :;} */
253 			break;
254 		Xcheck(ws, wp);
255 		switch (state) {
256 		case SADELIM:
257 			if ((unsigned int)c == ORD('('))
258 				statep->nparen++;
259 			else if ((unsigned int)c == ORD(')'))
260 				statep->nparen--;
261 			else if (statep->nparen == 0 &&
262 			    ((unsigned int)c == ORD(/*{*/ '}') ||
263 			    c == (int)statep->ls_adelim.delimiter)) {
264 				*wp++ = ADELIM;
265 				*wp++ = c;
266 				if ((unsigned int)c == ORD(/*{*/ '}') ||
267 				    --statep->ls_adelim.num == 0)
268 					POP_STATE();
269 				if ((unsigned int)c == ORD(/*{*/ '}'))
270 					POP_STATE();
271 				break;
272 			}
273 			/* FALLTHROUGH */
274 		case SBASE:
275 			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
276 				/* temporary */
277 				*wp = EOS;
278 				if (is_wdvarname(Xstring(ws, wp), false)) {
279 					char *p, *tmp;
280 
281 					if (arraysub(&tmp)) {
282 						*wp++ = CHAR;
283 						*wp++ = c;
284 						for (p = tmp; *p; ) {
285 							Xcheck(ws, wp);
286 							*wp++ = CHAR;
287 							*wp++ = *p++;
288 						}
289 						afree(tmp, ATEMP);
290 						break;
291 					}
292 				}
293 				*wp++ = CHAR;
294 				*wp++ = c;
295 				break;
296 			}
297 			/* FALLTHROUGH */
298  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
299 			if (ctype(c, C_PATMO)) {
300 				c2 = getsc();
301 				if ((unsigned int)c2 == ORD('(' /*)*/)) {
302 					*wp++ = OPAT;
303 					*wp++ = c;
304 					PUSH_STATE(SPATTERN);
305 					break;
306 				}
307 				ungetsc(c2);
308 			}
309 			/* FALLTHROUGH */
310  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
311 			switch (c) {
312 			case ORD('\\'):
313  getsc_qchar:
314 				if ((c = getsc())) {
315 					/* trailing \ is lost */
316 					*wp++ = QCHAR;
317 					*wp++ = c;
318 				}
319 				break;
320 			case ORD('\''):
321  open_ssquote_unless_heredoc:
322 				if ((cf & HEREDOC))
323 					goto store_char;
324 				*wp++ = OQUOTE;
325 				ignore_backslash_newline++;
326 				PUSH_STATE(SSQUOTE);
327 				break;
328 			case ORD('"'):
329  open_sdquote:
330 				*wp++ = OQUOTE;
331 				PUSH_STATE(SDQUOTE);
332 				break;
333 			case ORD('$'):
334 				/*
335 				 * processing of dollar sign belongs into
336 				 * Subst, except for those which can open
337 				 * a string: $'…' and $"…"
338 				 */
339  subst_dollar_ex:
340 				c = getsc();
341 				switch (c) {
342 				case ORD('"'):
343 					goto open_sdquote;
344 				case ORD('\''):
345 					goto open_sequote;
346 				default:
347 					goto SubstS;
348 				}
349 			default:
350 				goto Subst;
351 			}
352 			break;
353 
354  Subst:
355 			switch (c) {
356 			case ORD('\\'):
357 				c = getsc();
358 				switch (c) {
359 				case ORD('"'):
360 					if ((cf & HEREDOC))
361 						goto heredocquote;
362 					/* FALLTHROUGH */
363 				case ORD('\\'):
364 				case ORD('$'):
365 				case ORD('`'):
366  store_qchar:
367 					*wp++ = QCHAR;
368 					*wp++ = c;
369 					break;
370 				default:
371  heredocquote:
372 					Xcheck(ws, wp);
373 					if (c) {
374 						/* trailing \ is lost */
375 						*wp++ = CHAR;
376 						*wp++ = '\\';
377 						*wp++ = CHAR;
378 						*wp++ = c;
379 					}
380 					break;
381 				}
382 				break;
383 			case ORD('$'):
384 				c = getsc();
385  SubstS:
386 				if ((unsigned int)c == ORD('(' /*)*/)) {
387 					c = getsc();
388 					if ((unsigned int)c == ORD('(' /*)*/)) {
389 						*wp++ = EXPRSUB;
390 						PUSH_SRETRACE(SASPAREN);
391 						statep->nparen = 2;
392 						*retrace_info->xp++ = '(';
393 					} else {
394 						ungetsc(c);
395  subst_command:
396 						c = COMSUB;
397  subst_command2:
398 						sp = yyrecursive(c);
399 						cz = strlen(sp) + 1;
400 						XcheckN(ws, wp, cz);
401 						*wp++ = c;
402 						memcpy(wp, sp, cz);
403 						wp += cz;
404 					}
405 				} else if ((unsigned int)c == ORD('{' /*}*/)) {
406 					if ((unsigned int)(c = getsc()) == ORD('|')) {
407 						/*
408 						 * non-subenvironment
409 						 * value substitution
410 						 */
411 						c = VALSUB;
412 						goto subst_command2;
413 					} else if (ctype(c, C_IFSWS)) {
414 						/*
415 						 * non-subenvironment
416 						 * "command" substitution
417 						 */
418 						c = FUNSUB;
419 						goto subst_command2;
420 					}
421 					ungetsc(c);
422 					*wp++ = OSUBST;
423 					*wp++ = '{' /*}*/;
424 					wp = get_brace_var(&ws, wp);
425 					c = getsc();
426 					/* allow :# and :% (ksh88 compat) */
427 					if ((unsigned int)c == ORD(':')) {
428 						*wp++ = CHAR;
429 						*wp++ = c;
430 						c = getsc();
431 						if ((unsigned int)c == ORD(':')) {
432 							*wp++ = CHAR;
433 							*wp++ = '0';
434 							*wp++ = ADELIM;
435 							*wp++ = ':';
436 							PUSH_STATE(SBRACE);
437 							PUSH_STATE(SADELIM);
438 							statep->ls_adelim.delimiter = ':';
439 							statep->ls_adelim.num = 1;
440 							statep->nparen = 0;
441 							break;
442 						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
443 						    /*XXX what else? */
444 						    c == '(' /*)*/) {
445 							/* substring subst. */
446 							if (c != ' ') {
447 								*wp++ = CHAR;
448 								*wp++ = ' ';
449 							}
450 							ungetsc(c);
451 							PUSH_STATE(SBRACE);
452 							PUSH_STATE(SADELIM);
453 							statep->ls_adelim.delimiter = ':';
454 							statep->ls_adelim.num = 2;
455 							statep->nparen = 0;
456 							break;
457 						}
458 					} else if (c == '/') {
459 						c2 = ADELIM;
460  parse_adelim_slash:
461 						*wp++ = CHAR;
462 						*wp++ = c;
463 						if ((unsigned int)(c = getsc()) == ORD('/')) {
464 							*wp++ = c2;
465 							*wp++ = c;
466 						} else
467 							ungetsc(c);
468 						PUSH_STATE(SBRACE);
469 						PUSH_STATE(SADELIM);
470 						statep->ls_adelim.delimiter = '/';
471 						statep->ls_adelim.num = 1;
472 						statep->nparen = 0;
473 						break;
474 					} else if (c == '@') {
475 						c2 = getsc();
476 						ungetsc(c2);
477 						if ((unsigned int)c2 == ORD('/')) {
478 							c2 = CHAR;
479 							goto parse_adelim_slash;
480 						}
481 					}
482 					/*
483 					 * If this is a trim operation,
484 					 * treat (,|,) specially in STBRACE.
485 					 */
486 					if (ctype(c, C_SUB2)) {
487 						ungetsc(c);
488 						if (Flag(FSH))
489 							PUSH_STATE(STBRACEBOURNE);
490 						else
491 							PUSH_STATE(STBRACEKORN);
492 					} else {
493 						ungetsc(c);
494 						if (state == SDQUOTE ||
495 						    state == SQBRACE)
496 							PUSH_STATE(SQBRACE);
497 						else
498 							PUSH_STATE(SBRACE);
499 					}
500 				} else if (ctype(c, C_ALPHX)) {
501 					*wp++ = OSUBST;
502 					*wp++ = 'X';
503 					do {
504 						Xcheck(ws, wp);
505 						*wp++ = c;
506 						c = getsc();
507 					} while (ctype(c, C_ALNUX));
508 					*wp++ = '\0';
509 					*wp++ = CSUBST;
510 					*wp++ = 'X';
511 					ungetsc(c);
512 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
513 					Xcheck(ws, wp);
514 					*wp++ = OSUBST;
515 					*wp++ = 'X';
516 					*wp++ = c;
517 					*wp++ = '\0';
518 					*wp++ = CSUBST;
519 					*wp++ = 'X';
520 				} else {
521 					*wp++ = CHAR;
522 					*wp++ = '$';
523 					ungetsc(c);
524 				}
525 				break;
526 			case ORD('`'):
527  subst_gravis:
528 				PUSH_STATE(SBQUOTE);
529 				*wp++ = COMASUB;
530 				/*
531 				 * We need to know whether we are within double
532 				 * quotes in order to translate \" to " within
533 				 * "…`…\"…`…" because, unlike for COMSUBs, the
534 				 * outer double quoteing changes the backslash
535 				 * meaning for the inside. For more details:
536 				 * http://austingroupbugs.net/view.php?id=1015
537 				 */
538 				statep->ls_bool = false;
539 				s2 = statep;
540 				base = state_info.base;
541 				while (/* CONSTCOND */ 1) {
542 					for (; s2 != base; s2--) {
543 						if (s2->type == SDQUOTE) {
544 							statep->ls_bool = true;
545 							break;
546 						}
547 					}
548 					if (s2 != base)
549 						break;
550 					if (!(s2 = s2->ls_base))
551 						break;
552 					base = s2-- - STATE_BSIZE;
553 				}
554 				break;
555 			case QCHAR:
556 				if (cf & LQCHAR) {
557 					*wp++ = QCHAR;
558 					*wp++ = getsc();
559 					break;
560 				}
561 				/* FALLTHROUGH */
562 			default:
563  store_char:
564 				*wp++ = CHAR;
565 				*wp++ = c;
566 			}
567 			break;
568 
569 		case SEQUOTE:
570 			if ((unsigned int)c == ORD('\'')) {
571 				POP_STATE();
572 				*wp++ = CQUOTE;
573 				ignore_backslash_newline--;
574 			} else if ((unsigned int)c == ORD('\\')) {
575 				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
576 					c2 = getsc();
577 				if (c2 == 0)
578 					statep->ls_bool = true;
579 				if (!statep->ls_bool) {
580 					char ts[4];
581 
582 					if ((unsigned int)c2 < 0x100) {
583 						*wp++ = QCHAR;
584 						*wp++ = c2;
585 					} else {
586 						cz = utf_wctomb(ts, c2 - 0x100);
587 						ts[cz] = 0;
588 						cz = 0;
589 						do {
590 							*wp++ = QCHAR;
591 							*wp++ = ts[cz];
592 						} while (ts[++cz]);
593 					}
594 				}
595 			} else if (!statep->ls_bool) {
596 				*wp++ = QCHAR;
597 				*wp++ = c;
598 			}
599 			break;
600 
601 		case SSQUOTE:
602 			if ((unsigned int)c == ORD('\'')) {
603 				POP_STATE();
604 				if ((cf & HEREDOC) || state == SQBRACE)
605 					goto store_char;
606 				*wp++ = CQUOTE;
607 				ignore_backslash_newline--;
608 			} else {
609 				*wp++ = QCHAR;
610 				*wp++ = c;
611 			}
612 			break;
613 
614 		case SDQUOTE:
615 			if ((unsigned int)c == ORD('"')) {
616 				POP_STATE();
617 				*wp++ = CQUOTE;
618 			} else
619 				goto Subst;
620 			break;
621 
622 		/* $(( ... )) */
623 		case SASPAREN:
624 			if ((unsigned int)c == ORD('('))
625 				statep->nparen++;
626 			else if ((unsigned int)c == ORD(')')) {
627 				statep->nparen--;
628 				if (statep->nparen == 1) {
629 					/* end of EXPRSUB */
630 					POP_SRETRACE();
631 
632 					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
633 						cz = strlen(sp) - 2;
634 						XcheckN(ws, wp, cz);
635 						memcpy(wp, sp + 1, cz);
636 						wp += cz;
637 						afree(sp, ATEMP);
638 						*wp++ = '\0';
639 						break;
640 					} else {
641 						Source *s;
642 
643 						ungetsc(c2);
644 						/*
645 						 * mismatched parenthesis -
646 						 * assume we were really
647 						 * parsing a $(...) expression
648 						 */
649 						--wp;
650 						s = pushs(SREREAD,
651 						    source->areap);
652 						s->start = s->str =
653 						    s->u.freeme = sp;
654 						s->next = source;
655 						source = s;
656 						goto subst_command;
657 					}
658 				}
659 			}
660 			/* reuse existing state machine */
661 			goto Sbase2;
662 
663 		case SQBRACE:
664 			if ((unsigned int)c == ORD('\\')) {
665 				/*
666 				 * perform POSIX "quote removal" if the back-
667 				 * slash is "special", i.e. same cases as the
668 				 * {case '\\':} in Subst: plus closing brace;
669 				 * in mksh code "quote removal" on '\c' means
670 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
671 				 * emitted (in heredocquote:)
672 				 */
673 				if ((unsigned int)(c = getsc()) == ORD('"') ||
674 				    (unsigned int)c == ORD('\\') ||
675 				    ctype(c, C_DOLAR | C_GRAVE) ||
676 				    (unsigned int)c == ORD(/*{*/ '}'))
677 					goto store_qchar;
678 				goto heredocquote;
679 			}
680 			goto common_SQBRACE;
681 
682 		case SBRACE:
683 			if ((unsigned int)c == ORD('\''))
684 				goto open_ssquote_unless_heredoc;
685 			else if ((unsigned int)c == ORD('\\'))
686 				goto getsc_qchar;
687  common_SQBRACE:
688 			if ((unsigned int)c == ORD('"'))
689 				goto open_sdquote;
690 			else if ((unsigned int)c == ORD('$'))
691 				goto subst_dollar_ex;
692 			else if ((unsigned int)c == ORD('`'))
693 				goto subst_gravis;
694 			else if ((unsigned int)c != ORD(/*{*/ '}'))
695 				goto store_char;
696 			POP_STATE();
697 			*wp++ = CSUBST;
698 			*wp++ = /*{*/ '}';
699 			break;
700 
701 		/* Same as SBASE, except (,|,) treated specially */
702 		case STBRACEKORN:
703 			if ((unsigned int)c == ORD('|'))
704 				*wp++ = SPAT;
705 			else if ((unsigned int)c == ORD('(')) {
706 				*wp++ = OPAT;
707 				/* simile for @ */
708 				*wp++ = ' ';
709 				PUSH_STATE(SPATTERN);
710 			} else /* FALLTHROUGH */
711 		case STBRACEBOURNE:
712 			  if ((unsigned int)c == ORD(/*{*/ '}')) {
713 				POP_STATE();
714 				*wp++ = CSUBST;
715 				*wp++ = /*{*/ '}';
716 			} else
717 				goto Sbase1;
718 			break;
719 
720 		case SBQUOTE:
721 			if ((unsigned int)c == ORD('`')) {
722 				*wp++ = 0;
723 				POP_STATE();
724 			} else if ((unsigned int)c == ORD('\\')) {
725 				switch (c = getsc()) {
726 				case 0:
727 					/* trailing \ is lost */
728 					break;
729 				case ORD('$'):
730 				case ORD('`'):
731 				case ORD('\\'):
732 					*wp++ = c;
733 					break;
734 				case ORD('"'):
735 					if (statep->ls_bool) {
736 						*wp++ = c;
737 						break;
738 					}
739 					/* FALLTHROUGH */
740 				default:
741 					*wp++ = '\\';
742 					*wp++ = c;
743 					break;
744 				}
745 			} else
746 				*wp++ = c;
747 			break;
748 
749 		/* ONEWORD */
750 		case SWORD:
751 			goto Subst;
752 
753 		/* LETEXPR: (( ... )) */
754 		case SLETPAREN:
755 			if ((unsigned int)c == ORD(/*(*/ ')')) {
756 				if (statep->nparen > 0)
757 					--statep->nparen;
758 				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
759 					c = 0;
760 					*wp++ = CQUOTE;
761 					goto Done;
762 				} else {
763 					Source *s;
764 
765 					ungetsc(c2);
766 					ungetsc(c);
767 					/*
768 					 * mismatched parenthesis -
769 					 * assume we were really
770 					 * parsing a (...) expression
771 					 */
772 					*wp = EOS;
773 					sp = Xstring(ws, wp);
774 					dp = wdstrip(sp + 1, WDS_TPUTS);
775 					s = pushs(SREREAD, source->areap);
776 					s->start = s->str = s->u.freeme = dp;
777 					s->next = source;
778 					source = s;
779 					ungetsc('(' /*)*/);
780 					return (ORD('(' /*)*/));
781 				}
782 			} else if ((unsigned int)c == ORD('('))
783 				/*
784 				 * parentheses inside quotes and
785 				 * backslashes are lost, but AT&T ksh
786 				 * doesn't count them either
787 				 */
788 				++statep->nparen;
789 			goto Sbase2;
790 
791 		/* << or <<- delimiter */
792 		case SHEREDELIM:
793 			/*
794 			 * here delimiters need a special case since
795 			 * $ and `...` are not to be treated specially
796 			 */
797 			switch (c) {
798 			case ORD('\\'):
799 				if ((c = getsc())) {
800 					/* trailing \ is lost */
801 					*wp++ = QCHAR;
802 					*wp++ = c;
803 				}
804 				break;
805 			case ORD('\''):
806 				goto open_ssquote_unless_heredoc;
807 			case ORD('$'):
808 				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
809  open_sequote:
810 					*wp++ = OQUOTE;
811 					ignore_backslash_newline++;
812 					PUSH_STATE(SEQUOTE);
813 					statep->ls_bool = false;
814 					break;
815 				} else if ((unsigned int)c2 == ORD('"')) {
816 					/* FALLTHROUGH */
817 			case ORD('"'):
818 					PUSH_SRETRACE(SHEREDQUOTE);
819 					break;
820 				}
821 				ungetsc(c2);
822 				/* FALLTHROUGH */
823 			default:
824 				*wp++ = CHAR;
825 				*wp++ = c;
826 			}
827 			break;
828 
829 		/* " in << or <<- delimiter */
830 		case SHEREDQUOTE:
831 			if ((unsigned int)c != ORD('"'))
832 				goto Subst;
833 			POP_SRETRACE();
834 			dp = strnul(sp) - 1;
835 			/* remove the trailing double quote */
836 			*dp = '\0';
837 			/* store the quoted string */
838 			*wp++ = OQUOTE;
839 			XcheckN(ws, wp, (dp - sp) * 2);
840 			dp = sp;
841 			while ((c = *dp++)) {
842 				if (c == '\\') {
843 					switch ((c = *dp++)) {
844 					case ORD('\\'):
845 					case ORD('"'):
846 					case ORD('$'):
847 					case ORD('`'):
848 						break;
849 					default:
850 						*wp++ = CHAR;
851 						*wp++ = '\\';
852 						break;
853 					}
854 				}
855 				*wp++ = CHAR;
856 				*wp++ = c;
857 			}
858 			afree(sp, ATEMP);
859 			*wp++ = CQUOTE;
860 			state = statep->type = SHEREDELIM;
861 			break;
862 
863 		/* in *(...|...) pattern (*+?@!) */
864 		case SPATTERN:
865 			if ((unsigned int)c == ORD(/*(*/ ')')) {
866 				*wp++ = CPAT;
867 				POP_STATE();
868 			} else if ((unsigned int)c == ORD('|')) {
869 				*wp++ = SPAT;
870 			} else if ((unsigned int)c == ORD('(')) {
871 				*wp++ = OPAT;
872 				/* simile for @ */
873 				*wp++ = ' ';
874 				PUSH_STATE(SPATTERN);
875 			} else
876 				goto Sbase1;
877 			break;
878 		}
879 	}
880  Done:
881 	Xcheck(ws, wp);
882 	if (statep != &states[1])
883 		/* XXX figure out what is missing */
884 		yyerror("no closing quote");
885 
886 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
887 	if (state == SHEREDELIM)
888 		state = SBASE;
889 
890 	dp = Xstring(ws, wp);
891 	if (state == SBASE && (
892 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
893 	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
894 	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
895 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
896 
897 		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
898 
899 		if (c == '&') {
900 			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
901 				ungetsc(c2);
902 				goto no_iop;
903 			}
904 			c = c2;
905 			iop->ioflag = IOBASH;
906 		} else
907 			iop->ioflag = 0;
908 
909 		c2 = getsc();
910 		/* <<, >>, <> are ok, >< is not */
911 		if (c == c2 || ((unsigned int)c == ORD('<') &&
912 		    (unsigned int)c2 == ORD('>'))) {
913 			iop->ioflag |= c == c2 ?
914 			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
915 			if (iop->ioflag == IOHERE) {
916 				if ((unsigned int)(c2 = getsc()) == ORD('-'))
917 					iop->ioflag |= IOSKIP;
918 				else if ((unsigned int)c2 == ORD('<'))
919 					iop->ioflag |= IOHERESTR;
920 				else
921 					ungetsc(c2);
922 			}
923 		} else if ((unsigned int)c2 == ORD('&'))
924 			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
925 		else {
926 			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
927 			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
928 				iop->ioflag |= IOCLOB;
929 			else
930 				ungetsc(c2);
931 		}
932 
933 		iop->ioname = NULL;
934 		iop->delim = NULL;
935 		iop->heredoc = NULL;
936 		/* free word */
937 		Xfree(ws, wp);
938 		yylval.iop = iop;
939 		return (REDIR);
940  no_iop:
941 		afree(iop, ATEMP);
942 	}
943 
944 	if (wp == dp && state == SBASE) {
945 		/* free word */
946 		Xfree(ws, wp);
947 		/* no word, process LEX1 character */
948 		if (((unsigned int)c == ORD('|')) ||
949 		    ((unsigned int)c == ORD('&')) ||
950 		    ((unsigned int)c == ORD(';')) ||
951 		    ((unsigned int)c == ORD('(' /*)*/))) {
952 			if ((c2 = getsc()) == c)
953 				c = ((unsigned int)c == ORD(';')) ? BREAK :
954 				    ((unsigned int)c == ORD('|')) ? LOGOR :
955 				    ((unsigned int)c == ORD('&')) ? LOGAND :
956 				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
957 			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
958 				c = COPROC;
959 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
960 				c = BRKEV;
961 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
962 				c = BRKFT;
963 			else
964 				ungetsc(c2);
965 #ifndef MKSH_SMALL
966 			if (c == BREAK) {
967 				if ((unsigned int)(c2 = getsc()) == ORD('&'))
968 					c = BRKEV;
969 				else
970 					ungetsc(c2);
971 			}
972 #endif
973 		} else if ((unsigned int)c == ORD('\n')) {
974 			if (cf & HEREDELIM)
975 				ungetsc(c);
976 			else {
977 				gethere();
978 				if (cf & CONTIN)
979 					goto Again;
980 			}
981 		} else if (c == '\0' && !(cf & HEREDELIM)) {
982 			struct ioword **p = heres;
983 
984 			while (p < herep)
985 				if ((*p)->ioflag & IOHERESTR)
986 					++p;
987 				else
988 					/* ksh -c 'cat <<EOF' can cause this */
989 					yyerror(Tf_heredoc,
990 					    evalstr((*p)->delim, 0));
991 		}
992 		return (c);
993 	}
994 
995 	/* terminate word */
996 	*wp++ = EOS;
997 	yylval.cp = Xclose(ws, wp);
998 	if (state == SWORD || state == SLETPAREN
999 	    /* XXX ONEWORD? */)
1000 		return (LWORD);
1001 
1002 	/* unget terminator */
1003 	ungetsc(c);
1004 
1005 	/*
1006 	 * note: the alias-vs-function code below depends on several
1007 	 * interna: starting from here, source->str is not modified;
1008 	 * the way getsc() and ungetsc() operate; etc.
1009 	 */
1010 
1011 	/* copy word to unprefixed string ident */
1012 	sp = yylval.cp;
1013 	dp = ident;
1014 	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1015 		*dp++ = *sp++;
1016 	if (c != EOS)
1017 		/* word is not unquoted, or space ran out */
1018 		dp = ident;
1019 	/* make sure the ident array stays NUL padded */
1020 	memset(dp, 0, (ident + IDENT) - dp + 1);
1021 
1022 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1023 		struct tbl *p;
1024 		uint32_t h = hash(ident);
1025 
1026 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1027 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1028 		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
1029 			afree(yylval.cp, ATEMP);
1030 			return (p->val.i);
1031 		}
1032 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1033 		    (p->flag & ISSET)) {
1034 			/*
1035 			 * this still points to the same character as the
1036 			 * ungetsc'd terminator from above
1037 			 */
1038 			const char *cp = source->str;
1039 
1040 			/* prefer POSIX but not Korn functions over aliases */
1041 			while (ctype(*cp, C_BLANK))
1042 				/*
1043 				 * this is like getsc() without skipping
1044 				 * over Source boundaries (including not
1045 				 * parsing ungetsc'd characters that got
1046 				 * pushed into an SREREAD) which is what
1047 				 * we want here anyway: find out whether
1048 				 * the alias name is followed by a POSIX
1049 				 * function definition
1050 				 */
1051 				++cp;
1052 			/* prefer functions over aliases */
1053 			if (cp[0] != '(' || cp[1] != ')') {
1054 				Source *s = source;
1055 
1056 				while (s && (s->flags & SF_HASALIAS))
1057 					if (s->u.tblp == p)
1058 						return (LWORD);
1059 					else
1060 						s = s->next;
1061 				/* push alias expansion */
1062 				s = pushs(SALIAS, source->areap);
1063 				s->start = s->str = p->val.s;
1064 				s->u.tblp = p;
1065 				s->flags |= SF_HASALIAS;
1066 				s->line = source->line;
1067 				s->next = source;
1068 				if (source->type == SEOF) {
1069 					/* prevent infinite recursion at EOS */
1070 					source->u.tblp = p;
1071 					source->flags |= SF_HASALIAS;
1072 				}
1073 				source = s;
1074 				afree(yylval.cp, ATEMP);
1075 				goto Again;
1076 			}
1077 		}
1078 	} else if (*ident == '\0') {
1079 		/* retain typeset et al. even when quoted */
1080 		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1081 		uint32_t flag = tt ? tt->flag : 0;
1082 
1083 		if (flag & (DECL_UTIL | DECL_FWDR))
1084 			strlcpy(ident, dp, sizeof(ident));
1085 		afree(dp, ATEMP);
1086 	}
1087 
1088 	return (LWORD);
1089 }
1090 
1091 static void
gethere(void)1092 gethere(void)
1093 {
1094 	struct ioword **p;
1095 
1096 	for (p = heres; p < herep; p++)
1097 		if (!((*p)->ioflag & IOHERESTR))
1098 			readhere(*p);
1099 	herep = heres;
1100 }
1101 
1102 /*
1103  * read "<<word" text into temp file
1104  */
1105 
1106 static void
readhere(struct ioword * iop)1107 readhere(struct ioword *iop)
1108 {
1109 	int c;
1110 	const char *eof, *eofp;
1111 	XString xs;
1112 	char *xp;
1113 	size_t xpos;
1114 
1115 	eof = evalstr(iop->delim, 0);
1116 
1117 	if (!(iop->ioflag & IOEVAL))
1118 		ignore_backslash_newline++;
1119 
1120 	Xinit(xs, xp, 256, ATEMP);
1121 
1122  heredoc_read_line:
1123 	/* beginning of line */
1124 	eofp = eof;
1125 	xpos = Xsavepos(xs, xp);
1126 	if (iop->ioflag & IOSKIP) {
1127 		/* skip over leading tabs */
1128 		while ((c = getsc()) == '\t')
1129 			;	/* nothing */
1130 		goto heredoc_parse_char;
1131 	}
1132  heredoc_read_char:
1133 	c = getsc();
1134  heredoc_parse_char:
1135 	/* compare with here document marker */
1136 	if (!*eofp) {
1137 		/* end of here document marker, what to do? */
1138 		switch (c) {
1139 		case ORD(/*(*/ ')'):
1140 			if (!subshell_nesting_type)
1141 				/*-
1142 				 * not allowed outside $(...) or (...)
1143 				 * => mismatch
1144 				 */
1145 				break;
1146 			/* allow $(...) or (...) to close here */
1147 			ungetsc(/*(*/ ')');
1148 			/* FALLTHROUGH */
1149 		case 0:
1150 			/*
1151 			 * Allow EOF here to commands without trailing
1152 			 * newlines (mksh -c '...') will work as well.
1153 			 */
1154 		case ORD('\n'):
1155 			/* Newline terminates here document marker */
1156 			goto heredoc_found_terminator;
1157 		}
1158 	} else if (c == *eofp++)
1159 		/* store; then read and compare next character */
1160 		goto heredoc_store_and_loop;
1161 	/* nope, mismatch; read until end of line */
1162 	while (c != '\n') {
1163 		if (!c)
1164 			/* oops, reached EOF */
1165 			yyerror(Tf_heredoc, eof);
1166 		/* store character */
1167 		Xcheck(xs, xp);
1168 		Xput(xs, xp, c);
1169 		/* read next character */
1170 		c = getsc();
1171 	}
1172 	/* we read a newline as last character */
1173  heredoc_store_and_loop:
1174 	/* store character */
1175 	Xcheck(xs, xp);
1176 	Xput(xs, xp, c);
1177 	if (c == '\n')
1178 		goto heredoc_read_line;
1179 	goto heredoc_read_char;
1180 
1181  heredoc_found_terminator:
1182 	/* jump back to saved beginning of line */
1183 	xp = Xrestpos(xs, xp, xpos);
1184 	/* terminate, close and store */
1185 	Xput(xs, xp, '\0');
1186 	iop->heredoc = Xclose(xs, xp);
1187 
1188 	if (!(iop->ioflag & IOEVAL))
1189 		ignore_backslash_newline--;
1190 }
1191 
1192 void
yyerror(const char * fmt,...)1193 yyerror(const char *fmt, ...)
1194 {
1195 	va_list va;
1196 
1197 	/* pop aliases and re-reads */
1198 	while (source->type == SALIAS || source->type == SREREAD)
1199 		source = source->next;
1200 	/* zap pending input */
1201 	source->str = null;
1202 
1203 	error_prefix(true);
1204 	va_start(va, fmt);
1205 	shf_vfprintf(shl_out, fmt, va);
1206 	shf_putc('\n', shl_out);
1207 	va_end(va);
1208 	errorfz();
1209 }
1210 
1211 /*
1212  * input for yylex with alias expansion
1213  */
1214 
1215 Source *
pushs(int type,Area * areap)1216 pushs(int type, Area *areap)
1217 {
1218 	Source *s;
1219 
1220 	s = alloc(sizeof(Source), areap);
1221 	memset(s, 0, sizeof(Source));
1222 	s->type = type;
1223 	s->str = null;
1224 	s->areap = areap;
1225 	if (type == SFILE || type == SSTDIN)
1226 		XinitN(s->xs, 256, s->areap);
1227 	return (s);
1228 }
1229 
1230 static int
getsc_uu(void)1231 getsc_uu(void)
1232 {
1233 	Source *s = source;
1234 	int c;
1235 
1236 	while ((c = ord(*s->str++)) == 0) {
1237 		/* return 0 for EOF by default */
1238 		s->str = NULL;
1239 		switch (s->type) {
1240 		case SEOF:
1241 			s->str = null;
1242 			return (0);
1243 
1244 		case SSTDIN:
1245 		case SFILE:
1246 			getsc_line(s);
1247 			break;
1248 
1249 		case SWSTR:
1250 			break;
1251 
1252 		case SSTRING:
1253 		case SSTRINGCMDLINE:
1254 			break;
1255 
1256 		case SWORDS:
1257 			s->start = s->str = *s->u.strv++;
1258 			s->type = SWORDSEP;
1259 			break;
1260 
1261 		case SWORDSEP:
1262 			if (*s->u.strv == NULL) {
1263 				s->start = s->str = "\n";
1264 				s->type = SEOF;
1265 			} else {
1266 				s->start = s->str = T1space;
1267 				s->type = SWORDS;
1268 			}
1269 			break;
1270 
1271 		case SALIAS:
1272 			if (s->flags & SF_ALIASEND) {
1273 				/* pass on an unused SF_ALIAS flag */
1274 				source = s->next;
1275 				source->flags |= s->flags & SF_ALIAS;
1276 				s = source;
1277 			} else if (*s->u.tblp->val.s &&
1278 			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
1279 				/* pop source stack */
1280 				source = s = s->next;
1281 				/*
1282 				 * Note that this alias ended with a
1283 				 * space, enabling alias expansion on
1284 				 * the following word.
1285 				 */
1286 				s->flags |= SF_ALIAS;
1287 			} else {
1288 				/*
1289 				 * At this point, we need to keep the current
1290 				 * alias in the source list so recursive
1291 				 * aliases can be detected and we also need to
1292 				 * return the next character. Do this by
1293 				 * temporarily popping the alias to get the
1294 				 * next character and then put it back in the
1295 				 * source list with the SF_ALIASEND flag set.
1296 				 */
1297 				/* pop source stack */
1298 				source = s->next;
1299 				source->flags |= s->flags & SF_ALIAS;
1300 				c = getsc_uu();
1301 				if (c) {
1302 					s->flags |= SF_ALIASEND;
1303 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1304 					s->start = s->str = s->ugbuf;
1305 					s->next = source;
1306 					source = s;
1307 				} else {
1308 					s = source;
1309 					/* avoid reading EOF twice */
1310 					s->str = NULL;
1311 					break;
1312 				}
1313 			}
1314 			continue;
1315 
1316 		case SREREAD:
1317 			if (s->start != s->ugbuf)
1318 				/* yuck */
1319 				afree(s->u.freeme, ATEMP);
1320 			source = s = s->next;
1321 			continue;
1322 		}
1323 		if (s->str == NULL) {
1324 			s->type = SEOF;
1325 			s->start = s->str = null;
1326 			return ('\0');
1327 		}
1328 		if (s->flags & SF_ECHO) {
1329 			shf_puts(s->str, shl_out);
1330 			shf_flush(shl_out);
1331 		}
1332 	}
1333 	return (c);
1334 }
1335 
1336 static void
getsc_line(Source * s)1337 getsc_line(Source *s)
1338 {
1339 	char *xp = Xstring(s->xs, xp), *cp;
1340 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1341 	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1342 
1343 	/* Done here to ensure nothing odd happens when a timeout occurs */
1344 	XcheckN(s->xs, xp, LINE);
1345 	*xp = '\0';
1346 	s->start = s->str = xp;
1347 
1348 	if (have_tty && ksh_tmout) {
1349 		ksh_tmout_state = TMOUT_READING;
1350 		alarm(ksh_tmout);
1351 	}
1352 	if (interactive) {
1353 		if (cur_prompt == PS1)
1354 			histsave(&s->line, NULL, HIST_FLUSH, true);
1355 		change_winsz();
1356 	}
1357 #ifndef MKSH_NO_CMDLINE_EDITING
1358 	if (have_tty && (
1359 #if !MKSH_S_NOVI
1360 	    Flag(FVI) ||
1361 #endif
1362 	    Flag(FEMACS) || Flag(FGMACS))) {
1363 		int nread;
1364 
1365 		nread = x_read(xp);
1366 		if (nread < 0)
1367 			/* read error */
1368 			nread = 0;
1369 		xp[nread] = '\0';
1370 		xp += nread;
1371 	} else
1372 #endif
1373 	  {
1374 		if (interactive)
1375 			pprompt(prompt, 0);
1376 		else
1377 			s->line++;
1378 
1379 		while (/* CONSTCOND */ 1) {
1380 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1381 
1382 			if (!p && shf_error(s->u.shf) &&
1383 			    shf_errno(s->u.shf) == EINTR) {
1384 				shf_clearerr(s->u.shf);
1385 				if (trap)
1386 					runtraps(0);
1387 				continue;
1388 			}
1389 			if (!p || (xp = p, xp[-1] == '\n'))
1390 				break;
1391 			/* double buffer size */
1392 			/* move past NUL so doubling works... */
1393 			xp++;
1394 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1395 			/* ...and move back again */
1396 			xp--;
1397 		}
1398 		/*
1399 		 * flush any unwanted input so other programs/builtins
1400 		 * can read it. Not very optimal, but less error prone
1401 		 * than flushing else where, dealing with redirections,
1402 		 * etc.
1403 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1404 		 */
1405 		if (s->type == SSTDIN)
1406 			shf_flush(s->u.shf);
1407 	}
1408 	/*
1409 	 * XXX: temporary kludge to restore source after a
1410 	 * trap may have been executed.
1411 	 */
1412 	source = s;
1413 	if (have_tty && ksh_tmout) {
1414 		ksh_tmout_state = TMOUT_EXECUTING;
1415 		alarm(0);
1416 	}
1417 	cp = Xstring(s->xs, xp);
1418 	rndpush(cp);
1419 	s->start = s->str = cp;
1420 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1421 	/* Note: if input is all nulls, this is not eof */
1422 	if (Xlength(s->xs, xp) == 0) {
1423 		/* EOF */
1424 		if (s->type == SFILE)
1425 			shf_fdclose(s->u.shf);
1426 		s->str = NULL;
1427 	} else if (interactive && *s->str) {
1428 		if (cur_prompt != PS1)
1429 			histsave(&s->line, s->str, HIST_APPEND, true);
1430 		else if (!ctype(*s->str, C_IFS | C_IFSWS))
1431 			histsave(&s->line, s->str, HIST_QUEUE, true);
1432 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1433 		else
1434 			goto check_for_sole_return;
1435 	} else if (interactive && cur_prompt == PS1) {
1436  check_for_sole_return:
1437 		cp = Xstring(s->xs, xp);
1438 		while (ctype(*cp, C_IFSWS))
1439 			++cp;
1440 		if (!*cp) {
1441 			histsave(&s->line, NULL, HIST_FLUSH, true);
1442 			histsync();
1443 		}
1444 #endif
1445 	}
1446 	if (interactive)
1447 		set_prompt(PS2, NULL);
1448 }
1449 
1450 void
set_prompt(int to,Source * s)1451 set_prompt(int to, Source *s)
1452 {
1453 	cur_prompt = (uint8_t)to;
1454 
1455 	switch (to) {
1456 	/* command */
1457 	case PS1:
1458 		/*
1459 		 * Substitute ! and !! here, before substitutions are done
1460 		 * so ! in expanded variables are not expanded.
1461 		 * NOTE: this is not what AT&T ksh does (it does it after
1462 		 * substitutions, POSIX doesn't say which is to be done.
1463 		 */
1464 		{
1465 			struct shf *shf;
1466 			char * volatile ps1;
1467 			Area *saved_atemp;
1468 			int saved_lineno;
1469 
1470 			ps1 = str_val(global("PS1"));
1471 			shf = shf_sopen(NULL, strlen(ps1) * 2,
1472 			    SHF_WR | SHF_DYNAMIC, NULL);
1473 			while (*ps1)
1474 				if (*ps1 != '!' || *++ps1 == '!')
1475 					shf_putchar(*ps1++, shf);
1476 				else
1477 					shf_fprintf(shf, Tf_lu, s ?
1478 					    (unsigned long)s->line + 1 : 0UL);
1479 			ps1 = shf_sclose(shf);
1480 			saved_lineno = current_lineno;
1481 			if (s)
1482 				current_lineno = s->line + 1;
1483 			saved_atemp = ATEMP;
1484 			newenv(E_ERRH);
1485 			if (kshsetjmp(e->jbuf)) {
1486 				prompt = safe_prompt;
1487 				/*
1488 				 * Don't print an error - assume it has already
1489 				 * been printed. Reason is we may have forked
1490 				 * to run a command and the child may be
1491 				 * unwinding its stack through this code as it
1492 				 * exits.
1493 				 */
1494 			} else {
1495 				char *cp = substitute(ps1, 0);
1496 				strdupx(prompt, cp, saved_atemp);
1497 			}
1498 			current_lineno = saved_lineno;
1499 			quitenv(NULL);
1500 		}
1501 		break;
1502 	/* command continuation */
1503 	case PS2:
1504 		prompt = str_val(global("PS2"));
1505 		break;
1506 	}
1507 }
1508 
1509 int
pprompt(const char * cp,int ntruncate)1510 pprompt(const char *cp, int ntruncate)
1511 {
1512 	char delimiter = 0;
1513 	bool doprint = (ntruncate != -1);
1514 	bool indelimit = false;
1515 	int columns = 0, lines = 0;
1516 
1517 	/*
1518 	 * Undocumented AT&T ksh feature:
1519 	 * If the second char in the prompt string is \r then the first
1520 	 * char is taken to be a non-printing delimiter and any chars
1521 	 * between two instances of the delimiter are not considered to
1522 	 * be part of the prompt length
1523 	 */
1524 	if (*cp && cp[1] == '\r') {
1525 		delimiter = *cp;
1526 		cp += 2;
1527 	}
1528 	for (; *cp; cp++) {
1529 		if (indelimit && *cp != delimiter)
1530 			;
1531 		else if (ctype(*cp, C_CR | C_LF)) {
1532 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1533 			columns = 0;
1534 		} else if (*cp == '\t') {
1535 			columns = (columns | 7) + 1;
1536 		} else if (*cp == '\b') {
1537 			if (columns > 0)
1538 				columns--;
1539 		} else if (*cp == delimiter)
1540 			indelimit = !indelimit;
1541 		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1542 			const char *cp2;
1543 			columns += utf_widthadj(cp, &cp2);
1544 			if (doprint && (indelimit ||
1545 			    (ntruncate < (x_cols * lines + columns))))
1546 				shf_write(cp, cp2 - cp, shl_out);
1547 			cp = cp2 - /* loop increment */ 1;
1548 			continue;
1549 		} else
1550 			columns++;
1551 		if (doprint && (*cp != delimiter) &&
1552 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1553 			shf_putc(*cp, shl_out);
1554 	}
1555 	if (doprint)
1556 		shf_flush(shl_out);
1557 	return (x_cols * lines + columns);
1558 }
1559 
1560 /*
1561  * Read the variable part of a ${...} expression (i.e. up to but not
1562  * including the :[-+?=#%] or close-brace).
1563  */
1564 static char *
get_brace_var(XString * wsp,char * wp)1565 get_brace_var(XString *wsp, char *wp)
1566 {
1567 	char c;
1568 	enum parse_state {
1569 		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1570 		PS_IDENT, PS_NUMBER, PS_VAR1
1571 	} state = PS_INITIAL;
1572 
1573 	while (/* CONSTCOND */ 1) {
1574 		c = getsc();
1575 		/* State machine to figure out where the variable part ends. */
1576 		switch (state) {
1577 		case PS_SAW_HASH:
1578 			if (ctype(c, C_VAR1)) {
1579 				char c2;
1580 
1581 				c2 = getsc();
1582 				ungetsc(c2);
1583 				if (ord(c2) != ORD(/*{*/ '}')) {
1584 					ungetsc(c);
1585 					goto out;
1586 				}
1587 			}
1588 			goto ps_common;
1589 		case PS_SAW_BANG:
1590 			switch (ord(c)) {
1591 			case ORD('@'):
1592 			case ORD('#'):
1593 			case ORD('-'):
1594 			case ORD('?'):
1595 				goto out;
1596 			}
1597 			goto ps_common;
1598 		case PS_INITIAL:
1599 			switch (ord(c)) {
1600 			case ORD('%'):
1601 				state = PS_SAW_PERCENT;
1602 				goto next;
1603 			case ORD('#'):
1604 				state = PS_SAW_HASH;
1605 				goto next;
1606 			case ORD('!'):
1607 				state = PS_SAW_BANG;
1608 				goto next;
1609 			}
1610 			/* FALLTHROUGH */
1611 		case PS_SAW_PERCENT:
1612  ps_common:
1613 			if (ctype(c, C_ALPHX))
1614 				state = PS_IDENT;
1615 			else if (ctype(c, C_DIGIT))
1616 				state = PS_NUMBER;
1617 			else if (ctype(c, C_VAR1))
1618 				state = PS_VAR1;
1619 			else
1620 				goto out;
1621 			break;
1622 		case PS_IDENT:
1623 			if (!ctype(c, C_ALNUX)) {
1624 				if (ord(c) == ORD('[')) {
1625 					char *tmp, *p;
1626 
1627 					if (!arraysub(&tmp))
1628 						yyerror("missing ]");
1629 					*wp++ = c;
1630 					p = tmp;
1631 					while (*p) {
1632 						Xcheck(*wsp, wp);
1633 						*wp++ = *p++;
1634 					}
1635 					afree(tmp, ATEMP);
1636 					/* the ] */
1637 					c = getsc();
1638 				}
1639 				goto out;
1640 			}
1641  next:
1642 			break;
1643 		case PS_NUMBER:
1644 			if (!ctype(c, C_DIGIT))
1645 				goto out;
1646 			break;
1647 		case PS_VAR1:
1648 			goto out;
1649 		}
1650 		Xcheck(*wsp, wp);
1651 		*wp++ = c;
1652 	}
1653  out:
1654 	/* end of variable part */
1655 	*wp++ = '\0';
1656 	ungetsc(c);
1657 	return (wp);
1658 }
1659 
1660 /*
1661  * Save an array subscript - returns true if matching bracket found, false
1662  * if eof or newline was found.
1663  * (Returned string double null terminated)
1664  */
1665 static bool
arraysub(char ** strp)1666 arraysub(char **strp)
1667 {
1668 	XString ws;
1669 	char *wp, c;
1670 	/* we are just past the initial [ */
1671 	unsigned int depth = 1;
1672 
1673 	Xinit(ws, wp, 32, ATEMP);
1674 
1675 	do {
1676 		c = getsc();
1677 		Xcheck(ws, wp);
1678 		*wp++ = c;
1679 		if (ord(c) == ORD('['))
1680 			depth++;
1681 		else if (ord(c) == ORD(']'))
1682 			depth--;
1683 	} while (depth > 0 && c && c != '\n');
1684 
1685 	*wp++ = '\0';
1686 	*strp = Xclose(ws, wp);
1687 
1688 	return (tobool(depth == 0));
1689 }
1690 
1691 /* Unget a char: handles case when we are already at the start of the buffer */
1692 static void
ungetsc(int c)1693 ungetsc(int c)
1694 {
1695 	struct sretrace_info *rp = retrace_info;
1696 
1697 	if (backslash_skip)
1698 		backslash_skip--;
1699 	/* Don't unget EOF... */
1700 	if (source->str == null && c == '\0')
1701 		return;
1702 	while (rp) {
1703 		if (Xlength(rp->xs, rp->xp))
1704 			rp->xp--;
1705 		rp = rp->next;
1706 	}
1707 	ungetsc_i(c);
1708 }
1709 static void
ungetsc_i(int c)1710 ungetsc_i(int c)
1711 {
1712 	if (source->str > source->start)
1713 		source->str--;
1714 	else {
1715 		Source *s;
1716 
1717 		s = pushs(SREREAD, source->areap);
1718 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1719 		s->start = s->str = s->ugbuf;
1720 		s->next = source;
1721 		source = s;
1722 	}
1723 }
1724 
1725 
1726 /* Called to get a char that isn't a \newline sequence. */
1727 static int
getsc_bn(void)1728 getsc_bn(void)
1729 {
1730 	int c, c2;
1731 
1732 	if (ignore_backslash_newline)
1733 		return (o_getsc_u());
1734 
1735 	if (backslash_skip == 1) {
1736 		backslash_skip = 2;
1737 		return (o_getsc_u());
1738 	}
1739 
1740 	backslash_skip = 0;
1741 
1742 	while (/* CONSTCOND */ 1) {
1743 		c = o_getsc_u();
1744 		if (c == '\\') {
1745 			if ((c2 = o_getsc_u()) == '\n')
1746 				/* ignore the \newline; get the next char... */
1747 				continue;
1748 			ungetsc_i(c2);
1749 			backslash_skip = 1;
1750 		}
1751 		return (c);
1752 	}
1753 }
1754 
1755 void
yyskiputf8bom(void)1756 yyskiputf8bom(void)
1757 {
1758 	int c;
1759 
1760 	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1761 		ungetsc_i(c);
1762 		return;
1763 	}
1764 	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1765 		ungetsc_i(c);
1766 		ungetsc_i(asc2rtt(0xEF));
1767 		return;
1768 	}
1769 	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1770 		ungetsc_i(c);
1771 		ungetsc_i(asc2rtt(0xBB));
1772 		ungetsc_i(asc2rtt(0xEF));
1773 		return;
1774 	}
1775 	UTFMODE |= 8;
1776 }
1777 
1778 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1779 push_state_i(State_info *si, Lex_state *old_end)
1780 {
1781 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1782 
1783 	news[0].ls_base = old_end;
1784 	si->base = &news[0];
1785 	si->end = &news[STATE_BSIZE];
1786 	return (&news[1]);
1787 }
1788 
1789 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1790 pop_state_i(State_info *si, Lex_state *old_end)
1791 {
1792 	Lex_state *old_base = si->base;
1793 
1794 	si->base = old_end->ls_base - STATE_BSIZE;
1795 	si->end = old_end->ls_base;
1796 
1797 	afree(old_base, ATEMP);
1798 
1799 	return (si->base + STATE_BSIZE - 1);
1800 }
1801