/*	$OpenBSD: lex.c,v 1.45 2011/03/09 09:30:39 okan Exp $	*/

/*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
 *	Thorsten Glaser <tg@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */
22 
23 #include "sh.h"
24 
25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.156 2011/09/07 15:24:16 tg Exp $");
26 
/*
 * states while lexing word
 *
 * These values are stored in Lex_state.type (an uint8_t) and in the
 * "state" local of yylex().
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing <<,<<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in <<,<<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define SHERESTRING	14	/* parsing <<< string */
#define STBRACEKORN	15	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	16	/* parsing ${...[#%]...} FSH */
#define SINVALID	255	/* invalid state */
48 
49 struct sretrace_info {
50 	struct sretrace_info *next;
51 	XString xs;
52 	char *xp;
53 };
54 
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The members of the union are alternatives; which one is meaningful
 * depends on the state (see the per-member comments). The ls_* macros
 * below are shorthands for the union members.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		int start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
} Lex_state;
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85 
86 typedef struct {
87 	Lex_state *base;
88 	Lex_state *end;
89 } State_info;
90 
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_(State_info *, Lex_state *);
103 static Lex_state *pop_state_(State_info *, Lex_state *);
104 
105 static int dopprompt(const char *, int, bool);
106 void yyskiputf8bom(void);
107 
108 static int backslash_skip;
109 static int ignore_backslash_newline;
110 static struct sretrace_info *retrace_info;
111 short subshell_nesting_level = 0;
112 
/*
 * optimised getsc_bn(): take the next character straight from
 * source->str unless it is NUL or a backslash or backslash_skip is
 * set; in those cases fall back to getsc_bn()
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			    !backslash_skip ? *source->str++ : getsc_bn())
/*
 * optimised getsc_uu(): take the next character straight from
 * source->str unless it is NUL, in which case getsc_uu() is called
 */
#define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: this expands to a complete function body (braces
 * and return statement included). It evaluates carg once, appends the
 * resulting character to every buffer on the retrace_info list and
 * returns the character.
 */
#define o_getsc_r(carg)	{				\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);					\
}
132 
/*
 * getsc(): read the next input character via o_getsc() and record it
 * in all active retrace buffers (see o_getsc_r).
 *
 * With MKSH_SMALL this is one real function; otherwise getsc() is a
 * macro which expands o_getsc() at the call site and hands the result
 * to getsc_r() for the retrace bookkeeping.
 */
#ifdef MKSH_SMALL
static int getsc(void);

static int
getsc(void)
{
	o_getsc_r(o_getsc());
}
#else
static int getsc_r(int);

/* record the already read character c and return it */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r(o_getsc())
#endif
152 
/* number of Lex_state entries in one block of the state stack */
#define STATE_BSIZE	8

/*
 * The following macros are written for use inside yylex() only: they
 * operate on its locals statep, state, state_info, ws, wp, sp and dp.
 */

/*
 * enter lexer state s: advance statep, letting push_state_() supply
 * the next position once the end of the current block is reached
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/*
 * leave the current lexer state: step statep back, letting
 * pop_state_() supply the position once the base of the current
 * block is reached, and reload state from the entry now on top
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * start retracing: remember the current output position in the
 * current state and link a fresh buffer in front of retrace_info
 */
#define PUSH_SRETRACE()	do {					\
	struct sretrace_info *ri;				\
								\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * stop retracing: rewind wp to the remembered output position, leave
 * the NUL-terminated retraced text in sp and unlink and free the list
 * node; the text in sp is not freed here
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
} while (/* CONSTCOND */ 0)
185 
186 /**
187  * Lexical analyser
188  *
189  * tokens are not regular expressions, they are LL(1).
190  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
191  * hence the state stack. Note "$(...)" are now parsed recursively.
192  */
193 
194 int
yylex(int cf)195 yylex(int cf)
196 {
197 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
198 	State_info state_info;
199 	int c, c2, state;
200 	size_t cz;
201 	XString ws;		/* expandable output word */
202 	char *wp;		/* output word pointer */
203 	char *sp, *dp;
204 
205  Again:
206 	states[0].type = SINVALID;
207 	states[0].ls_base = NULL;
208 	statep = &states[1];
209 	state_info.base = states;
210 	state_info.end = &state_info.base[STATE_BSIZE];
211 
212 	Xinit(ws, wp, 64, ATEMP);
213 
214 	backslash_skip = 0;
215 	ignore_backslash_newline = 0;
216 
217 	if (cf & ONEWORD)
218 		state = SWORD;
219 	else if (cf & LETEXPR) {
220 		/* enclose arguments in (double) quotes */
221 		*wp++ = OQUOTE;
222 		state = SLETPAREN;
223 		statep->nparen = 0;
224 	} else {
225 		/* normal lexing */
226 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
227 		while ((c = getsc()) == ' ' || c == '\t')
228 			;
229 		if (c == '#') {
230 			ignore_backslash_newline++;
231 			while ((c = getsc()) != '\0' && c != '\n')
232 				;
233 			ignore_backslash_newline--;
234 		}
235 		ungetsc(c);
236 	}
237 	if (source->flags & SF_ALIAS) {
238 		/* trailing ' ' in alias definition */
239 		source->flags &= ~SF_ALIAS;
240 		cf |= ALIAS;
241 	}
242 
243 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
244 	statep->type = state;
245 
246 	/* check for here string */
247 	if (state == SHEREDELIM) {
248 		c = getsc();
249 		if (c == '<') {
250 			state = SHERESTRING;
251 			while ((c = getsc()) == ' ' || c == '\t')
252 				;
253 			ungetsc(c);
254 			c = '<';
255 			goto accept_nonword;
256 		}
257 		ungetsc(c);
258 	}
259 
260 	/* collect non-special or quoted characters to form word */
261 	while (!((c = getsc()) == 0 ||
262 	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
263 	    ctype(c, C_LEX1)))) {
264  accept_nonword:
265 		Xcheck(ws, wp);
266 		switch (state) {
267 		case SADELIM:
268 			if (c == '(')
269 				statep->nparen++;
270 			else if (c == ')')
271 				statep->nparen--;
272 			else if (statep->nparen == 0 &&
273 			    (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) {
274 				*wp++ = ADELIM;
275 				*wp++ = c;
276 				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
277 					POP_STATE();
278 				if (c == /*{*/ '}')
279 					POP_STATE();
280 				break;
281 			}
282 			/* FALLTHROUGH */
283 		case SBASE:
284 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
285 				/* temporary */
286 				*wp = EOS;
287 				if (is_wdvarname(Xstring(ws, wp), false)) {
288 					char *p, *tmp;
289 
290 					if (arraysub(&tmp)) {
291 						*wp++ = CHAR;
292 						*wp++ = c;
293 						for (p = tmp; *p; ) {
294 							Xcheck(ws, wp);
295 							*wp++ = CHAR;
296 							*wp++ = *p++;
297 						}
298 						afree(tmp, ATEMP);
299 						break;
300 					} else {
301 						Source *s;
302 
303 						s = pushs(SREREAD,
304 						    source->areap);
305 						s->start = s->str =
306 						    s->u.freeme = tmp;
307 						s->next = source;
308 						source = s;
309 					}
310 				}
311 				*wp++ = CHAR;
312 				*wp++ = c;
313 				break;
314 			}
315 			/* FALLTHROUGH */
316  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
317 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
318 			    c == '!') {
319 				c2 = getsc();
320 				if (c2 == '(' /*)*/ ) {
321 					*wp++ = OPAT;
322 					*wp++ = c;
323 					PUSH_STATE(SPATTERN);
324 					break;
325 				}
326 				ungetsc(c2);
327 			}
328 			/* FALLTHROUGH */
329  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
330 			switch (c) {
331 			case '\\':
332  getsc_qchar:
333 				if ((c = getsc())) {
334 					/* trailing \ is lost */
335 					*wp++ = QCHAR;
336 					*wp++ = c;
337 				}
338 				break;
339 			case '\'':
340  open_ssquote:
341 				*wp++ = OQUOTE;
342 				ignore_backslash_newline++;
343 				PUSH_STATE(SSQUOTE);
344 				break;
345 			case '"':
346  open_sdquote:
347 				*wp++ = OQUOTE;
348 				PUSH_STATE(SDQUOTE);
349 				break;
350 			default:
351 				goto Subst;
352 			}
353 			break;
354 
355  Subst:
356 			switch (c) {
357 			case '\\':
358 				c = getsc();
359 				switch (c) {
360 				case '"':
361 					if ((cf & HEREDOC))
362 						goto heredocquote;
363 					/* FALLTHROUGH */
364 				case '\\':
365 				case '$': case '`':
366  store_qchar:
367 					*wp++ = QCHAR;
368 					*wp++ = c;
369 					break;
370 				default:
371  heredocquote:
372 					Xcheck(ws, wp);
373 					if (c) {
374 						/* trailing \ is lost */
375 						*wp++ = CHAR;
376 						*wp++ = '\\';
377 						*wp++ = CHAR;
378 						*wp++ = c;
379 					}
380 					break;
381 				}
382 				break;
383 			case '$':
384  subst_dollar:
385 				c = getsc();
386 				if (c == '(') /*)*/ {
387 					c = getsc();
388 					if (c == '(') /*)*/ {
389 						*wp++ = EXPRSUB;
390 						PUSH_STATE(SASPAREN);
391 						statep->nparen = 2;
392 						PUSH_SRETRACE();
393 						*retrace_info->xp++ = '(';
394 					} else {
395 						ungetsc(c);
396  subst_command:
397 						sp = yyrecursive();
398 						cz = strlen(sp) + 1;
399 						XcheckN(ws, wp, cz);
400 						*wp++ = COMSUB;
401 						memcpy(wp, sp, cz);
402 						wp += cz;
403 					}
404 				} else if (c == '{') /*}*/ {
405 					*wp++ = OSUBST;
406 					*wp++ = '{'; /*}*/
407 					wp = get_brace_var(&ws, wp);
408 					c = getsc();
409 					/* allow :# and :% (ksh88 compat) */
410 					if (c == ':') {
411 						*wp++ = CHAR;
412 						*wp++ = c;
413 						c = getsc();
414 						if (c == ':') {
415 							*wp++ = CHAR;
416 							*wp++ = '0';
417 							*wp++ = ADELIM;
418 							*wp++ = ':';
419 							PUSH_STATE(SBRACE);
420 							PUSH_STATE(SADELIM);
421 							statep->ls_adelim.delimiter = ':';
422 							statep->ls_adelim.num = 1;
423 							statep->nparen = 0;
424 							break;
425 						} else if (ksh_isdigit(c) ||
426 						    c == '('/*)*/ || c == ' ' ||
427 						    /*XXX what else? */
428 						    c == '$') {
429 							/* substring subst. */
430 							if (c != ' ') {
431 								*wp++ = CHAR;
432 								*wp++ = ' ';
433 							}
434 							ungetsc(c);
435 							PUSH_STATE(SBRACE);
436 							PUSH_STATE(SADELIM);
437 							statep->ls_adelim.delimiter = ':';
438 							statep->ls_adelim.num = 2;
439 							statep->nparen = 0;
440 							break;
441 						}
442 					} else if (c == '/') {
443 						*wp++ = CHAR;
444 						*wp++ = c;
445 						if ((c = getsc()) == '/') {
446 							*wp++ = ADELIM;
447 							*wp++ = c;
448 						} else
449 							ungetsc(c);
450 						PUSH_STATE(SBRACE);
451 						PUSH_STATE(SADELIM);
452 						statep->ls_adelim.delimiter = '/';
453 						statep->ls_adelim.num = 1;
454 						statep->nparen = 0;
455 						break;
456 					}
457 					/*
458 					 * If this is a trim operation,
459 					 * treat (,|,) specially in STBRACE.
460 					 */
461 					if (ctype(c, C_SUBOP2)) {
462 						ungetsc(c);
463 						if (Flag(FSH))
464 							PUSH_STATE(STBRACEBOURNE);
465 						else
466 							PUSH_STATE(STBRACEKORN);
467 					} else {
468 						ungetsc(c);
469 						if (state == SDQUOTE)
470 							PUSH_STATE(SQBRACE);
471 						else
472 							PUSH_STATE(SBRACE);
473 					}
474 				} else if (ksh_isalphx(c)) {
475 					*wp++ = OSUBST;
476 					*wp++ = 'X';
477 					do {
478 						Xcheck(ws, wp);
479 						*wp++ = c;
480 						c = getsc();
481 					} while (ksh_isalnux(c));
482 					*wp++ = '\0';
483 					*wp++ = CSUBST;
484 					*wp++ = 'X';
485 					ungetsc(c);
486 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
487 					Xcheck(ws, wp);
488 					*wp++ = OSUBST;
489 					*wp++ = 'X';
490 					*wp++ = c;
491 					*wp++ = '\0';
492 					*wp++ = CSUBST;
493 					*wp++ = 'X';
494 				} else if (c == '\'' && (state == SBASE)) {
495 					/* XXX which other states are valid? */
496 					*wp++ = OQUOTE;
497 					ignore_backslash_newline++;
498 					PUSH_STATE(SEQUOTE);
499 					statep->ls_bool = false;
500 					break;
501 				} else if (c == '"' && (state == SBASE)) {
502 					/* XXX which other states are valid? */
503 					goto DEQUOTE;
504 				} else {
505 					*wp++ = CHAR;
506 					*wp++ = '$';
507  DEQUOTE:
508 					ungetsc(c);
509 				}
510 				break;
511 			case '`':
512  subst_gravis:
513 				PUSH_STATE(SBQUOTE);
514 				*wp++ = COMSUB;
515 				/*
516 				 * Need to know if we are inside double quotes
517 				 * since sh/AT&T-ksh translate the \" to " in
518 				 * "`...\"...`".
519 				 * This is not done in POSIX mode (section
520 				 * 3.2.3, Double Quotes: "The backquote shall
521 				 * retain its special meaning introducing the
522 				 * other form of command substitution (see
523 				 * 3.6.3). The portion of the quoted string
524 				 * from the initial backquote and the
525 				 * characters up to the next backquote that
526 				 * is not preceded by a backslash (having
527 				 * escape characters removed) defines that
528 				 * command whose output replaces `...` when
529 				 * the word is expanded."
530 				 * Section 3.6.3, Command Substitution:
531 				 * "Within the backquoted style of command
532 				 * substitution, backslash shall retain its
533 				 * literal meaning, except when followed by
534 				 * $ ` \.").
535 				 */
536 				statep->ls_bool = false;
537 				s2 = statep;
538 				base = state_info.base;
539 				while (/* CONSTCOND */ 1) {
540 					for (; s2 != base; s2--) {
541 						if (s2->type == SDQUOTE) {
542 							statep->ls_bool = true;
543 							break;
544 						}
545 					}
546 					if (s2 != base)
547 						break;
548 					if (!(s2 = s2->ls_base))
549 						break;
550 					base = s2-- - STATE_BSIZE;
551 				}
552 				break;
553 			case QCHAR:
554 				if (cf & LQCHAR) {
555 					*wp++ = QCHAR;
556 					*wp++ = getsc();
557 					break;
558 				}
559 				/* FALLTHROUGH */
560 			default:
561  store_char:
562 				*wp++ = CHAR;
563 				*wp++ = c;
564 			}
565 			break;
566 
567 		case SEQUOTE:
568 			if (c == '\'') {
569 				POP_STATE();
570 				*wp++ = CQUOTE;
571 				ignore_backslash_newline--;
572 			} else if (c == '\\') {
573 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
574 					c2 = s_get();
575 				if (c2 == 0)
576 					statep->ls_bool = true;
577 				if (!statep->ls_bool) {
578 					char ts[4];
579 
580 					if ((unsigned int)c2 < 0x100) {
581 						*wp++ = QCHAR;
582 						*wp++ = c2;
583 					} else {
584 						cz = utf_wctomb(ts, c2 - 0x100);
585 						ts[cz] = 0;
586 						for (cz = 0; ts[cz]; ++cz) {
587 							*wp++ = QCHAR;
588 							*wp++ = ts[cz];
589 						}
590 					}
591 				}
592 			} else if (!statep->ls_bool) {
593 				*wp++ = QCHAR;
594 				*wp++ = c;
595 			}
596 			break;
597 
598 		case SSQUOTE:
599 			if (c == '\'') {
600 				POP_STATE();
601 				*wp++ = CQUOTE;
602 				ignore_backslash_newline--;
603 			} else {
604 				*wp++ = QCHAR;
605 				*wp++ = c;
606 			}
607 			break;
608 
609 		case SDQUOTE:
610 			if (c == '"') {
611 				POP_STATE();
612 				*wp++ = CQUOTE;
613 			} else
614 				goto Subst;
615 			break;
616 
617 		/* $(( ... )) */
618 		case SASPAREN:
619 			if (c == '(')
620 				statep->nparen++;
621 			else if (c == ')') {
622 				statep->nparen--;
623 				if (statep->nparen == 1) {
624 					/* end of EXPRSUB */
625 					POP_SRETRACE();
626 					POP_STATE();
627 
628 					if ((c2 = getsc()) == /*(*/ ')') {
629 						cz = strlen(sp) - 2;
630 						XcheckN(ws, wp, cz);
631 						memcpy(wp, sp + 1, cz);
632 						wp += cz;
633 						afree(sp, ATEMP);
634 						*wp++ = '\0';
635 						break;
636 					} else {
637 						Source *s;
638 
639 						ungetsc(c2);
640 						/*
641 						 * mismatched parenthesis -
642 						 * assume we were really
643 						 * parsing a $(...) expression
644 						 */
645 						--wp;
646 						s = pushs(SREREAD,
647 						    source->areap);
648 						s->start = s->str =
649 						    s->u.freeme = sp;
650 						s->next = source;
651 						source = s;
652 						goto subst_command;
653 					}
654 				}
655 			}
656 			/* reuse existing state machine */
657 			goto Sbase2;
658 
659 		case SQBRACE:
660 			if (c == '\\') {
661 				/*
662 				 * perform POSIX "quote removal" if the back-
663 				 * slash is "special", i.e. same cases as the
664 				 * {case '\\':} in Subst: plus closing brace;
665 				 * in mksh code "quote removal" on '\c' means
666 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
667 				 * emitted (in heredocquote:)
668 				 */
669 				if ((c = getsc()) == '"' || c == '\\' ||
670 				    c == '$' || c == '`' || c == /*{*/'}')
671 					goto store_qchar;
672 				goto heredocquote;
673 			}
674 			goto common_SQBRACE;
675 
676 		case SBRACE:
677 			if (c == '\'')
678 				goto open_ssquote;
679 			else if (c == '\\')
680 				goto getsc_qchar;
681  common_SQBRACE:
682 			if (c == '"')
683 				goto open_sdquote;
684 			else if (c == '$')
685 				goto subst_dollar;
686 			else if (c == '`')
687 				goto subst_gravis;
688 			else if (c != /*{*/ '}')
689 				goto store_char;
690 			POP_STATE();
691 			*wp++ = CSUBST;
692 			*wp++ = /*{*/ '}';
693 			break;
694 
695 		/* Same as SBASE, except (,|,) treated specially */
696 		case STBRACEKORN:
697 			if (c == '|')
698 				*wp++ = SPAT;
699 			else if (c == '(') {
700 				*wp++ = OPAT;
701 				/* simile for @ */
702 				*wp++ = ' ';
703 				PUSH_STATE(SPATTERN);
704 			} else /* FALLTHROUGH */
705 		case STBRACEBOURNE:
706 			  if (c == /*{*/ '}') {
707 				POP_STATE();
708 				*wp++ = CSUBST;
709 				*wp++ = /*{*/ '}';
710 			} else
711 				goto Sbase1;
712 			break;
713 
714 		case SBQUOTE:
715 			if (c == '`') {
716 				*wp++ = 0;
717 				POP_STATE();
718 			} else if (c == '\\') {
719 				switch (c = getsc()) {
720 				case 0:
721 					/* trailing \ is lost */
722 					break;
723 				case '\\':
724 				case '$': case '`':
725 					*wp++ = c;
726 					break;
727 				case '"':
728 					if (statep->ls_bool) {
729 						*wp++ = c;
730 						break;
731 					}
732 					/* FALLTHROUGH */
733 				default:
734 					*wp++ = '\\';
735 					*wp++ = c;
736 					break;
737 				}
738 			} else
739 				*wp++ = c;
740 			break;
741 
742 		/* ONEWORD */
743 		case SWORD:
744 			goto Subst;
745 
746 		/* LETEXPR: (( ... )) */
747 		case SLETPAREN:
748 			if (c == /*(*/ ')') {
749 				if (statep->nparen > 0)
750 					--statep->nparen;
751 				else if ((c2 = getsc()) == /*(*/ ')') {
752 					c = 0;
753 					*wp++ = CQUOTE;
754 					goto Done;
755 				} else {
756 					Source *s;
757 
758 					ungetsc(c2);
759 					/*
760 					 * mismatched parenthesis -
761 					 * assume we were really
762 					 * parsing a (...) expression
763 					 */
764 					*wp = EOS;
765 					sp = Xstring(ws, wp);
766 					dp = wdstrip(sp, WDS_KEEPQ);
767 					s = pushs(SREREAD, source->areap);
768 					s->start = s->str = s->u.freeme = dp;
769 					s->next = source;
770 					source = s;
771 					return ('('/*)*/);
772 				}
773 			} else if (c == '(')
774 				/*
775 				 * parentheses inside quotes and
776 				 * backslashes are lost, but AT&T ksh
777 				 * doesn't count them either
778 				 */
779 				++statep->nparen;
780 			goto Sbase2;
781 
782 		/* <<< delimiter */
783 		case SHERESTRING:
784 			if (c == '\\') {
785 				c = getsc();
786 				if (c) {
787 					/* trailing \ is lost */
788 					*wp++ = QCHAR;
789 					*wp++ = c;
790 				}
791 			} else if (c == '$') {
792 				if ((c2 = getsc()) == '\'') {
793 					PUSH_STATE(SEQUOTE);
794 					statep->ls_bool = false;
795 					goto sherestring_quoted;
796 				} else if (c2 == '"')
797 					goto sherestring_dquoted;
798 				ungetsc(c2);
799 				goto sherestring_regular;
800 			} else if (c == '\'') {
801 				PUSH_STATE(SSQUOTE);
802  sherestring_quoted:
803 				*wp++ = OQUOTE;
804 				ignore_backslash_newline++;
805 			} else if (c == '"') {
806  sherestring_dquoted:
807 				state = statep->type = SHEREDQUOTE;
808 				*wp++ = OQUOTE;
809 				/* just don't IFS split; no quoting mode */
810 			} else {
811  sherestring_regular:
812 				*wp++ = CHAR;
813 				*wp++ = c;
814 			}
815 			break;
816 
817 		/* <<,<<- delimiter */
818 		case SHEREDELIM:
819 			/*
820 			 * XXX chuck this state (and the next) - use
821 			 * the existing states ($ and \`...` should be
822 			 * stripped of their specialness after the
823 			 * fact).
824 			 */
825 			/*
826 			 * here delimiters need a special case since
827 			 * $ and `...` are not to be treated specially
828 			 */
829 			if (c == '\\') {
830 				c = getsc();
831 				if (c) {
832 					/* trailing \ is lost */
833 					*wp++ = QCHAR;
834 					*wp++ = c;
835 				}
836 			} else if (c == '$') {
837 				if ((c2 = getsc()) == '\'') {
838 					PUSH_STATE(SEQUOTE);
839 					statep->ls_bool = false;
840 					goto sheredelim_quoted;
841 				} else if (c2 == '"')
842 					goto sheredelim_dquoted;
843 				ungetsc(c2);
844 				goto sheredelim_regular;
845 			} else if (c == '\'') {
846 				PUSH_STATE(SSQUOTE);
847  sheredelim_quoted:
848 				*wp++ = OQUOTE;
849 				ignore_backslash_newline++;
850 			} else if (c == '"') {
851  sheredelim_dquoted:
852 				state = statep->type = SHEREDQUOTE;
853 				*wp++ = OQUOTE;
854 			} else {
855  sheredelim_regular:
856 				*wp++ = CHAR;
857 				*wp++ = c;
858 			}
859 			break;
860 
861 		/* " in <<,<<- delimiter */
862 		case SHEREDQUOTE:
863 			if (c == '"') {
864 				*wp++ = CQUOTE;
865 				state = statep->type =
866 				    /* dp[1] == '<' means here string */
867 				    Xstring(ws, wp)[1] == '<' ?
868 				    SHERESTRING : SHEREDELIM;
869 			} else {
870 				if (c == '\\') {
871 					switch (c = getsc()) {
872 					case 0:
873 						/* trailing \ is lost */
874 					case '\\':
875 					case '"':
876 					case '$':
877 					case '`':
878 						break;
879 					default:
880 						*wp++ = CHAR;
881 						*wp++ = '\\';
882 						break;
883 					}
884 				}
885 				*wp++ = CHAR;
886 				*wp++ = c;
887 			}
888 			break;
889 
890 		/* in *(...|...) pattern (*+?@!) */
891 		case SPATTERN:
892 			if (c == /*(*/ ')') {
893 				*wp++ = CPAT;
894 				POP_STATE();
895 			} else if (c == '|') {
896 				*wp++ = SPAT;
897 			} else if (c == '(') {
898 				*wp++ = OPAT;
899 				/* simile for @ */
900 				*wp++ = ' ';
901 				PUSH_STATE(SPATTERN);
902 			} else
903 				goto Sbase1;
904 			break;
905 		}
906 	}
907  Done:
908 	Xcheck(ws, wp);
909 	if (statep != &states[1])
910 		/* XXX figure out what is missing */
911 		yyerror("no closing quote\n");
912 
913 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
914 	if (state == SHEREDELIM || state == SHERESTRING)
915 		state = SBASE;
916 
917 	dp = Xstring(ws, wp);
918 	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
919 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
920 
921 		if (Xlength(ws, wp) == 0)
922 			iop->unit = c == '<' ? 0 : 1;
923 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
924 			if (dp[c2] != CHAR)
925 				goto no_iop;
926 			if (!ksh_isdigit(dp[c2 + 1]))
927 				goto no_iop;
928 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
929 		}
930 
931 		if (iop->unit >= FDBASE)
932 			goto no_iop;
933 
934 		if (c == '&') {
935 			if ((c2 = getsc()) != '>') {
936 				ungetsc(c2);
937 				goto no_iop;
938 			}
939 			c = c2;
940 			iop->flag = IOBASH;
941 		} else
942 			iop->flag = 0;
943 
944 		c2 = getsc();
945 		/* <<, >>, <> are ok, >< is not */
946 		if (c == c2 || (c == '<' && c2 == '>')) {
947 			iop->flag |= c == c2 ?
948 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
949 			if (iop->flag == IOHERE) {
950 				if ((c2 = getsc()) == '-') {
951 					iop->flag |= IOSKIP;
952 					c2 = getsc();
953 				} else if (c2 == '<')
954 					iop->flag |= IOHERESTR;
955 				ungetsc(c2);
956 				if (c2 == '\n')
957 					iop->flag |= IONDELIM;
958 			}
959 		} else if (c2 == '&')
960 			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
961 		else {
962 			iop->flag |= c == '>' ? IOWRITE : IOREAD;
963 			if (c == '>' && c2 == '|')
964 				iop->flag |= IOCLOB;
965 			else
966 				ungetsc(c2);
967 		}
968 
969 		iop->name = NULL;
970 		iop->delim = NULL;
971 		iop->heredoc = NULL;
972 		/* free word */
973 		Xfree(ws, wp);
974 		yylval.iop = iop;
975 		return (REDIR);
976  no_iop:
977 		afree(iop, ATEMP);
978 	}
979 
980 	if (wp == dp && state == SBASE) {
981 		/* free word */
982 		Xfree(ws, wp);
983 		/* no word, process LEX1 character */
984 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
985 			if ((c2 = getsc()) == c)
986 				c = (c == ';') ? BREAK :
987 				    (c == '|') ? LOGOR :
988 				    (c == '&') ? LOGAND :
989 				    /* c == '(' ) */ MDPAREN;
990 			else if (c == '|' && c2 == '&')
991 				c = COPROC;
992 			else if (c == ';' && c2 == '|')
993 				c = BRKEV;
994 			else if (c == ';' && c2 == '&')
995 				c = BRKFT;
996 			else
997 				ungetsc(c2);
998 #ifndef MKSH_SMALL
999 			if (c == BREAK) {
1000 				if ((c2 = getsc()) == '&')
1001 					c = BRKEV;
1002 				else
1003 					ungetsc(c2);
1004 			}
1005 #endif
1006 		} else if (c == '\n') {
1007 			gethere(false);
1008 			if (cf & CONTIN)
1009 				goto Again;
1010 		} else if (c == '\0')
1011 			/* need here strings at EOF */
1012 			gethere(true);
1013 		return (c);
1014 	}
1015 
1016 	/* terminate word */
1017 	*wp++ = EOS;
1018 	yylval.cp = Xclose(ws, wp);
1019 	if (state == SWORD || state == SLETPAREN
1020 	    /* XXX ONEWORD? */)
1021 		return (LWORD);
1022 
1023 	/* unget terminator */
1024 	ungetsc(c);
1025 
1026 	/*
1027 	 * note: the alias-vs-function code below depends on several
1028 	 * interna: starting from here, source->str is not modified;
1029 	 * the way getsc() and ungetsc() operate; etc.
1030 	 */
1031 
1032 	/* copy word to unprefixed string ident */
1033 	sp = yylval.cp;
1034 	dp = ident;
1035 	if ((cf & HEREDELIM) && (sp[1] == '<'))
1036 		while (dp < ident+IDENT) {
1037 			if ((c = *sp++) == CHAR)
1038 				*dp++ = *sp++;
1039 			else if ((c != OQUOTE) && (c != CQUOTE))
1040 				break;
1041 		}
1042 	else
1043 		while (dp < ident+IDENT && (c = *sp++) == CHAR)
1044 			*dp++ = *sp++;
1045 	/* Make sure the ident array stays '\0' padded */
1046 	memset(dp, 0, (ident+IDENT) - dp + 1);
1047 	if (c != EOS)
1048 		/* word is not unquoted */
1049 		*ident = '\0';
1050 
1051 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1052 		struct tbl *p;
1053 		uint32_t h = hash(ident);
1054 
1055 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1056 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1057 		    p->val.i == /*{*/ '}')) {
1058 			afree(yylval.cp, ATEMP);
1059 			return (p->val.i);
1060 		}
1061 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1062 		    (p->flag & ISSET)) {
1063 			/*
1064 			 * this still points to the same character as the
1065 			 * ungetsc'd terminator from above
1066 			 */
1067 			const char *cp = source->str;
1068 
1069 			/* prefer POSIX but not Korn functions over aliases */
1070 			while (*cp == ' ' || *cp == '\t')
1071 				/*
1072 				 * this is like getsc() without skipping
1073 				 * over Source boundaries (including not
1074 				 * parsing ungetsc'd characters that got
1075 				 * pushed into an SREREAD) which is what
1076 				 * we want here anyway: find out whether
1077 				 * the alias name is followed by a POSIX
1078 				 * function definition (only the opening
1079 				 * parenthesis is checked though)
1080 				 */
1081 				++cp;
1082 			/* prefer functions over aliases */
1083 			if (cp[0] != '(' || cp[1] != ')') {
1084 				Source *s = source;
1085 
1086 				while (s && (s->flags & SF_HASALIAS))
1087 					if (s->u.tblp == p)
1088 						return (LWORD);
1089 					else
1090 						s = s->next;
1091 				/* push alias expansion */
1092 				s = pushs(SALIAS, source->areap);
1093 				s->start = s->str = p->val.s;
1094 				s->u.tblp = p;
1095 				s->flags |= SF_HASALIAS;
1096 				s->next = source;
1097 				if (source->type == SEOF) {
1098 					/* prevent infinite recursion at EOS */
1099 					source->u.tblp = p;
1100 					source->flags |= SF_HASALIAS;
1101 				}
1102 				source = s;
1103 				afree(yylval.cp, ATEMP);
1104 				goto Again;
1105 			}
1106 		}
1107 	}
1108 
1109 	return (LWORD);
1110 }
1111 
1112 static void
gethere(bool iseof)1113 gethere(bool iseof)
1114 {
1115 	struct ioword **p;
1116 
1117 	for (p = heres; p < herep; p++)
1118 		if (iseof && !((*p)->flag & IOHERESTR))
1119 			/* only here strings at EOF */
1120 			return;
1121 		else
1122 			readhere(*p);
1123 	herep = heres;
1124 }
1125 
1126 /*
1127  * read "<<word" text into temp file
1128  */
1129 
1130 static void
readhere(struct ioword * iop)1131 readhere(struct ioword *iop)
1132 {
1133 	int c;
1134 	const char *eof, *eofp;
1135 	XString xs;
1136 	char *xp;
1137 	int xpos;
1138 
1139 	if (iop->flag & IOHERESTR) {
1140 		/* process the here string */
1141 		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1142 		xpos = strlen(xp) - 1;
1143 		memmove(xp, xp + 1, xpos);
1144 		xp[xpos] = '\n';
1145 		return;
1146 	}
1147 
1148 	eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1149 
1150 	if (!(iop->flag & IOEVAL))
1151 		ignore_backslash_newline++;
1152 
1153 	Xinit(xs, xp, 256, ATEMP);
1154 
1155  heredoc_read_line:
1156 	/* beginning of line */
1157 	eofp = eof;
1158 	xpos = Xsavepos(xs, xp);
1159 	if (iop->flag & IOSKIP) {
1160 		/* skip over leading tabs */
1161 		while ((c = getsc()) == '\t')
1162 			/* nothing */;
1163 		goto heredoc_parse_char;
1164 	}
1165  heredoc_read_char:
1166 	c = getsc();
1167  heredoc_parse_char:
1168 	/* compare with here document marker */
1169 	if (!*eofp) {
1170 		/* end of here document marker, what to do? */
1171 		switch (c) {
1172 		case /*(*/ ')':
1173 			if (!subshell_nesting_level)
1174 				/*-
1175 				 * not allowed outside $(...) or (...)
1176 				 * => mismatch
1177 				 */
1178 				break;
1179 			/* allow $(...) or (...) to close here */
1180 			ungetsc(/*(*/ ')');
1181 			/* FALLTHROUGH */
1182 		case 0:
1183 			/*
1184 			 * Allow EOF here to commands without trailing
1185 			 * newlines (mksh -c '...') will work as well.
1186 			 */
1187 		case '\n':
1188 			/* Newline terminates here document marker */
1189 			goto heredoc_found_terminator;
1190 		}
1191 	} else if (c == *eofp++)
1192 		/* store; then read and compare next character */
1193 		goto heredoc_store_and_loop;
1194 	/* nope, mismatch; read until end of line */
1195 	while (c != '\n') {
1196 		if (!c)
1197 			/* oops, reached EOF */
1198 			yyerror("%s '%s' unclosed\n", "here document", eof);
1199 		/* store character */
1200 		Xcheck(xs, xp);
1201 		Xput(xs, xp, c);
1202 		/* read next character */
1203 		c = getsc();
1204 	}
1205 	/* we read a newline as last character */
1206  heredoc_store_and_loop:
1207 	/* store character */
1208 	Xcheck(xs, xp);
1209 	Xput(xs, xp, c);
1210 	if (c == '\n')
1211 		goto heredoc_read_line;
1212 	goto heredoc_read_char;
1213 
1214  heredoc_found_terminator:
1215 	/* jump back to saved beginning of line */
1216 	xp = Xrestpos(xs, xp, xpos);
1217 	/* terminate, close and store */
1218 	Xput(xs, xp, '\0');
1219 	iop->heredoc = Xclose(xs, xp);
1220 
1221 	if (!(iop->flag & IOEVAL))
1222 		ignore_backslash_newline--;
1223 }
1224 
1225 void
yyerror(const char * fmt,...)1226 yyerror(const char *fmt, ...)
1227 {
1228 	va_list va;
1229 
1230 	/* pop aliases and re-reads */
1231 	while (source->type == SALIAS || source->type == SREREAD)
1232 		source = source->next;
1233 	/* zap pending input */
1234 	source->str = null;
1235 
1236 	error_prefix(true);
1237 	va_start(va, fmt);
1238 	shf_vfprintf(shl_out, fmt, va);
1239 	va_end(va);
1240 	errorfz();
1241 }
1242 
1243 /*
1244  * input for yylex with alias expansion
1245  */
1246 
1247 Source *
pushs(int type,Area * areap)1248 pushs(int type, Area *areap)
1249 {
1250 	Source *s;
1251 
1252 	s = alloc(sizeof(Source), areap);
1253 	memset(s, 0, sizeof(Source));
1254 	s->type = type;
1255 	s->str = null;
1256 	s->areap = areap;
1257 	if (type == SFILE || type == SSTDIN)
1258 		XinitN(s->xs, 256, s->areap);
1259 	return (s);
1260 }
1261 
1262 static int
getsc_uu(void)1263 getsc_uu(void)
1264 {
1265 	Source *s = source;
1266 	int c;
1267 
1268 	while ((c = *s->str++) == 0) {
1269 		/* return 0 for EOF by default */
1270 		s->str = NULL;
1271 		switch (s->type) {
1272 		case SEOF:
1273 			s->str = null;
1274 			return (0);
1275 
1276 		case SSTDIN:
1277 		case SFILE:
1278 			getsc_line(s);
1279 			break;
1280 
1281 		case SWSTR:
1282 			break;
1283 
1284 		case SSTRING:
1285 			break;
1286 
1287 		case SWORDS:
1288 			s->start = s->str = *s->u.strv++;
1289 			s->type = SWORDSEP;
1290 			break;
1291 
1292 		case SWORDSEP:
1293 			if (*s->u.strv == NULL) {
1294 				s->start = s->str = "\n";
1295 				s->type = SEOF;
1296 			} else {
1297 				s->start = s->str = " ";
1298 				s->type = SWORDS;
1299 			}
1300 			break;
1301 
1302 		case SALIAS:
1303 			if (s->flags & SF_ALIASEND) {
1304 				/* pass on an unused SF_ALIAS flag */
1305 				source = s->next;
1306 				source->flags |= s->flags & SF_ALIAS;
1307 				s = source;
1308 			} else if (*s->u.tblp->val.s &&
1309 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1310 				/* pop source stack */
1311 				source = s = s->next;
1312 				/*
1313 				 * Note that this alias ended with a
1314 				 * space, enabling alias expansion on
1315 				 * the following word.
1316 				 */
1317 				s->flags |= SF_ALIAS;
1318 			} else {
1319 				/*
1320 				 * At this point, we need to keep the current
1321 				 * alias in the source list so recursive
1322 				 * aliases can be detected and we also need to
1323 				 * return the next character. Do this by
1324 				 * temporarily popping the alias to get the
1325 				 * next character and then put it back in the
1326 				 * source list with the SF_ALIASEND flag set.
1327 				 */
1328 				/* pop source stack */
1329 				source = s->next;
1330 				source->flags |= s->flags & SF_ALIAS;
1331 				c = getsc_uu();
1332 				if (c) {
1333 					s->flags |= SF_ALIASEND;
1334 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1335 					s->start = s->str = s->ugbuf;
1336 					s->next = source;
1337 					source = s;
1338 				} else {
1339 					s = source;
1340 					/* avoid reading EOF twice */
1341 					s->str = NULL;
1342 					break;
1343 				}
1344 			}
1345 			continue;
1346 
1347 		case SREREAD:
1348 			if (s->start != s->ugbuf)
1349 				/* yuck */
1350 				afree(s->u.freeme, ATEMP);
1351 			source = s = s->next;
1352 			continue;
1353 		}
1354 		if (s->str == NULL) {
1355 			s->type = SEOF;
1356 			s->start = s->str = null;
1357 			return ('\0');
1358 		}
1359 		if (s->flags & SF_ECHO) {
1360 			shf_puts(s->str, shl_out);
1361 			shf_flush(shl_out);
1362 		}
1363 	}
1364 	return (c);
1365 }
1366 
1367 static void
getsc_line(Source * s)1368 getsc_line(Source *s)
1369 {
1370 	char *xp = Xstring(s->xs, xp), *cp;
1371 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1372 	int have_tty = interactive && (s->flags & SF_TTY);
1373 
1374 	/* Done here to ensure nothing odd happens when a timeout occurs */
1375 	XcheckN(s->xs, xp, LINE);
1376 	*xp = '\0';
1377 	s->start = s->str = xp;
1378 
1379 	if (have_tty && ksh_tmout) {
1380 		ksh_tmout_state = TMOUT_READING;
1381 		alarm(ksh_tmout);
1382 	}
1383 	if (interactive)
1384 		change_winsz();
1385 	if (have_tty && (
1386 #if !MKSH_S_NOVI
1387 	    Flag(FVI) ||
1388 #endif
1389 	    Flag(FEMACS) || Flag(FGMACS))) {
1390 		int nread;
1391 
1392 		nread = x_read(xp, LINE);
1393 		if (nread < 0)
1394 			/* read error */
1395 			nread = 0;
1396 		xp[nread] = '\0';
1397 		xp += nread;
1398 	} else {
1399 		if (interactive)
1400 			pprompt(prompt, 0);
1401 		else
1402 			s->line++;
1403 
1404 		while (/* CONSTCOND */ 1) {
1405 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1406 
1407 			if (!p && shf_error(s->u.shf) &&
1408 			    shf_errno(s->u.shf) == EINTR) {
1409 				shf_clearerr(s->u.shf);
1410 				if (trap)
1411 					runtraps(0);
1412 				continue;
1413 			}
1414 			if (!p || (xp = p, xp[-1] == '\n'))
1415 				break;
1416 			/* double buffer size */
1417 			/* move past NUL so doubling works... */
1418 			xp++;
1419 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1420 			/* ...and move back again */
1421 			xp--;
1422 		}
1423 		/*
1424 		 * flush any unwanted input so other programs/builtins
1425 		 * can read it. Not very optimal, but less error prone
1426 		 * than flushing else where, dealing with redirections,
1427 		 * etc.
1428 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1429 		 */
1430 		if (s->type == SSTDIN)
1431 			shf_flush(s->u.shf);
1432 	}
1433 	/*
1434 	 * XXX: temporary kludge to restore source after a
1435 	 * trap may have been executed.
1436 	 */
1437 	source = s;
1438 	if (have_tty && ksh_tmout) {
1439 		ksh_tmout_state = TMOUT_EXECUTING;
1440 		alarm(0);
1441 	}
1442 	cp = Xstring(s->xs, xp);
1443 #ifndef MKSH_SMALL
1444 	if (interactive && *cp == '!' && cur_prompt == PS1) {
1445 		int linelen;
1446 
1447 		linelen = Xlength(s->xs, xp);
1448 		XcheckN(s->xs, xp, Zfc_e_dash + /* NUL */ 1);
1449 		/* reload after potential realloc */
1450 		cp = Xstring(s->xs, xp);
1451 		/* change initial '!' into space */
1452 		*cp = ' ';
1453 		/* NUL terminate the current string */
1454 		*xp = '\0';
1455 		/* move the actual string forward */
1456 		memmove(cp + Zfc_e_dash, cp, linelen + /* NUL */ 1);
1457 		xp += Zfc_e_dash;
1458 		/* prepend it with "fc -e -" */
1459 		memcpy(cp, Tfc_e_dash, Zfc_e_dash);
1460 	}
1461 #endif
1462 	s->start = s->str = cp;
1463 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1464 	/* Note: if input is all nulls, this is not eof */
1465 	if (Xlength(s->xs, xp) == 0) {
1466 		/* EOF */
1467 		if (s->type == SFILE)
1468 			shf_fdclose(s->u.shf);
1469 		s->str = NULL;
1470 	} else if (interactive && *s->str &&
1471 	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1472 		histsave(&s->line, s->str, true, true);
1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1474 	} else if (interactive && cur_prompt == PS1) {
1475 		cp = Xstring(s->xs, xp);
1476 		while (*cp && ctype(*cp, C_IFSWS))
1477 			++cp;
1478 		if (!*cp)
1479 			histsync();
1480 #endif
1481 	}
1482 	if (interactive)
1483 		set_prompt(PS2, NULL);
1484 }
1485 
1486 void
set_prompt(int to,Source * s)1487 set_prompt(int to, Source *s)
1488 {
1489 	cur_prompt = to;
1490 
1491 	switch (to) {
1492 	/* command */
1493 	case PS1:
1494 		/*
1495 		 * Substitute ! and !! here, before substitutions are done
1496 		 * so ! in expanded variables are not expanded.
1497 		 * NOTE: this is not what AT&T ksh does (it does it after
1498 		 * substitutions, POSIX doesn't say which is to be done.
1499 		 */
1500 		{
1501 			struct shf *shf;
1502 			char * volatile ps1;
1503 			Area *saved_atemp;
1504 
1505 			ps1 = str_val(global("PS1"));
1506 			shf = shf_sopen(NULL, strlen(ps1) * 2,
1507 			    SHF_WR | SHF_DYNAMIC, NULL);
1508 			while (*ps1)
1509 				if (*ps1 != '!' || *++ps1 == '!')
1510 					shf_putchar(*ps1++, shf);
1511 				else
1512 					shf_fprintf(shf, "%d",
1513 						s ? s->line + 1 : 0);
1514 			ps1 = shf_sclose(shf);
1515 			saved_atemp = ATEMP;
1516 			newenv(E_ERRH);
1517 			if (sigsetjmp(e->jbuf, 0)) {
1518 				prompt = safe_prompt;
1519 				/*
1520 				 * Don't print an error - assume it has already
1521 				 * been printed. Reason is we may have forked
1522 				 * to run a command and the child may be
1523 				 * unwinding its stack through this code as it
1524 				 * exits.
1525 				 */
1526 			} else {
1527 				char *cp = substitute(ps1, 0);
1528 				strdupx(prompt, cp, saved_atemp);
1529 			}
1530 			quitenv(NULL);
1531 		}
1532 		break;
1533 	/* command continuation */
1534 	case PS2:
1535 		prompt = str_val(global("PS2"));
1536 		break;
1537 	}
1538 }
1539 
1540 static int
dopprompt(const char * cp,int ntruncate,bool doprint)1541 dopprompt(const char *cp, int ntruncate, bool doprint)
1542 {
1543 	int columns = 0, lines = 0, indelimit = 0;
1544 	char delimiter = 0;
1545 
1546 	/*
1547 	 * Undocumented AT&T ksh feature:
1548 	 * If the second char in the prompt string is \r then the first
1549 	 * char is taken to be a non-printing delimiter and any chars
1550 	 * between two instances of the delimiter are not considered to
1551 	 * be part of the prompt length
1552 	 */
1553 	if (*cp && cp[1] == '\r') {
1554 		delimiter = *cp;
1555 		cp += 2;
1556 	}
1557 	for (; *cp; cp++) {
1558 		if (indelimit && *cp != delimiter)
1559 			;
1560 		else if (*cp == '\n' || *cp == '\r') {
1561 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1562 			columns = 0;
1563 		} else if (*cp == '\t') {
1564 			columns = (columns | 7) + 1;
1565 		} else if (*cp == '\b') {
1566 			if (columns > 0)
1567 				columns--;
1568 		} else if (*cp == delimiter)
1569 			indelimit = !indelimit;
1570 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1571 			const char *cp2;
1572 			columns += utf_widthadj(cp, &cp2);
1573 			if (doprint && (indelimit ||
1574 			    (ntruncate < (x_cols * lines + columns))))
1575 				shf_write(cp, cp2 - cp, shl_out);
1576 			cp = cp2 - /* loop increment */ 1;
1577 			continue;
1578 		} else
1579 			columns++;
1580 		if (doprint && (*cp != delimiter) &&
1581 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1582 			shf_putc(*cp, shl_out);
1583 	}
1584 	if (doprint)
1585 		shf_flush(shl_out);
1586 	return (x_cols * lines + columns);
1587 }
1588 
1589 
/*
 * Print the prompt string cp via dopprompt() with printing enabled;
 * ntruncate is passed through unchanged. The computed length is
 * discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	dopprompt(cp, ntruncate, true);
}
1595 
/*
 * Return the length of the prompt string cp as computed by
 * dopprompt(), without printing anything.
 */
int
promptlen(const char *cp)
{
	return (dopprompt(cp, 0, false));
}
1601 
1602 /*
1603  * Read the variable part of a ${...} expression (i.e. up to but not
1604  * including the :[-+?=#%] or close-brace).
1605  */
1606 static char *
get_brace_var(XString * wsp,char * wp)1607 get_brace_var(XString *wsp, char *wp)
1608 {
1609 	char c;
1610 	enum parse_state {
1611 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1612 		PS_NUMBER, PS_VAR1
1613 	} state = PS_INITIAL;
1614 
1615 	while (/* CONSTCOND */ 1) {
1616 		c = getsc();
1617 		/* State machine to figure out where the variable part ends. */
1618 		switch (state) {
1619 		case PS_INITIAL:
1620 			if (c == '#' || c == '!' || c == '%') {
1621 				state = PS_SAW_HASH;
1622 				break;
1623 			}
1624 			/* FALLTHROUGH */
1625 		case PS_SAW_HASH:
1626 			if (ksh_isalphx(c))
1627 				state = PS_IDENT;
1628 			else if (ksh_isdigit(c))
1629 				state = PS_NUMBER;
1630 			else if (c == '#') {
1631 				if (state == PS_SAW_HASH) {
1632 					char c2;
1633 
1634 					c2 = getsc();
1635 					ungetsc(c2);
1636 					if (c2 != '}') {
1637 						ungetsc(c);
1638 						goto out;
1639 					}
1640 				}
1641 				state = PS_VAR1;
1642 			} else if (ctype(c, C_VAR1))
1643 				state = PS_VAR1;
1644 			else
1645 				goto out;
1646 			break;
1647 		case PS_IDENT:
1648 			if (!ksh_isalnux(c)) {
1649 				if (c == '[') {
1650 					char *tmp, *p;
1651 
1652 					if (!arraysub(&tmp))
1653 						yyerror("missing ]\n");
1654 					*wp++ = c;
1655 					for (p = tmp; *p; ) {
1656 						Xcheck(*wsp, wp);
1657 						*wp++ = *p++;
1658 					}
1659 					afree(tmp, ATEMP);
1660 					/* the ] */
1661 					c = getsc();
1662 				}
1663 				goto out;
1664 			}
1665 			break;
1666 		case PS_NUMBER:
1667 			if (!ksh_isdigit(c))
1668 				goto out;
1669 			break;
1670 		case PS_VAR1:
1671 			goto out;
1672 		}
1673 		Xcheck(*wsp, wp);
1674 		*wp++ = c;
1675 	}
1676  out:
1677 	/* end of variable part */
1678 	*wp++ = '\0';
1679 	ungetsc(c);
1680 	return (wp);
1681 }
1682 
1683 /*
1684  * Save an array subscript - returns true if matching bracket found, false
1685  * if eof or newline was found.
1686  * (Returned string double null terminated)
1687  */
1688 static bool
arraysub(char ** strp)1689 arraysub(char **strp)
1690 {
1691 	XString ws;
1692 	char *wp, c;
1693 	/* we are just past the initial [ */
1694 	int depth = 1;
1695 
1696 	Xinit(ws, wp, 32, ATEMP);
1697 
1698 	do {
1699 		c = getsc();
1700 		Xcheck(ws, wp);
1701 		*wp++ = c;
1702 		if (c == '[')
1703 			depth++;
1704 		else if (c == ']')
1705 			depth--;
1706 	} while (depth > 0 && c && c != '\n');
1707 
1708 	*wp++ = '\0';
1709 	*strp = Xclose(ws, wp);
1710 
1711 	return (tobool(depth == 0));
1712 }
1713 
1714 /* Unget a char: handles case when we are already at the start of the buffer */
1715 static void
ungetsc(int c)1716 ungetsc(int c)
1717 {
1718 	struct sretrace_info *rp = retrace_info;
1719 
1720 	if (backslash_skip)
1721 		backslash_skip--;
1722 	/* Don't unget EOF... */
1723 	if (source->str == null && c == '\0')
1724 		return;
1725 	while (rp) {
1726 		if (Xlength(rp->xs, rp->xp))
1727 			rp->xp--;
1728 		rp = rp->next;
1729 	}
1730 	ungetsc_(c);
1731 }
1732 static void
ungetsc_(int c)1733 ungetsc_(int c)
1734 {
1735 	if (source->str > source->start)
1736 		source->str--;
1737 	else {
1738 		Source *s;
1739 
1740 		s = pushs(SREREAD, source->areap);
1741 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1742 		s->start = s->str = s->ugbuf;
1743 		s->next = source;
1744 		source = s;
1745 	}
1746 }
1747 
1748 
1749 /* Called to get a char that isn't a \newline sequence. */
1750 static int
getsc_bn(void)1751 getsc_bn(void)
1752 {
1753 	int c, c2;
1754 
1755 	if (ignore_backslash_newline)
1756 		return (o_getsc_u());
1757 
1758 	if (backslash_skip == 1) {
1759 		backslash_skip = 2;
1760 		return (o_getsc_u());
1761 	}
1762 
1763 	backslash_skip = 0;
1764 
1765 	while (/* CONSTCOND */ 1) {
1766 		c = o_getsc_u();
1767 		if (c == '\\') {
1768 			if ((c2 = o_getsc_u()) == '\n')
1769 				/* ignore the \newline; get the next char... */
1770 				continue;
1771 			ungetsc_(c2);
1772 			backslash_skip = 1;
1773 		}
1774 		return (c);
1775 	}
1776 }
1777 
1778 void
yyskiputf8bom(void)1779 yyskiputf8bom(void)
1780 {
1781 	int c;
1782 
1783 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1784 		ungetsc_(c);
1785 		return;
1786 	}
1787 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1788 		ungetsc_(c);
1789 		ungetsc_(0xEF);
1790 		return;
1791 	}
1792 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1793 		ungetsc_(c);
1794 		ungetsc_(0xBB);
1795 		ungetsc_(0xEF);
1796 		return;
1797 	}
1798 	UTFMODE |= 8;
1799 }
1800 
1801 static Lex_state *
push_state_(State_info * si,Lex_state * old_end)1802 push_state_(State_info *si, Lex_state *old_end)
1803 {
1804 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1805 
1806 	news[0].ls_base = old_end;
1807 	si->base = &news[0];
1808 	si->end = &news[STATE_BSIZE];
1809 	return (&news[1]);
1810 }
1811 
1812 static Lex_state *
pop_state_(State_info * si,Lex_state * old_end)1813 pop_state_(State_info *si, Lex_state *old_end)
1814 {
1815 	Lex_state *old_base = si->base;
1816 
1817 	si->base = old_end->ls_base - STATE_BSIZE;
1818 	si->end = old_end->ls_base;
1819 
1820 	afree(old_base, ATEMP);
1821 
1822 	return (si->base + STATE_BSIZE - 1);
1823 }
1824 
/*
 * Function wrapper around getsc(): returns the next input character.
 */
static int
s_get(void)
{
	return (getsc());
}
1830 
/*
 * Function wrapper around ungetsc(): pushes c back onto the input.
 */
static void
s_put(int c)
{
	ungetsc(c);
}
1836