/*	$OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $	*/

/*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
 *	Thorsten Glaser <tg@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */

23 #include "sh.h"
24 
25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.118 2010/07/25 11:35:41 tg Exp $");
26 
/*
 * states while lexing word
 *
 * The values are stored in Lex_state.ls_state and dispatched on by the
 * big switch in yylex().
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SCSPAREN	8	/* inside $() */
#define SBQUOTE		9	/* inside `` */
#define SASPAREN	10	/* inside $(( )) */
#define SHEREDELIM	11	/* parsing <<,<<- delimiter */
#define SHEREDQUOTE	12	/* parsing " in <<,<<- delimiter */
#define SPATTERN	13	/* parsing *(...|...) pattern (*+?@!) */
#define STBRACE		14	/* parsing ${...[#%]...} */
#define SLETARRAY	15	/* inside =( ), just copy */
#define SADELIM		16	/* like SBASE, looking for delimiter */
#define SHERESTRING	17	/* parsing <<< string */

/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * ls_state holds one of the S* state constants; ls_info is a union whose
 * active member depends on that state.  The ls_* accessor macros defined
 * next to each member give shorthand access through a Lex_state pointer
 * (e.g. statep->ls_scsparen.nparen).
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
#define ls_scsparen ls_info.u_scsparen
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
		} u_sbquote;

#ifndef MKSH_SMALL
		/* =(...) */
		struct sletarray_info {
			int nparen;	/* count open parentheses */
#define ls_sletarray ls_info.u_sletarray
		} u_sletarray;
#endif

		/* ADELIM */
		struct sadelim_info {
			unsigned char nparen;	/* count open parentheses */
#define SADELIM_BASH	0
#define SADELIM_MAKE	1
			unsigned char style;	/* one of SADELIM_* */
			unsigned char delimiter; /* character ending a field */
			unsigned char num;	/* delimiters still expected */
			unsigned char flags;	/* ofs. into sadelim_flags[] */
#define ls_sadelim ls_info.u_sadelim
		} u_sadelim;

		/* $'...' */
		struct sequote_info {
			bool got_NUL;	/* ignore rest of string */
#define ls_sequote ls_info.u_sequote
		} u_sequote;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};

111 typedef struct {
112 	Lex_state *base;
113 	Lex_state *end;
114 } State_info;
115 
116 static void readhere(struct ioword *);
117 static int getsc__(void);
118 static void getsc_line(Source *);
119 static int getsc_bn(void);
120 static int s_get(void);
121 static void s_put(int);
122 static char *get_brace_var(XString *, char *);
123 static int arraysub(char **);
124 static const char *ungetsc(int);
125 static void gethere(bool);
126 static Lex_state *push_state_(State_info *, Lex_state *);
127 static Lex_state *pop_state_(State_info *, Lex_state *);
128 
129 static int dopprompt(const char *, int, bool);
130 
131 static int backslash_skip;
132 static int ignore_backslash_newline;
133 
/*
 * optimised getsc_bn(): take the next character straight from the
 * current source buffer unless it is NUL or a backslash, a backslash
 * skip is pending, or SF_FIRST is set; otherwise call getsc_bn()
 */
#define _getsc()	(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc_bn())
/*
 * optimised getsc__(): same fast path, but backslashes are not special
 */
#define	_getsc_()	((*source->str != '\0') && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc__())

#ifdef MKSH_SMALL
/* MKSH_SMALL: real functions instead of expanding the macros inline */
static int getsc(void);
static int getsc_(void);

static int
getsc(void)
{
	return (_getsc());
}

static int
getsc_(void)
{
	return (_getsc_());
}
#else
/* !MKSH_SMALL: use them inline */
#define getsc()		_getsc()
#define getsc_()	_getsc_()
#endif

/* number of Lex_state slots per state block */
#define STATE_BSIZE	32

/*
 * Enter lexer state (s): advance statep, let push_state_() provide a
 * new block when the current one is exhausted, and record the state in
 * both the new slot and the local variable 'state'.
 * Expects statep, state_info and state to be in scope (see yylex()).
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_(&state_info, statep);	\
	state = statep->ls_state = (s);				\
} while (0)

/*
 * Leave the current lexer state: step statep back, let pop_state_()
 * switch blocks when the base slot of the current block is reached,
 * and reload 'state' from the slot now on top.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_(&state_info, statep);	\
	state = statep->ls_state;				\
} while (0)

177 /**
178  * Lexical analyser
179  *
180  * tokens are not regular expressions, they are LL(1).
181  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
182  * hence the state stack.
183  */
184 
185 int
yylex(int cf)186 yylex(int cf)
187 {
188 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
189 	State_info state_info;
190 	int c, c2, state;
191 	XString ws;		/* expandable output word */
192 	char *wp;		/* output word pointer */
193 	char *sp, *dp;
194 
195  Again:
196 	states[0].ls_state = -1;
197 	states[0].ls_info.base = NULL;
198 	statep = &states[1];
199 	state_info.base = states;
200 	state_info.end = &state_info.base[STATE_BSIZE];
201 
202 	Xinit(ws, wp, 64, ATEMP);
203 
204 	backslash_skip = 0;
205 	ignore_backslash_newline = 0;
206 
207 	if (cf&ONEWORD)
208 		state = SWORD;
209 	else if (cf&LETEXPR) {
210 		/* enclose arguments in (double) quotes */
211 		*wp++ = OQUOTE;
212 		state = SLETPAREN;
213 		statep->ls_sletparen.nparen = 0;
214 #ifndef MKSH_SMALL
215 	} else if (cf&LETARRAY) {
216 		state = SLETARRAY;
217 		statep->ls_sletarray.nparen = 0;
218 #endif
219 	} else {		/* normal lexing */
220 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
221 		while ((c = getsc()) == ' ' || c == '\t')
222 			;
223 		if (c == '#') {
224 			ignore_backslash_newline++;
225 			while ((c = getsc()) != '\0' && c != '\n')
226 				;
227 			ignore_backslash_newline--;
228 		}
229 		ungetsc(c);
230 	}
231 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
232 		source->flags &= ~SF_ALIAS;
233 		cf |= ALIAS;
234 	}
235 
236 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
237 	statep->ls_state = state;
238 
239 	/* check for here string */
240 	if (state == SHEREDELIM) {
241 		c = getsc();
242 		if (c == '<') {
243 			state = SHERESTRING;
244 			while ((c = getsc()) == ' ' || c == '\t')
245 				;
246 			ungetsc(c);
247 			c = '<';
248 			goto accept_nonword;
249 		}
250 		ungetsc(c);
251 	}
252 
253 	/* collect non-special or quoted characters to form word */
254 	while (!((c = getsc()) == 0 ||
255 	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
256 	    ctype(c, C_LEX1)))) {
257  accept_nonword:
258 		Xcheck(ws, wp);
259 		switch (state) {
260 		case SADELIM:
261 			if (c == '(')
262 				statep->ls_sadelim.nparen++;
263 			else if (c == ')')
264 				statep->ls_sadelim.nparen--;
265 			else if (statep->ls_sadelim.nparen == 0 &&
266 			    (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
267 				*wp++ = ADELIM;
268 				*wp++ = c;
269 				if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
270 					POP_STATE();
271 				if (c == /*{*/ '}')
272 					POP_STATE();
273 				break;
274 			}
275 			/* FALLTHROUGH */
276 		case SBASE:
277 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
278 				*wp = EOS;	/* temporary */
279 				if (is_wdvarname(Xstring(ws, wp), false)) {
280 					char *p, *tmp;
281 
282 					if (arraysub(&tmp)) {
283 						*wp++ = CHAR;
284 						*wp++ = c;
285 						for (p = tmp; *p; ) {
286 							Xcheck(ws, wp);
287 							*wp++ = CHAR;
288 							*wp++ = *p++;
289 						}
290 						afree(tmp, ATEMP);
291 						break;
292 					} else {
293 						Source *s;
294 
295 						s = pushs(SREREAD,
296 						    source->areap);
297 						s->start = s->str =
298 						    s->u.freeme = tmp;
299 						s->next = source;
300 						source = s;
301 					}
302 				}
303 				*wp++ = CHAR;
304 				*wp++ = c;
305 				break;
306 			}
307 			/* FALLTHROUGH */
308  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
309 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
310 			    c == '!') {
311 				c2 = getsc();
312 				if (c2 == '(' /*)*/ ) {
313 					*wp++ = OPAT;
314 					*wp++ = c;
315 					PUSH_STATE(SPATTERN);
316 					break;
317 				}
318 				ungetsc(c2);
319 			}
320 			/* FALLTHROUGH */
321  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
322 			switch (c) {
323 			case '\\':
324  getsc_qchar:
325 				if ((c = getsc())) {
326 					/* trailing \ is lost */
327 					*wp++ = QCHAR;
328 					*wp++ = c;
329 				}
330 				break;
331 			case '\'':
332  open_ssquote:
333 				*wp++ = OQUOTE;
334 				ignore_backslash_newline++;
335 				PUSH_STATE(SSQUOTE);
336 				break;
337 			case '"':
338  open_sdquote:
339 				*wp++ = OQUOTE;
340 				PUSH_STATE(SDQUOTE);
341 				break;
342 			default:
343 				goto Subst;
344 			}
345 			break;
346 
347  Subst:
348 			switch (c) {
349 			case '\\':
350 				c = getsc();
351 				switch (c) {
352 				case '"':
353 					if ((cf & HEREDOC))
354 						goto heredocquote;
355 					/* FALLTHROUGH */
356 				case '\\':
357 				case '$': case '`':
358  store_qchar:
359 					*wp++ = QCHAR;
360 					*wp++ = c;
361 					break;
362 				default:
363  heredocquote:
364 					Xcheck(ws, wp);
365 					if (c) {
366 						/* trailing \ is lost */
367 						*wp++ = CHAR;
368 						*wp++ = '\\';
369 						*wp++ = CHAR;
370 						*wp++ = c;
371 					}
372 					break;
373 				}
374 				break;
375 			case '$':
376  subst_dollar:
377 				c = getsc();
378 				if (c == '(') /*)*/ {
379 					c = getsc();
380 					if (c == '(') /*)*/ {
381 						PUSH_STATE(SASPAREN);
382 						statep->ls_sasparen.nparen = 2;
383 						statep->ls_sasparen.start =
384 						    Xsavepos(ws, wp);
385 						*wp++ = EXPRSUB;
386 					} else {
387 						ungetsc(c);
388 						PUSH_STATE(SCSPAREN);
389 						statep->ls_scsparen.nparen = 1;
390 						statep->ls_scsparen.csstate = 0;
391 						*wp++ = COMSUB;
392 					}
393 				} else if (c == '{') /*}*/ {
394 					*wp++ = OSUBST;
395 					*wp++ = '{'; /*}*/
396 					wp = get_brace_var(&ws, wp);
397 					c = getsc();
398 					/* allow :# and :% (ksh88 compat) */
399 					if (c == ':') {
400 						*wp++ = CHAR;
401 						*wp++ = c;
402 						c = getsc();
403 						if (c == ':') {
404 							*wp++ = CHAR;
405 							*wp++ = '0';
406 							*wp++ = ADELIM;
407 							*wp++ = ':';
408 							PUSH_STATE(SBRACE);
409 							PUSH_STATE(SADELIM);
410 							statep->ls_sadelim.style = SADELIM_BASH;
411 							statep->ls_sadelim.delimiter = ':';
412 							statep->ls_sadelim.num = 1;
413 							statep->ls_sadelim.nparen = 0;
414 							break;
415 						} else if (ksh_isdigit(c) ||
416 						    c == '('/*)*/ || c == ' ' ||
417 						    c == '$' /* XXX what else? */) {
418 							/* substring subst. */
419 							if (c != ' ') {
420 								*wp++ = CHAR;
421 								*wp++ = ' ';
422 							}
423 							ungetsc(c);
424 							PUSH_STATE(SBRACE);
425 							PUSH_STATE(SADELIM);
426 							statep->ls_sadelim.style = SADELIM_BASH;
427 							statep->ls_sadelim.delimiter = ':';
428 							statep->ls_sadelim.num = 2;
429 							statep->ls_sadelim.nparen = 0;
430 							break;
431 						}
432 					} else if (c == '/') {
433 						*wp++ = CHAR;
434 						*wp++ = c;
435 						if ((c = getsc()) == '/') {
436 							*wp++ = ADELIM;
437 							*wp++ = c;
438 						} else
439 							ungetsc(c);
440 						PUSH_STATE(SBRACE);
441 						PUSH_STATE(SADELIM);
442 						statep->ls_sadelim.style = SADELIM_BASH;
443 						statep->ls_sadelim.delimiter = '/';
444 						statep->ls_sadelim.num = 1;
445 						statep->ls_sadelim.nparen = 0;
446 						break;
447 					}
448 					/* If this is a trim operation,
449 					 * treat (,|,) specially in STBRACE.
450 					 */
451 					if (ctype(c, C_SUBOP2)) {
452 						ungetsc(c);
453 						PUSH_STATE(STBRACE);
454 					} else {
455 						ungetsc(c);
456 						if (state == SDQUOTE)
457 							PUSH_STATE(SQBRACE);
458 						else
459 							PUSH_STATE(SBRACE);
460 					}
461 				} else if (ksh_isalphx(c)) {
462 					*wp++ = OSUBST;
463 					*wp++ = 'X';
464 					do {
465 						Xcheck(ws, wp);
466 						*wp++ = c;
467 						c = getsc();
468 					} while (ksh_isalnux(c));
469 					*wp++ = '\0';
470 					*wp++ = CSUBST;
471 					*wp++ = 'X';
472 					ungetsc(c);
473 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
474 					Xcheck(ws, wp);
475 					*wp++ = OSUBST;
476 					*wp++ = 'X';
477 					*wp++ = c;
478 					*wp++ = '\0';
479 					*wp++ = CSUBST;
480 					*wp++ = 'X';
481 				} else if (c == '\'' && (state == SBASE)) {
482 					/* XXX which other states are valid? */
483 					*wp++ = OQUOTE;
484 					ignore_backslash_newline++;
485 					PUSH_STATE(SEQUOTE);
486 					statep->ls_sequote.got_NUL = false;
487 					break;
488 				} else {
489 					*wp++ = CHAR;
490 					*wp++ = '$';
491 					ungetsc(c);
492 				}
493 				break;
494 			case '`':
495  subst_gravis:
496 				PUSH_STATE(SBQUOTE);
497 				*wp++ = COMSUB;
498 				/* Need to know if we are inside double quotes
499 				 * since sh/AT&T-ksh translate the \" to " in
500 				 * "`...\"...`".
501 				 * This is not done in POSIX mode (section
502 				 * 3.2.3, Double Quotes: "The backquote shall
503 				 * retain its special meaning introducing the
504 				 * other form of command substitution (see
505 				 * 3.6.3). The portion of the quoted string
506 				 * from the initial backquote and the
507 				 * characters up to the next backquote that
508 				 * is not preceded by a backslash (having
509 				 * escape characters removed) defines that
510 				 * command whose output replaces `...` when
511 				 * the word is expanded."
512 				 * Section 3.6.3, Command Substitution:
513 				 * "Within the backquoted style of command
514 				 * substitution, backslash shall retain its
515 				 * literal meaning, except when followed by
516 				 * $ ` \.").
517 				 */
518 				statep->ls_sbquote.indquotes = 0;
519 				s2 = statep;
520 				base = state_info.base;
521 				while (1) {
522 					for (; s2 != base; s2--) {
523 						if (s2->ls_state == SDQUOTE) {
524 							statep->ls_sbquote.indquotes = 1;
525 							break;
526 						}
527 					}
528 					if (s2 != base)
529 						break;
530 					if (!(s2 = s2->ls_info.base))
531 						break;
532 					base = s2-- - STATE_BSIZE;
533 				}
534 				break;
535 			case QCHAR:
536 				if (cf & LQCHAR) {
537 					*wp++ = QCHAR;
538 					*wp++ = getsc();
539 					break;
540 				}
541 				/* FALLTHROUGH */
542 			default:
543  store_char:
544 				*wp++ = CHAR;
545 				*wp++ = c;
546 			}
547 			break;
548 
549 		case SEQUOTE:
550 			if (c == '\'') {
551 				POP_STATE();
552 				*wp++ = CQUOTE;
553 				ignore_backslash_newline--;
554 			} else if (c == '\\') {
555 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
556 					c2 = s_get();
557 				if (c2 == 0)
558 					statep->ls_sequote.got_NUL = true;
559 				if (!statep->ls_sequote.got_NUL) {
560 					char ts[4];
561 
562 					if ((unsigned int)c2 < 0x100) {
563 						*wp++ = QCHAR;
564 						*wp++ = c2;
565 					} else {
566 						c = utf_wctomb(ts, c2 - 0x100);
567 						ts[c] = 0;
568 						for (c = 0; ts[c]; ++c) {
569 							*wp++ = QCHAR;
570 							*wp++ = ts[c];
571 						}
572 					}
573 				}
574 			} else if (!statep->ls_sequote.got_NUL) {
575 				*wp++ = QCHAR;
576 				*wp++ = c;
577 			}
578 			break;
579 
580 		case SSQUOTE:
581 			if (c == '\'') {
582 				POP_STATE();
583 				*wp++ = CQUOTE;
584 				ignore_backslash_newline--;
585 			} else {
586 				*wp++ = QCHAR;
587 				*wp++ = c;
588 			}
589 			break;
590 
591 		case SDQUOTE:
592 			if (c == '"') {
593 				POP_STATE();
594 				*wp++ = CQUOTE;
595 			} else
596 				goto Subst;
597 			break;
598 
599 		case SCSPAREN:	/* $( ... ) */
600 			/* todo: deal with $(...) quoting properly
601 			 * kludge to partly fake quoting inside $(...): doesn't
602 			 * really work because nested $(...) or ${...} inside
603 			 * double quotes aren't dealt with.
604 			 */
605 			switch (statep->ls_scsparen.csstate) {
606 			case 0:	/* normal */
607 				switch (c) {
608 				case '(':
609 					statep->ls_scsparen.nparen++;
610 					break;
611 				case ')':
612 					statep->ls_scsparen.nparen--;
613 					break;
614 				case '\\':
615 					statep->ls_scsparen.csstate = 1;
616 					break;
617 				case '"':
618 					statep->ls_scsparen.csstate = 2;
619 					break;
620 				case '\'':
621 					statep->ls_scsparen.csstate = 4;
622 					ignore_backslash_newline++;
623 					break;
624 				}
625 				break;
626 
627 			case 1:	/* backslash in normal mode */
628 			case 3:	/* backslash in double quotes */
629 				--statep->ls_scsparen.csstate;
630 				break;
631 
632 			case 2:	/* double quotes */
633 				if (c == '"')
634 					statep->ls_scsparen.csstate = 0;
635 				else if (c == '\\')
636 					statep->ls_scsparen.csstate = 3;
637 				break;
638 
639 			case 4:	/* single quotes */
640 				if (c == '\'') {
641 					statep->ls_scsparen.csstate = 0;
642 					ignore_backslash_newline--;
643 				}
644 				break;
645 			}
646 			if (statep->ls_scsparen.nparen == 0) {
647 				POP_STATE();
648 				*wp++ = 0;	/* end of COMSUB */
649 			} else
650 				*wp++ = c;
651 			break;
652 
653 		case SASPAREN:	/* $(( ... )) */
654 			/* XXX should nest using existing state machine
655 			 * (embed "...", $(...), etc.) */
656 			if (c == '(')
657 				statep->ls_sasparen.nparen++;
658 			else if (c == ')') {
659 				statep->ls_sasparen.nparen--;
660 				if (statep->ls_sasparen.nparen == 1) {
661 					/*(*/
662 					if ((c2 = getsc()) == ')') {
663 						POP_STATE();
664 						/* end of EXPRSUB */
665 						*wp++ = 0;
666 						break;
667 					} else {
668 						char *s;
669 
670 						ungetsc(c2);
671 						/* mismatched parenthesis -
672 						 * assume we were really
673 						 * parsing a $(...) expression
674 						 */
675 						s = Xrestpos(ws, wp,
676 						    statep->ls_sasparen.start);
677 						memmove(s + 1, s, wp - s);
678 						*s++ = COMSUB;
679 						*s = '('; /*)*/
680 						wp++;
681 						statep->ls_scsparen.nparen = 1;
682 						statep->ls_scsparen.csstate = 0;
683 						state = statep->ls_state =
684 						    SCSPAREN;
685 					}
686 				}
687 			}
688 			*wp++ = c;
689 			break;
690 
691 		case SQBRACE:
692 			if (c == '\\') {
693 				/*
694 				 * perform POSIX "quote removal" if the back-
695 				 * slash is "special", i.e. same cases as the
696 				 * {case '\\':} in Subst: plus closing brace;
697 				 * in mksh code "quote removal" on '\c' means
698 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
699 				 * emitted (in heredocquote:)
700 				 */
701 				if ((c = getsc()) == '"' || c == '\\' ||
702 				    c == '$' || c == '`' || c == /*{*/'}')
703 					goto store_qchar;
704 				goto heredocquote;
705 			}
706 			goto common_SQBRACE;
707 
708 		case SBRACE:
709 			if (c == '\'')
710 				goto open_ssquote;
711 			else if (c == '\\')
712 				goto getsc_qchar;
713  common_SQBRACE:
714 			if (c == '"')
715 				goto open_sdquote;
716 			else if (c == '$')
717 				goto subst_dollar;
718 			else if (c == '`')
719 				goto subst_gravis;
720 			else if (c != /*{*/ '}')
721 				goto store_char;
722 			POP_STATE();
723 			*wp++ = CSUBST;
724 			*wp++ = /*{*/ '}';
725 			break;
726 
727 		case STBRACE:
728 			/* Same as SBASE, except (,|,) treated specially */
729 			if (c == /*{*/ '}') {
730 				POP_STATE();
731 				*wp++ = CSUBST;
732 				*wp++ = /*{*/ '}';
733 			} else if (c == '|') {
734 				*wp++ = SPAT;
735 			} else if (c == '(') {
736 				*wp++ = OPAT;
737 				*wp++ = ' ';	/* simile for @ */
738 				PUSH_STATE(SPATTERN);
739 			} else
740 				goto Sbase1;
741 			break;
742 
743 		case SBQUOTE:
744 			if (c == '`') {
745 				*wp++ = 0;
746 				POP_STATE();
747 			} else if (c == '\\') {
748 				switch (c = getsc()) {
749 				case '\\':
750 				case '$': case '`':
751 					*wp++ = c;
752 					break;
753 				case '"':
754 					if (statep->ls_sbquote.indquotes) {
755 						*wp++ = c;
756 						break;
757 					}
758 					/* FALLTHROUGH */
759 				default:
760 					if (c) {
761 						/* trailing \ is lost */
762 						*wp++ = '\\';
763 						*wp++ = c;
764 					}
765 					break;
766 				}
767 			} else
768 				*wp++ = c;
769 			break;
770 
771 		case SWORD:	/* ONEWORD */
772 			goto Subst;
773 
774 		case SLETPAREN:	/* LETEXPR: (( ... )) */
775 			/*(*/
776 			if (c == ')') {
777 				if (statep->ls_sletparen.nparen > 0)
778 					--statep->ls_sletparen.nparen;
779 				else if ((c2 = getsc()) == /*(*/ ')') {
780 					c = 0;
781 					*wp++ = CQUOTE;
782 					goto Done;
783 				} else {
784 					Source *s;
785 
786 					ungetsc(c2);
787 					/* mismatched parenthesis -
788 					 * assume we were really
789 					 * parsing a $(...) expression
790 					 */
791 					*wp = EOS;
792 					sp = Xstring(ws, wp);
793 					dp = wdstrip(sp, true, false);
794 					s = pushs(SREREAD, source->areap);
795 					s->start = s->str = s->u.freeme = dp;
796 					s->next = source;
797 					source = s;
798 					return ('('/*)*/);
799 				}
800 			} else if (c == '(')
801 				/* parenthesis inside quotes and backslashes
802 				 * are lost, but AT&T ksh doesn't count them
803 				 * either
804 				 */
805 				++statep->ls_sletparen.nparen;
806 			goto Sbase2;
807 
808 #ifndef MKSH_SMALL
809 		case SLETARRAY:	/* LETARRAY: =( ... ) */
810 			if (c == '('/*)*/)
811 				++statep->ls_sletarray.nparen;
812 			else if (c == /*(*/')')
813 				if (statep->ls_sletarray.nparen-- == 0) {
814 					c = 0;
815 					goto Done;
816 				}
817 			*wp++ = CHAR;
818 			*wp++ = c;
819 			break;
820 #endif
821 
822 		case SHERESTRING:	/* <<< delimiter */
823 			if (c == '\\') {
824 				c = getsc();
825 				if (c) {
826 					/* trailing \ is lost */
827 					*wp++ = QCHAR;
828 					*wp++ = c;
829 				}
830 				/* invoke quoting mode */
831 				Xstring(ws, wp)[0] = QCHAR;
832 			} else if (c == '$') {
833 				if ((c2 = getsc()) == '\'') {
834 					PUSH_STATE(SEQUOTE);
835 					statep->ls_sequote.got_NUL = false;
836 					goto sherestring_quoted;
837 				}
838 				ungetsc(c2);
839 				goto sherestring_regular;
840 			} else if (c == '\'') {
841 				PUSH_STATE(SSQUOTE);
842  sherestring_quoted:
843 				*wp++ = OQUOTE;
844 				ignore_backslash_newline++;
845 				/* invoke quoting mode */
846 				Xstring(ws, wp)[0] = QCHAR;
847 			} else if (c == '"') {
848 				state = statep->ls_state = SHEREDQUOTE;
849 				*wp++ = OQUOTE;
850 				/* just don't IFS split; no quoting mode */
851 			} else {
852  sherestring_regular:
853 				*wp++ = CHAR;
854 				*wp++ = c;
855 			}
856 			break;
857 
858 		case SHEREDELIM:	/* <<,<<- delimiter */
859 			/* XXX chuck this state (and the next) - use
860 			 * the existing states ($ and \`...` should be
861 			 * stripped of their specialness after the
862 			 * fact).
863 			 */
864 			/* here delimiters need a special case since
865 			 * $ and `...` are not to be treated specially
866 			 */
867 			if (c == '\\') {
868 				c = getsc();
869 				if (c) {
870 					/* trailing \ is lost */
871 					*wp++ = QCHAR;
872 					*wp++ = c;
873 				}
874 			} else if (c == '$') {
875 				if ((c2 = getsc()) == '\'') {
876 					PUSH_STATE(SEQUOTE);
877 					statep->ls_sequote.got_NUL = false;
878 					goto sheredelim_quoted;
879 				}
880 				ungetsc(c2);
881 				goto sheredelim_regular;
882 			} else if (c == '\'') {
883 				PUSH_STATE(SSQUOTE);
884  sheredelim_quoted:
885 				*wp++ = OQUOTE;
886 				ignore_backslash_newline++;
887 			} else if (c == '"') {
888 				state = statep->ls_state = SHEREDQUOTE;
889 				*wp++ = OQUOTE;
890 			} else {
891  sheredelim_regular:
892 				*wp++ = CHAR;
893 				*wp++ = c;
894 			}
895 			break;
896 
897 		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
898 			if (c == '"') {
899 				*wp++ = CQUOTE;
900 				state = statep->ls_state =
901 				    /* dp[1] == '<' means here string */
902 				    Xstring(ws, wp)[1] == '<' ?
903 				    SHERESTRING : SHEREDELIM;
904 			} else {
905 				if (c == '\\') {
906 					switch (c = getsc()) {
907 					case '\\': case '"':
908 					case '$': case '`':
909 						break;
910 					default:
911 						if (c) {
912 							/* trailing \ lost */
913 							*wp++ = CHAR;
914 							*wp++ = '\\';
915 						}
916 						break;
917 					}
918 				}
919 				*wp++ = CHAR;
920 				*wp++ = c;
921 			}
922 			break;
923 
924 		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
925 			if ( /*(*/ c == ')') {
926 				*wp++ = CPAT;
927 				POP_STATE();
928 			} else if (c == '|') {
929 				*wp++ = SPAT;
930 			} else if (c == '(') {
931 				*wp++ = OPAT;
932 				*wp++ = ' ';	/* simile for @ */
933 				PUSH_STATE(SPATTERN);
934 			} else
935 				goto Sbase1;
936 			break;
937 		}
938 	}
939  Done:
940 	Xcheck(ws, wp);
941 	if (statep != &states[1])
942 		/* XXX figure out what is missing */
943 		yyerror("no closing quote\n");
944 
945 #ifndef MKSH_SMALL
946 	if (state == SLETARRAY && statep->ls_sletarray.nparen != -1)
947 		yyerror("%s: ')' missing\n", T_synerr);
948 #endif
949 
950 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
951 	if (state == SHEREDELIM || state == SHERESTRING)
952 		state = SBASE;
953 
954 	dp = Xstring(ws, wp);
955 	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
956 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
957 
958 		if (Xlength(ws, wp) == 0)
959 			iop->unit = c == '<' ? 0 : 1;
960 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
961 			if (dp[c2] != CHAR)
962 				goto no_iop;
963 			if (!ksh_isdigit(dp[c2 + 1]))
964 				goto no_iop;
965 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
966 		}
967 
968 		if (iop->unit >= FDBASE)
969 			goto no_iop;
970 
971 		if (c == '&') {
972 			if ((c2 = getsc()) != '>') {
973 				ungetsc(c2);
974 				goto no_iop;
975 			}
976 			c = c2;
977 			iop->flag = IOBASH;
978 		} else
979 			iop->flag = 0;
980 
981 		c2 = getsc();
982 		/* <<, >>, <> are ok, >< is not */
983 		if (c == c2 || (c == '<' && c2 == '>')) {
984 			iop->flag |= c == c2 ?
985 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
986 			if (iop->flag == IOHERE) {
987 				if ((c2 = getsc()) == '-')
988 					iop->flag |= IOSKIP;
989 				else
990 					ungetsc(c2);
991 			}
992 		} else if (c2 == '&')
993 			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
994 		else {
995 			iop->flag |= c == '>' ? IOWRITE : IOREAD;
996 			if (c == '>' && c2 == '|')
997 				iop->flag |= IOCLOB;
998 			else
999 				ungetsc(c2);
1000 		}
1001 
1002 		iop->name = NULL;
1003 		iop->delim = NULL;
1004 		iop->heredoc = NULL;
1005 		Xfree(ws, wp);	/* free word */
1006 		yylval.iop = iop;
1007 		return (REDIR);
1008  no_iop:
1009 		;
1010 	}
1011 
1012 	if (wp == dp && state == SBASE) {
1013 		Xfree(ws, wp);	/* free word */
1014 		/* no word, process LEX1 character */
1015 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
1016 			if ((c2 = getsc()) == c)
1017 				c = (c == ';') ? BREAK :
1018 				    (c == '|') ? LOGOR :
1019 				    (c == '&') ? LOGAND :
1020 				    /* c == '(' ) */ MDPAREN;
1021 			else if (c == '|' && c2 == '&')
1022 				c = COPROC;
1023 			else
1024 				ungetsc(c2);
1025 		} else if (c == '\n') {
1026 			gethere(false);
1027 			if (cf & CONTIN)
1028 				goto Again;
1029 		} else if (c == '\0')
1030 			/* need here strings at EOF */
1031 			gethere(true);
1032 		return (c);
1033 	}
1034 
1035 	*wp++ = EOS;		/* terminate word */
1036 	yylval.cp = Xclose(ws, wp);
1037 	if (state == SWORD || state == SLETPAREN
1038 	    /* XXX ONEWORD? */
1039 #ifndef MKSH_SMALL
1040 	    || state == SLETARRAY
1041 #endif
1042 	    )
1043 		return (LWORD);
1044 
1045 	/* unget terminator */
1046 	ungetsc(c);
1047 
1048 	/*
1049 	 * note: the alias-vs-function code below depends on several
1050 	 * interna: starting from here, source->str is not modified;
1051 	 * the way getsc() and ungetsc() operate; etc.
1052 	 */
1053 
1054 	/* copy word to unprefixed string ident */
1055 	sp = yylval.cp;
1056 	dp = ident;
1057 	if ((cf & HEREDELIM) && (sp[1] == '<'))
1058 		while (dp < ident+IDENT) {
1059 			if ((c = *sp++) == CHAR)
1060 				*dp++ = *sp++;
1061 			else if ((c != OQUOTE) && (c != CQUOTE))
1062 				break;
1063 		}
1064 	else
1065 		while (dp < ident+IDENT && (c = *sp++) == CHAR)
1066 			*dp++ = *sp++;
1067 	/* Make sure the ident array stays '\0' padded */
1068 	memset(dp, 0, (ident+IDENT) - dp + 1);
1069 	if (c != EOS)
1070 		*ident = '\0';	/* word is not unquoted */
1071 
1072 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
1073 		struct tbl *p;
1074 		uint32_t h = hash(ident);
1075 
1076 		/* { */
1077 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1078 		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
1079 			afree(yylval.cp, ATEMP);
1080 			return (p->val.i);
1081 		}
1082 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1083 		    (p->flag & ISSET)) {
1084 			/*
1085 			 * this still points to the same character as the
1086 			 * ungetsc'd terminator from above
1087 			 */
1088 			const char *cp = source->str;
1089 
1090 			/* prefer POSIX but not Korn functions over aliases */
1091 			while (*cp == ' ' || *cp == '\t')
1092 				/*
1093 				 * this is like getsc() without skipping
1094 				 * over Source boundaries (including not
1095 				 * parsing ungetsc'd characters that got
1096 				 * pushed into an SREREAD) which is what
1097 				 * we want here anyway: find out whether
1098 				 * the alias name is followed by a POSIX
1099 				 * function definition (only the opening
1100 				 * parenthesis is checked though)
1101 				 */
1102 				++cp;
1103 			/* prefer functions over aliases */
1104 			if (*cp == '(' /*)*/)
1105 				/*
1106 				 * delete alias upon encountering function
1107 				 * definition
1108 				 */
1109 				ktdelete(p);
1110 			else {
1111 				Source *s = source;
1112 
1113 				while (s && (s->flags & SF_HASALIAS))
1114 					if (s->u.tblp == p)
1115 						return (LWORD);
1116 					else
1117 						s = s->next;
1118 				/* push alias expansion */
1119 				s = pushs(SALIAS, source->areap);
1120 				s->start = s->str = p->val.s;
1121 				s->u.tblp = p;
1122 				s->flags |= SF_HASALIAS;
1123 				s->next = source;
1124 				if (source->type == SEOF) {
1125 					/* prevent infinite recursion at EOS */
1126 					source->u.tblp = p;
1127 					source->flags |= SF_HASALIAS;
1128 				}
1129 				source = s;
1130 				afree(yylval.cp, ATEMP);
1131 				goto Again;
1132 			}
1133 		}
1134 	}
1135 
1136 	return (LWORD);
1137 }
1138 
1139 static void
gethere(bool iseof)1140 gethere(bool iseof)
1141 {
1142 	struct ioword **p;
1143 
1144 	for (p = heres; p < herep; p++)
1145 		if (iseof && (*p)->delim[1] != '<')
1146 			/* only here strings at EOF */
1147 			return;
1148 		else
1149 			readhere(*p);
1150 	herep = heres;
1151 }
1152 
/*
 * Read the text belonging to a "<<word" redirection and store it in
 * iop->heredoc.
 *
 * A here string (iop->delim[1] == '<') is built from the evaluated
 * delimiter word itself. Otherwise input is collected with getsc()
 * line by line until a line consisting only of the evaluated
 * delimiter is seen; if IOSKIP is set in iop->flag, leading tabs of
 * each line are dropped. Running out of input before the delimiter
 * line is found is reported through yyerror(), except that end of
 * input directly after the complete delimiter is accepted.
 */

static void
readhere(struct ioword *iop)
{
	int c;
	char *volatile eof;
	char *eofp;
	int skiptabs;
	XString xs;
	char *xp;
	int xpos;

	if (iop->delim[1] == '<') {
		/* process the here string */
		xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
		/*
		 * Shift the text left by one, dropping its first character,
		 * and put a newline into the slot freed at the end; the
		 * string length and its terminating NUL stay as they were.
		 */
		c = strlen(xp) - 1;
		memmove(xp, xp + 1, c);
		xp[c] = '\n';
		return;
	}

	/* the evaluated delimiter each input line is compared against */
	eof = evalstr(iop->delim, 0);

	/*
	 * Without IOEVAL, make getsc_bn() hand back characters unchanged
	 * (no backslash-newline removal) while the body is read; this is
	 * undone at the end of the function.
	 */
	if (!(iop->flag & IOEVAL))
		ignore_backslash_newline++;

	Xinit(xs, xp, 256, ATEMP);

	/* one iteration per input line */
	for (;;) {
		eofp = eof;
		skiptabs = iop->flag & IOSKIP;
		/* remember where this line starts in case it is the delimiter */
		xpos = Xsavepos(xs, xp);
		/*
		 * Compare the start of the line with the delimiter, storing
		 * the characters that match as we go.
		 */
		while ((c = getsc()) != 0) {
			if (skiptabs) {
				if (c == '\t')
					continue;
				skiptabs = 0;
			}
			if (c != *eofp)
				break;
			Xcheck(xs, xp);
			Xput(xs, xp, c);
			eofp++;
		}
		/* Allow EOF here so commands without trailing newlines
		 * will work (eg, ksh -c '...', $(...), etc).
		 */
		if (*eofp == '\0' && (c == 0 || c == '\n')) {
			/* delimiter line: discard what was stored for it */
			xp = Xrestpos(xs, xp, xpos);
			break;
		}
		/*
		 * Ordinary body line: push the mismatching character back
		 * and copy the rest of the line, newline included.
		 */
		ungetsc(c);
		while ((c = getsc()) != '\n') {
			if (c == 0)
				yyerror("here document '%s' unclosed\n", eof);
			Xcheck(xs, xp);
			Xput(xs, xp, c);
		}
		Xcheck(xs, xp);
		Xput(xs, xp, c);
	}
	Xput(xs, xp, '\0');
	iop->heredoc = Xclose(xs, xp);

	if (!(iop->flag & IOEVAL))
		ignore_backslash_newline--;
}
1223 
1224 void
yyerror(const char * fmt,...)1225 yyerror(const char *fmt, ...)
1226 {
1227 	va_list va;
1228 
1229 	/* pop aliases and re-reads */
1230 	while (source->type == SALIAS || source->type == SREREAD)
1231 		source = source->next;
1232 	source->str = null;	/* zap pending input */
1233 
1234 	error_prefix(true);
1235 	va_start(va, fmt);
1236 	shf_vfprintf(shl_out, fmt, va);
1237 	va_end(va);
1238 	errorfz();
1239 }
1240 
1241 /*
1242  * input for yylex with alias expansion
1243  */
1244 
1245 Source *
pushs(int type,Area * areap)1246 pushs(int type, Area *areap)
1247 {
1248 	Source *s;
1249 
1250 	s = alloc(sizeof(Source), areap);
1251 	memset(s, 0, sizeof(Source));
1252 	s->type = type;
1253 	s->str = null;
1254 	s->areap = areap;
1255 	if (type == SFILE || type == SSTDIN)
1256 		XinitN(s->xs, 256, s->areap);
1257 	return (s);
1258 }
1259 
/*
 * Fetch the next character from the current input source.
 *
 * Whenever the buffer of the source being read is exhausted (a NUL is
 * read from it), the loop below refills that buffer or moves on to
 * another source, depending on s->type. Returns 0 when no further
 * input is available. For a source flagged SF_FIRST, the first
 * character returned is checked for a UTF-8 byte order mark, which is
 * skipped and switches UTFMODE on.
 */
static int
getsc__(void)
{
	Source *s = source;
	int c;

 getsc_again:
	while ((c = *s->str++) == 0) {
		s->str = NULL;		/* return 0 for EOF by default */
		switch (s->type) {
		case SEOF:
			/* already at EOF: reset str and keep reporting 0 */
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* read the next line; leaves s->str NULL at EOF */
			getsc_line(s);
			break;

		case SWSTR:
			/* nothing to refill: falls into the EOF check below */
			break;

		case SSTRING:
			/* nothing to refill: falls into the EOF check below */
			break;

		case SWORDS:
			/* hand out the next word, then a separator */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			if (*s->u.strv == NULL) {
				/* no words left: finish with a newline */
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				/* more words follow: separate with a space */
				s->start = s->str = " ";
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/*
				 * The look-ahead character parked in ugbuf
				 * (see the last branch) has been consumed,
				 * so the alias can finally be popped.
				 */
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
				source = s = s->next;	/* pop source stack */
				/* Note that this alias ended with a space,
				 * enabling alias expansion on the following
				 * word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/* At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need
				 * to return the next character. Do this
				 * by temporarily popping the alias to get
				 * the next character and then put it back
				 * in the source list with the SF_ALIASEND
				 * flag set.
				 */
				source = s->next;	/* pop source stack */
				source->flags |= s->flags & SF_ALIAS;
				c = getsc__();
				if (c) {
					/*
					 * Park the character in the alias
					 * entry's ugbuf and re-push the entry
					 * so it is returned from there.
					 */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading eof twice */
					s->str = NULL;
					break;
				}
			}
			/* retry the read with whatever s is now */
			continue;

		case SREREAD:
			/* release the buffer unless it is the inline ugbuf */
			if (s->start != s->ugbuf)	/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing was refilled: turn this source into SEOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		/* SF_ECHO: show the freshly loaded input on shl_out */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	/* check for UTF-8 byte order mark */
	if (s->flags & SF_FIRST) {
		/* only the very first character is examined */
		s->flags &= ~SF_FIRST;
		if (((unsigned char)c == 0xEF) &&
		    (((const unsigned char *)(s->str))[0] == 0xBB) &&
		    (((const unsigned char *)(s->str))[1] == 0xBF)) {
			/* skip the two remaining BOM bytes and read again */
			s->str += 2;
			UTFMODE = 1;
			goto getsc_again;
		}
	}
	return (c);
}
1371 
/*
 * Read the next input line for source s into its s->xs buffer and
 * point s->start and s->str at it.
 *
 * When nothing could be read, s->str is set to NULL (and an SFILE
 * source's descriptor is closed via shf_fdclose()). For interactive
 * input (FTALKING set and s is SSTDIN) the function also deals with
 * the read timeout, prompting, the history list, and switches the
 * prompt to PS2 before returning.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	int have_tty = interactive && (s->flags & SF_TTY);

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	/* arm the input timeout while waiting on a tty */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive)
		change_winsz();
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		/* an editing mode is active: read the line through x_read() */
		int nread;

		nread = x_read(xp, LINE);
		if (nread < 0)	/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else {
		/* plain read: prompt if interactive, else count the line */
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		while (1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop when nothing was read or a full line is in */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			xp++;	/* move past NUL so doubling works... */
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			xp--;	/* ...and move back again */
		}
		/* flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing elsewhere, dealing with redirections,
		 * etc.
		 * todo: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/* XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	/* disarm the input timeout again */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
#ifndef MKSH_SMALL
	/*
	 * An interactive line starting with '!' at the PS1 prompt gets
	 * the fc_e_ string (fc_e_n bytes) put in front of it, with the
	 * '!' itself turned into a space.
	 */
	if (interactive && *cp == '!' && cur_prompt == PS1) {
		int linelen;

		linelen = Xlength(s->xs, xp);
		XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1);
		/* reload after potential realloc */
		cp = Xstring(s->xs, xp);
		/* change initial '!' into space */
		*cp = ' ';
		/* NUL terminate the current string */
		*xp = '\0';
		/* move the actual string forward */
		memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1);
		xp += fc_e_n;
		/* prepend it with "fc -e -" */
		memcpy(cp, fc_e_, fc_e_n);
	}
#endif
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str &&
	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
		/*
		 * Non-empty interactive line: save it in the history, but
		 * at PS1 only if its first character does not satisfy
		 * ctype(..., C_IFS | C_IFSWS).
		 */
		histsave(&s->line, s->str, true, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
	} else if (interactive && cur_prompt == PS1) {
		/* line made up of C_IFSWS characters only: just histsync() */
		cp = Xstring(s->xs, xp);
		while (*cp && ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp)
			histsync();
#endif
	}
	/* anything read after this line is a continuation */
	if (interactive)
		set_prompt(PS2, NULL);
}
1485 
/*
 * Select the prompt to use: records `to' in cur_prompt and sets the
 * global prompt string accordingly.
 *
 * PS1: the value of the PS1 parameter has its '!' sequences replaced
 *	and is then run through substitute(); if that fails (control
 *	returns through the sigsetjmp below), safe_prompt is used.
 * PS2: the value of the PS2 parameter is used as is.
 *
 * s, if not NULL, supplies the line number shown for '!' in PS1.
 * Any other value of `to' only updates cur_prompt.
 */
void
set_prompt(int to, Source *s)
{
	cur_prompt = to;

	switch (to) {
	case PS1:	/* command */
		/* Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what AT&T ksh does (it does it after
		 * substitutions); POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen(NULL, strlen(ps1) * 2,
			    SHF_WR | SHF_DYNAMIC, NULL);
			/*
			 * Copy PS1 into the string stream: "!!" becomes a
			 * single '!', a lone '!' becomes s->line + 1 (or 0
			 * without a source), everything else is copied
			 * unchanged.
			 */
			while (*ps1)
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, "%d",
						s ? s->line + 1 : 0);
			ps1 = shf_sclose(shf);
			/*
			 * Remember the current ATEMP before newenv() so the
			 * final prompt can be duplicated into it.
			 */
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (sigsetjmp(e->jbuf, 0)) {
				prompt = safe_prompt;
				/* Don't print an error - assume it has already
				 * been printed. Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else {
				char *cp = substitute(ps1, 0);
				strdupx(prompt, cp, saved_atemp);
			}
			quitenv(NULL);
		}
		break;
	case PS2:	/* command continuation */
		prompt = str_val(global("PS2"));
		break;
	}
}
1535 
1536 static int
dopprompt(const char * cp,int ntruncate,bool doprint)1537 dopprompt(const char *cp, int ntruncate, bool doprint)
1538 {
1539 	int columns = 0, lines = 0, indelimit = 0;
1540 	char delimiter = 0;
1541 
1542 	/* Undocumented AT&T ksh feature:
1543 	 * If the second char in the prompt string is \r then the first char
1544 	 * is taken to be a non-printing delimiter and any chars between two
1545 	 * instances of the delimiter are not considered to be part of the
1546 	 * prompt length
1547 	 */
1548 	if (*cp && cp[1] == '\r') {
1549 		delimiter = *cp;
1550 		cp += 2;
1551 	}
1552 	for (; *cp; cp++) {
1553 		if (indelimit && *cp != delimiter)
1554 			;
1555 		else if (*cp == '\n' || *cp == '\r') {
1556 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1557 			columns = 0;
1558 		} else if (*cp == '\t') {
1559 			columns = (columns | 7) + 1;
1560 		} else if (*cp == '\b') {
1561 			if (columns > 0)
1562 				columns--;
1563 		} else if (*cp == delimiter)
1564 			indelimit = !indelimit;
1565 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1566 			const char *cp2;
1567 			columns += utf_widthadj(cp, &cp2);
1568 			if (doprint && (indelimit ||
1569 			    (ntruncate < (x_cols * lines + columns))))
1570 				shf_write(cp, cp2 - cp, shl_out);
1571 			cp = cp2 - /* loop increment */ 1;
1572 			continue;
1573 		} else
1574 			columns++;
1575 		if (doprint && (*cp != delimiter) &&
1576 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1577 			shf_putc(*cp, shl_out);
1578 	}
1579 	if (doprint)
1580 		shf_flush(shl_out);
1581 	return (x_cols * lines + columns);
1582 }
1583 
1584 
/*
 * Print the prompt string cp via dopprompt() with printing enabled;
 * ntruncate is passed through unchanged. The computed length is not
 * used.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1590 
/*
 * Return the length dopprompt() computes for the prompt string cp,
 * without printing anything.
 */
int
promptlen(const char *cp)
{
	int len = dopprompt(cp, 0, false);

	return (len);
}
1596 
/* Read the variable part of a ${...} expression (ie, up to but not including
 * the :[-+?=#%] or close-brace).
 *
 * Characters are read with getsc() and stored at wp inside *wsp; the
 * stored text is NUL terminated and the character that ended the
 * variable part is pushed back with ungetsc(). Returns the write
 * position just past the terminating NUL.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	enum parse_state {
		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
		PS_NUMBER, PS_VAR1
	} state;
	char c;

	state = PS_INITIAL;
	while (1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		case PS_INITIAL:
			/* an optional leading '#', '!' or '%' is accepted */
			if (c == '#' || c == '!' || c == '%') {
				state = PS_SAW_HASH;
				break;
			}
			/* FALLTHROUGH */
		case PS_SAW_HASH:
			/* first character of the name decides its kind */
			if (ksh_isalphx(c))
				state = PS_IDENT;
			else if (ksh_isdigit(c))
				state = PS_NUMBER;
			else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				goto out;
			break;
		case PS_IDENT:
			if (!ksh_isalnux(c)) {
				if (c == '[') {
					/*
					 * Array subscript: append '[' and the
					 * text saved by arraysub() to the name.
					 */
					char *tmp, *p;

					if (!arraysub(&tmp))
						yyerror("missing ]\n");
					/*
					 * NOTE(review): stored without an
					 * Xcheck() of its own - presumably the
					 * XString keeps spare room; confirm.
					 */
					*wp++ = c;
					for (p = tmp; *p; ) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					c = getsc();	/* the ] */
				}
				goto out;
			}
			break;
		case PS_NUMBER:
			/* a number runs as long as digits follow */
			if (!ksh_isdigit(c))
				goto out;
			break;
		case PS_VAR1:
			/* a C_VAR1 name is a single character */
			goto out;
		}
		/* c belongs to the variable part: store it */
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
 out:
	*wp++ = '\0';	/* end of variable part */
	/* c was not consumed; hand it back to the input */
	ungetsc(c);
	return (wp);
}
1663 
1664 /*
1665  * Save an array subscript - returns true if matching bracket found, false
1666  * if eof or newline was found.
1667  * (Returned string double null terminated)
1668  */
1669 static int
arraysub(char ** strp)1670 arraysub(char **strp)
1671 {
1672 	XString ws;
1673 	char	*wp;
1674 	char	c;
1675 	int	depth = 1;	/* we are just past the initial [ */
1676 
1677 	Xinit(ws, wp, 32, ATEMP);
1678 
1679 	do {
1680 		c = getsc();
1681 		Xcheck(ws, wp);
1682 		*wp++ = c;
1683 		if (c == '[')
1684 			depth++;
1685 		else if (c == ']')
1686 			depth--;
1687 	} while (depth > 0 && c && c != '\n');
1688 
1689 	*wp++ = '\0';
1690 	*strp = Xclose(ws, wp);
1691 
1692 	return (depth == 0 ? 1 : 0);
1693 }
1694 
1695 /* Unget a char: handles case when we are already at the start of the buffer */
1696 static const char *
ungetsc(int c)1697 ungetsc(int c)
1698 {
1699 	if (backslash_skip)
1700 		backslash_skip--;
1701 	/* Don't unget eof... */
1702 	if (source->str == null && c == '\0')
1703 		return (source->str);
1704 	if (source->str > source->start)
1705 		source->str--;
1706 	else {
1707 		Source *s;
1708 
1709 		s = pushs(SREREAD, source->areap);
1710 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1711 		s->start = s->str = s->ugbuf;
1712 		s->next = source;
1713 		source = s;
1714 	}
1715 	return (source->str);
1716 }
1717 
1718 
1719 /* Called to get a char that isn't a \newline sequence. */
1720 static int
getsc_bn(void)1721 getsc_bn(void)
1722 {
1723 	int c, c2;
1724 
1725 	if (ignore_backslash_newline)
1726 		return (getsc_());
1727 
1728 	if (backslash_skip == 1) {
1729 		backslash_skip = 2;
1730 		return (getsc_());
1731 	}
1732 
1733 	backslash_skip = 0;
1734 
1735 	while (1) {
1736 		c = getsc_();
1737 		if (c == '\\') {
1738 			if ((c2 = getsc_()) == '\n')
1739 				/* ignore the \newline; get the next char... */
1740 				continue;
1741 			ungetsc(c2);
1742 			backslash_skip = 1;
1743 		}
1744 		return (c);
1745 	}
1746 }
1747 
1748 static Lex_state *
push_state_(State_info * si,Lex_state * old_end)1749 push_state_(State_info *si, Lex_state *old_end)
1750 {
1751 	Lex_state *news = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP);
1752 
1753 	news[0].ls_info.base = old_end;
1754 	si->base = &news[0];
1755 	si->end = &news[STATE_BSIZE];
1756 	return (&news[1]);
1757 }
1758 
1759 static Lex_state *
pop_state_(State_info * si,Lex_state * old_end)1760 pop_state_(State_info *si, Lex_state *old_end)
1761 {
1762 	Lex_state *old_base = si->base;
1763 
1764 	si->base = old_end->ls_info.base - STATE_BSIZE;
1765 	si->end = old_end->ls_info.base;
1766 
1767 	afree(old_base, ATEMP);
1768 
1769 	return (si->base + STATE_BSIZE - 1);
1770 }
1771 
/* Function wrapper around getsc(): return the next input character. */
static int
s_get(void)
{
	int ch = getsc();

	return (ch);
}
1777 
/* Function wrapper around ungetsc(): push c back; the result is unused. */
static void
s_put(int c)
{
	(void)ungetsc(c);
}
1783