• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*	$OpenBSD: lex.c,v 1.46 2013/01/20 14:47:46 stsp Exp $	*/

/*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 *		 2011, 2012, 2013
 *	Thorsten Glaser <tg@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */
23 
24 #include "sh.h"
25 
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.182 2013/02/19 18:45:20 tg Exp $");
27 
/*
 * states while lexing word
 *
 * These values are stored in the "type" member of a Lex_state (an
 * uint8_t, hence SINVALID being 255) and mirrored in the local "state"
 * variable of yylex(), whose main switch dispatches on them.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing <<,<<-,<<< delimiter */
#define SHEREDQUOTE	11	/* parsing " in <<,<<-,<<< delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
#define SINVALID	255	/* invalid state */
48 
49 struct sretrace_info {
50 	struct sretrace_info *next;
51 	XString xs;
52 	char *xp;
53 };
54 
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The members of the union share storage, so only the one that belongs
 * to the current use of the slot is meaningful; the ls_* macros below
 * are shorthands for them.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		int start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
} Lex_state;
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85 
86 typedef struct {
87 	Lex_state *base;
88 	Lex_state *end;
89 } State_info;
90 
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);
103 static Lex_state *pop_state_i(State_info *, Lex_state *);
104 
105 static int dopprompt(const char *, int, bool);
106 
107 static int backslash_skip;
108 static int ignore_backslash_newline;
109 
/*
 * optimised getsc_bn(): take the next byte straight from the current
 * source buffer unless it is NUL or a backslash, or backslash_skip is
 * set; in those cases defer to getsc_bn()
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			    !backslash_skip ? *source->str++ : getsc_bn())
/*
 * optimised getsc_uu(): take the next byte straight from the current
 * source buffer unless it is NUL, in which case defer to getsc_uu()
 */
#define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: forms the complete body of a function returning int.
 * Evaluates carg once, appends the result to every active retrace
 * record (growing its buffer as needed) and returns it.
 */
#define o_getsc_r(carg)	{				\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);					\
}
129 
/*
 * getsc(): read the next input character via o_getsc() and record it
 * in all active retrace buffers.
 *
 * The small build uses one real function that embeds the o_getsc()
 * test; otherwise o_getsc() is expanded inline at every call site and
 * only the retrace bookkeeping lives in the getsc_r() function.
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
static int getsc(void);

static int
getsc(void)
{
	o_getsc_r(o_getsc());
}
#else
static int getsc_r(int);

/* record the already fetched character c for retracing; returns c */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r(o_getsc())
#endif
149 
/* number of Lex_state slots per block of the state stack */
#define STATE_BSIZE	8

/*
 * Both macros operate on the variables statep, state_info and state of
 * the enclosing function (yylex).
 *
 * PUSH_STATE(s): advance to the next slot, calling push_state_i() when
 * that runs into the end of the current block, then make s the type of
 * the new slot and the current state.
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/*
 * POP_STATE(): step back one slot, calling pop_state_i() when that
 * reaches the base slot of the current block, and reload the current
 * state from the slot arrived at.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)
163 
/*
 * Both macros operate on the variables statep, ws, wp, sp and dp of the
 * enclosing function (yylex) and on the retrace_info list head.
 *
 * PUSH_SRETRACE(): remember the current output position in the current
 * state slot and push a new, empty retrace record (64 byte initial
 * buffer, allocated from ATEMP) so that subsequently read input is
 * recorded.
 */
#define PUSH_SRETRACE()	do {					\
	struct sretrace_info *ri;				\
								\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * POP_SRETRACE(): rewind the output word to the position saved by
 * PUSH_SRETRACE(), NUL-terminate the recorded input and leave it in sp,
 * then unlink and free the record itself. The text sp points to is not
 * freed here; that is left to the code using the macro.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
} while (/* CONSTCOND */ 0)
182 
183 /**
184  * Lexical analyser
185  *
186  * tokens are not regular expressions, they are LL(1).
187  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
188  * hence the state stack. Note "$(...)" are now parsed recursively.
189  */
190 
191 int
yylex(int cf)192 yylex(int cf)
193 {
194 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
195 	State_info state_info;
196 	int c, c2, state;
197 	size_t cz;
198 	XString ws;		/* expandable output word */
199 	char *wp;		/* output word pointer */
200 	char *sp, *dp;
201 
202  Again:
203 	states[0].type = SINVALID;
204 	states[0].ls_base = NULL;
205 	statep = &states[1];
206 	state_info.base = states;
207 	state_info.end = &state_info.base[STATE_BSIZE];
208 
209 	Xinit(ws, wp, 64, ATEMP);
210 
211 	backslash_skip = 0;
212 	ignore_backslash_newline = 0;
213 
214 	if (cf & ONEWORD)
215 		state = SWORD;
216 	else if (cf & LETEXPR) {
217 		/* enclose arguments in (double) quotes */
218 		*wp++ = OQUOTE;
219 		state = SLETPAREN;
220 		statep->nparen = 0;
221 	} else {
222 		/* normal lexing */
223 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
224 		while ((c = getsc()) == ' ' || c == '\t')
225 			;
226 		if (c == '#') {
227 			ignore_backslash_newline++;
228 			while ((c = getsc()) != '\0' && c != '\n')
229 				;
230 			ignore_backslash_newline--;
231 		}
232 		ungetsc(c);
233 	}
234 	if (source->flags & SF_ALIAS) {
235 		/* trailing ' ' in alias definition */
236 		source->flags &= ~SF_ALIAS;
237 		cf |= ALIAS;
238 	}
239 
240 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
241 	statep->type = state;
242 
243 	/* check for here string */
244 	if (state == SHEREDELIM) {
245 		c = getsc();
246 		if (c == '<') {
247 			state = SHEREDELIM;
248 			while ((c = getsc()) == ' ' || c == '\t')
249 				;
250 			ungetsc(c);
251 			c = '<';
252 			goto accept_nonword;
253 		}
254 		ungetsc(c);
255 	}
256 
257 	/* collect non-special or quoted characters to form word */
258 	while (!((c = getsc()) == 0 ||
259 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
260 		if (state == SBASE &&
261 		    subshell_nesting_type == /*{*/ '}' &&
262 		    c == /*{*/ '}')
263 			/* possibly end ${ :;} */
264 			break;
265  accept_nonword:
266 		Xcheck(ws, wp);
267 		switch (state) {
268 		case SADELIM:
269 			if (c == '(')
270 				statep->nparen++;
271 			else if (c == ')')
272 				statep->nparen--;
273 			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
274 			    c == (int)statep->ls_adelim.delimiter)) {
275 				*wp++ = ADELIM;
276 				*wp++ = c;
277 				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
278 					POP_STATE();
279 				if (c == /*{*/ '}')
280 					POP_STATE();
281 				break;
282 			}
283 			/* FALLTHROUGH */
284 		case SBASE:
285 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286 				/* temporary */
287 				*wp = EOS;
288 				if (is_wdvarname(Xstring(ws, wp), false)) {
289 					char *p, *tmp;
290 
291 					if (arraysub(&tmp)) {
292 						*wp++ = CHAR;
293 						*wp++ = c;
294 						for (p = tmp; *p; ) {
295 							Xcheck(ws, wp);
296 							*wp++ = CHAR;
297 							*wp++ = *p++;
298 						}
299 						afree(tmp, ATEMP);
300 						break;
301 					} else {
302 						Source *s;
303 
304 						s = pushs(SREREAD,
305 						    source->areap);
306 						s->start = s->str =
307 						    s->u.freeme = tmp;
308 						s->next = source;
309 						source = s;
310 					}
311 				}
312 				*wp++ = CHAR;
313 				*wp++ = c;
314 				break;
315 			}
316 			/* FALLTHROUGH */
317  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
318 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
319 			    c == '!') {
320 				c2 = getsc();
321 				if (c2 == '(' /*)*/ ) {
322 					*wp++ = OPAT;
323 					*wp++ = c;
324 					PUSH_STATE(SPATTERN);
325 					break;
326 				}
327 				ungetsc(c2);
328 			}
329 			/* FALLTHROUGH */
330  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
331 			switch (c) {
332 			case '\\':
333  getsc_qchar:
334 				if ((c = getsc())) {
335 					/* trailing \ is lost */
336 					*wp++ = QCHAR;
337 					*wp++ = c;
338 				}
339 				break;
340 			case '\'':
341  open_ssquote:
342 				*wp++ = OQUOTE;
343 				ignore_backslash_newline++;
344 				PUSH_STATE(SSQUOTE);
345 				break;
346 			case '"':
347  open_sdquote:
348 				*wp++ = OQUOTE;
349 				PUSH_STATE(SDQUOTE);
350 				break;
351 			case '$':
352 				/*
353 				 * processing of dollar sign belongs into
354 				 * Subst, except for those which can open
355 				 * a string: $'…' and $"…"
356 				 */
357  subst_dollar_ex:
358 				c = getsc();
359 				switch (c) {
360 				case '"':
361 					goto open_sdquote;
362 				case '\'':
363 					goto open_sequote;
364 				default:
365 					goto SubstS;
366 				}
367 			default:
368 				goto Subst;
369 			}
370 			break;
371 
372  Subst:
373 			switch (c) {
374 			case '\\':
375 				c = getsc();
376 				switch (c) {
377 				case '"':
378 					if ((cf & HEREDOC))
379 						goto heredocquote;
380 					/* FALLTHROUGH */
381 				case '\\':
382 				case '$': case '`':
383  store_qchar:
384 					*wp++ = QCHAR;
385 					*wp++ = c;
386 					break;
387 				default:
388  heredocquote:
389 					Xcheck(ws, wp);
390 					if (c) {
391 						/* trailing \ is lost */
392 						*wp++ = CHAR;
393 						*wp++ = '\\';
394 						*wp++ = CHAR;
395 						*wp++ = c;
396 					}
397 					break;
398 				}
399 				break;
400 			case '$':
401 				c = getsc();
402  SubstS:
403 				if (c == '(') /*)*/ {
404 					c = getsc();
405 					if (c == '(') /*)*/ {
406 						*wp++ = EXPRSUB;
407 						PUSH_STATE(SASPAREN);
408 						statep->nparen = 2;
409 						PUSH_SRETRACE();
410 						*retrace_info->xp++ = '(';
411 					} else {
412 						ungetsc(c);
413  subst_command:
414 						c = COMSUB;
415  subst_command2:
416 						sp = yyrecursive(c);
417 						cz = strlen(sp) + 1;
418 						XcheckN(ws, wp, cz);
419 						*wp++ = c;
420 						memcpy(wp, sp, cz);
421 						wp += cz;
422 					}
423 				} else if (c == '{') /*}*/ {
424 					c = getsc();
425 					if (ctype(c, C_IFSWS)) {
426 						/*
427 						 * non-subenvironment
428 						 * "command" substitution
429 						 */
430 						c = FUNSUB;
431 						goto subst_command2;
432 					}
433 					ungetsc(c);
434 					*wp++ = OSUBST;
435 					*wp++ = '{'; /*}*/
436 					wp = get_brace_var(&ws, wp);
437 					c = getsc();
438 					/* allow :# and :% (ksh88 compat) */
439 					if (c == ':') {
440 						*wp++ = CHAR;
441 						*wp++ = c;
442 						c = getsc();
443 						if (c == ':') {
444 							*wp++ = CHAR;
445 							*wp++ = '0';
446 							*wp++ = ADELIM;
447 							*wp++ = ':';
448 							PUSH_STATE(SBRACE);
449 							PUSH_STATE(SADELIM);
450 							statep->ls_adelim.delimiter = ':';
451 							statep->ls_adelim.num = 1;
452 							statep->nparen = 0;
453 							break;
454 						} else if (ksh_isdigit(c) ||
455 						    c == '('/*)*/ || c == ' ' ||
456 						    /*XXX what else? */
457 						    c == '$') {
458 							/* substring subst. */
459 							if (c != ' ') {
460 								*wp++ = CHAR;
461 								*wp++ = ' ';
462 							}
463 							ungetsc(c);
464 							PUSH_STATE(SBRACE);
465 							PUSH_STATE(SADELIM);
466 							statep->ls_adelim.delimiter = ':';
467 							statep->ls_adelim.num = 2;
468 							statep->nparen = 0;
469 							break;
470 						}
471 					} else if (c == '/') {
472 						*wp++ = CHAR;
473 						*wp++ = c;
474 						if ((c = getsc()) == '/') {
475 							*wp++ = ADELIM;
476 							*wp++ = c;
477 						} else
478 							ungetsc(c);
479 						PUSH_STATE(SBRACE);
480 						PUSH_STATE(SADELIM);
481 						statep->ls_adelim.delimiter = '/';
482 						statep->ls_adelim.num = 1;
483 						statep->nparen = 0;
484 						break;
485 					}
486 					/*
487 					 * If this is a trim operation,
488 					 * treat (,|,) specially in STBRACE.
489 					 */
490 					if (ctype(c, C_SUBOP2)) {
491 						ungetsc(c);
492 						if (Flag(FSH))
493 							PUSH_STATE(STBRACEBOURNE);
494 						else
495 							PUSH_STATE(STBRACEKORN);
496 					} else {
497 						ungetsc(c);
498 						if (state == SDQUOTE)
499 							PUSH_STATE(SQBRACE);
500 						else
501 							PUSH_STATE(SBRACE);
502 					}
503 				} else if (ksh_isalphx(c)) {
504 					*wp++ = OSUBST;
505 					*wp++ = 'X';
506 					do {
507 						Xcheck(ws, wp);
508 						*wp++ = c;
509 						c = getsc();
510 					} while (ksh_isalnux(c));
511 					*wp++ = '\0';
512 					*wp++ = CSUBST;
513 					*wp++ = 'X';
514 					ungetsc(c);
515 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
516 					Xcheck(ws, wp);
517 					*wp++ = OSUBST;
518 					*wp++ = 'X';
519 					*wp++ = c;
520 					*wp++ = '\0';
521 					*wp++ = CSUBST;
522 					*wp++ = 'X';
523 				} else {
524 					*wp++ = CHAR;
525 					*wp++ = '$';
526 					ungetsc(c);
527 				}
528 				break;
529 			case '`':
530  subst_gravis:
531 				PUSH_STATE(SBQUOTE);
532 				*wp++ = COMSUB;
533 				/*
534 				 * Need to know if we are inside double quotes
535 				 * since sh/AT&T-ksh translate the \" to " in
536 				 * "`...\"...`".
537 				 * This is not done in POSIX mode (section
538 				 * 3.2.3, Double Quotes: "The backquote shall
539 				 * retain its special meaning introducing the
540 				 * other form of command substitution (see
541 				 * 3.6.3). The portion of the quoted string
542 				 * from the initial backquote and the
543 				 * characters up to the next backquote that
544 				 * is not preceded by a backslash (having
545 				 * escape characters removed) defines that
546 				 * command whose output replaces `...` when
547 				 * the word is expanded."
548 				 * Section 3.6.3, Command Substitution:
549 				 * "Within the backquoted style of command
550 				 * substitution, backslash shall retain its
551 				 * literal meaning, except when followed by
552 				 * $ ` \.").
553 				 */
554 				statep->ls_bool = false;
555 				s2 = statep;
556 				base = state_info.base;
557 				while (/* CONSTCOND */ 1) {
558 					for (; s2 != base; s2--) {
559 						if (s2->type == SDQUOTE) {
560 							statep->ls_bool = true;
561 							break;
562 						}
563 					}
564 					if (s2 != base)
565 						break;
566 					if (!(s2 = s2->ls_base))
567 						break;
568 					base = s2-- - STATE_BSIZE;
569 				}
570 				break;
571 			case QCHAR:
572 				if (cf & LQCHAR) {
573 					*wp++ = QCHAR;
574 					*wp++ = getsc();
575 					break;
576 				}
577 				/* FALLTHROUGH */
578 			default:
579  store_char:
580 				*wp++ = CHAR;
581 				*wp++ = c;
582 			}
583 			break;
584 
585 		case SEQUOTE:
586 			if (c == '\'') {
587 				POP_STATE();
588 				*wp++ = CQUOTE;
589 				ignore_backslash_newline--;
590 			} else if (c == '\\') {
591 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
592 					c2 = s_get();
593 				if (c2 == 0)
594 					statep->ls_bool = true;
595 				if (!statep->ls_bool) {
596 					char ts[4];
597 
598 					if ((unsigned int)c2 < 0x100) {
599 						*wp++ = QCHAR;
600 						*wp++ = c2;
601 					} else {
602 						cz = utf_wctomb(ts, c2 - 0x100);
603 						ts[cz] = 0;
604 						for (cz = 0; ts[cz]; ++cz) {
605 							*wp++ = QCHAR;
606 							*wp++ = ts[cz];
607 						}
608 					}
609 				}
610 			} else if (!statep->ls_bool) {
611 				*wp++ = QCHAR;
612 				*wp++ = c;
613 			}
614 			break;
615 
616 		case SSQUOTE:
617 			if (c == '\'') {
618 				POP_STATE();
619 				*wp++ = CQUOTE;
620 				ignore_backslash_newline--;
621 			} else {
622 				*wp++ = QCHAR;
623 				*wp++ = c;
624 			}
625 			break;
626 
627 		case SDQUOTE:
628 			if (c == '"') {
629 				POP_STATE();
630 				*wp++ = CQUOTE;
631 			} else
632 				goto Subst;
633 			break;
634 
635 		/* $(( ... )) */
636 		case SASPAREN:
637 			if (c == '(')
638 				statep->nparen++;
639 			else if (c == ')') {
640 				statep->nparen--;
641 				if (statep->nparen == 1) {
642 					/* end of EXPRSUB */
643 					POP_SRETRACE();
644 					POP_STATE();
645 
646 					if ((c2 = getsc()) == /*(*/ ')') {
647 						cz = strlen(sp) - 2;
648 						XcheckN(ws, wp, cz);
649 						memcpy(wp, sp + 1, cz);
650 						wp += cz;
651 						afree(sp, ATEMP);
652 						*wp++ = '\0';
653 						break;
654 					} else {
655 						Source *s;
656 
657 						ungetsc(c2);
658 						/*
659 						 * mismatched parenthesis -
660 						 * assume we were really
661 						 * parsing a $(...) expression
662 						 */
663 						--wp;
664 						s = pushs(SREREAD,
665 						    source->areap);
666 						s->start = s->str =
667 						    s->u.freeme = sp;
668 						s->next = source;
669 						source = s;
670 						goto subst_command;
671 					}
672 				}
673 			}
674 			/* reuse existing state machine */
675 			goto Sbase2;
676 
677 		case SQBRACE:
678 			if (c == '\\') {
679 				/*
680 				 * perform POSIX "quote removal" if the back-
681 				 * slash is "special", i.e. same cases as the
682 				 * {case '\\':} in Subst: plus closing brace;
683 				 * in mksh code "quote removal" on '\c' means
684 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
685 				 * emitted (in heredocquote:)
686 				 */
687 				if ((c = getsc()) == '"' || c == '\\' ||
688 				    c == '$' || c == '`' || c == /*{*/'}')
689 					goto store_qchar;
690 				goto heredocquote;
691 			}
692 			goto common_SQBRACE;
693 
694 		case SBRACE:
695 			if (c == '\'')
696 				goto open_ssquote;
697 			else if (c == '\\')
698 				goto getsc_qchar;
699  common_SQBRACE:
700 			if (c == '"')
701 				goto open_sdquote;
702 			else if (c == '$')
703 				goto subst_dollar_ex;
704 			else if (c == '`')
705 				goto subst_gravis;
706 			else if (c != /*{*/ '}')
707 				goto store_char;
708 			POP_STATE();
709 			*wp++ = CSUBST;
710 			*wp++ = /*{*/ '}';
711 			break;
712 
713 		/* Same as SBASE, except (,|,) treated specially */
714 		case STBRACEKORN:
715 			if (c == '|')
716 				*wp++ = SPAT;
717 			else if (c == '(') {
718 				*wp++ = OPAT;
719 				/* simile for @ */
720 				*wp++ = ' ';
721 				PUSH_STATE(SPATTERN);
722 			} else /* FALLTHROUGH */
723 		case STBRACEBOURNE:
724 			  if (c == /*{*/ '}') {
725 				POP_STATE();
726 				*wp++ = CSUBST;
727 				*wp++ = /*{*/ '}';
728 			} else
729 				goto Sbase1;
730 			break;
731 
732 		case SBQUOTE:
733 			if (c == '`') {
734 				*wp++ = 0;
735 				POP_STATE();
736 			} else if (c == '\\') {
737 				switch (c = getsc()) {
738 				case 0:
739 					/* trailing \ is lost */
740 					break;
741 				case '\\':
742 				case '$': case '`':
743 					*wp++ = c;
744 					break;
745 				case '"':
746 					if (statep->ls_bool) {
747 						*wp++ = c;
748 						break;
749 					}
750 					/* FALLTHROUGH */
751 				default:
752 					*wp++ = '\\';
753 					*wp++ = c;
754 					break;
755 				}
756 			} else
757 				*wp++ = c;
758 			break;
759 
760 		/* ONEWORD */
761 		case SWORD:
762 			goto Subst;
763 
764 		/* LETEXPR: (( ... )) */
765 		case SLETPAREN:
766 			if (c == /*(*/ ')') {
767 				if (statep->nparen > 0)
768 					--statep->nparen;
769 				else if ((c2 = getsc()) == /*(*/ ')') {
770 					c = 0;
771 					*wp++ = CQUOTE;
772 					goto Done;
773 				} else {
774 					Source *s;
775 
776 					ungetsc(c2);
777 					/*
778 					 * mismatched parenthesis -
779 					 * assume we were really
780 					 * parsing a (...) expression
781 					 */
782 					*wp = EOS;
783 					sp = Xstring(ws, wp);
784 					dp = wdstrip(sp, WDS_KEEPQ);
785 					s = pushs(SREREAD, source->areap);
786 					s->start = s->str = s->u.freeme = dp;
787 					s->next = source;
788 					source = s;
789 					return ('('/*)*/);
790 				}
791 			} else if (c == '(')
792 				/*
793 				 * parentheses inside quotes and
794 				 * backslashes are lost, but AT&T ksh
795 				 * doesn't count them either
796 				 */
797 				++statep->nparen;
798 			goto Sbase2;
799 
800 		/* <<, <<-, <<< delimiter */
801 		case SHEREDELIM:
802 			/*
803 			 * here delimiters need a special case since
804 			 * $ and `...` are not to be treated specially
805 			 */
806 			switch (c) {
807 			case '\\':
808 				if ((c = getsc())) {
809 					/* trailing \ is lost */
810 					*wp++ = QCHAR;
811 					*wp++ = c;
812 				}
813 				break;
814 			case '\'':
815 				goto open_ssquote;
816 			case '$':
817 				if ((c2 = getsc()) == '\'') {
818  open_sequote:
819 					*wp++ = OQUOTE;
820 					ignore_backslash_newline++;
821 					PUSH_STATE(SEQUOTE);
822 					statep->ls_bool = false;
823 					break;
824 				} else if (c2 == '"') {
825 					/* FALLTHROUGH */
826 			case '"':
827 					state = statep->type = SHEREDQUOTE;
828 					PUSH_SRETRACE();
829 					break;
830 				}
831 				ungetsc(c2);
832 				/* FALLTHROUGH */
833 			default:
834 				*wp++ = CHAR;
835 				*wp++ = c;
836 			}
837 			break;
838 
839 		/* " in <<, <<-, <<< delimiter */
840 		case SHEREDQUOTE:
841 			if (c != '"')
842 				goto Subst;
843 			POP_SRETRACE();
844 			dp = strnul(sp) - 1;
845 			/* remove the trailing double quote */
846 			*dp = '\0';
847 			/* store the quoted string */
848 			*wp++ = OQUOTE;
849 			XcheckN(ws, wp, (dp - sp));
850 			dp = sp;
851 			while ((c = *dp++)) {
852 				if (c == '\\') {
853 					switch ((c = *dp++)) {
854 					case '\\':
855 					case '"':
856 					case '$':
857 					case '`':
858 						break;
859 					default:
860 						*wp++ = CHAR;
861 						*wp++ = '\\';
862 						break;
863 					}
864 				}
865 				*wp++ = CHAR;
866 				*wp++ = c;
867 			}
868 			afree(sp, ATEMP);
869 			*wp++ = CQUOTE;
870 			state = statep->type = SHEREDELIM;
871 			break;
872 
873 		/* in *(...|...) pattern (*+?@!) */
874 		case SPATTERN:
875 			if (c == /*(*/ ')') {
876 				*wp++ = CPAT;
877 				POP_STATE();
878 			} else if (c == '|') {
879 				*wp++ = SPAT;
880 			} else if (c == '(') {
881 				*wp++ = OPAT;
882 				/* simile for @ */
883 				*wp++ = ' ';
884 				PUSH_STATE(SPATTERN);
885 			} else
886 				goto Sbase1;
887 			break;
888 		}
889 	}
890  Done:
891 	Xcheck(ws, wp);
892 	if (statep != &states[1])
893 		/* XXX figure out what is missing */
894 		yyerror("no closing quote\n");
895 
896 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
897 	if (state == SHEREDELIM)
898 		state = SBASE;
899 
900 	dp = Xstring(ws, wp);
901 	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
902 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
903 
904 		if (Xlength(ws, wp) == 0)
905 			iop->unit = c == '<' ? 0 : 1;
906 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
907 			if (dp[c2] != CHAR)
908 				goto no_iop;
909 			if (!ksh_isdigit(dp[c2 + 1]))
910 				goto no_iop;
911 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
912 		}
913 
914 		if (iop->unit >= FDBASE)
915 			goto no_iop;
916 
917 		if (c == '&') {
918 			if ((c2 = getsc()) != '>') {
919 				ungetsc(c2);
920 				goto no_iop;
921 			}
922 			c = c2;
923 			iop->flag = IOBASH;
924 		} else
925 			iop->flag = 0;
926 
927 		c2 = getsc();
928 		/* <<, >>, <> are ok, >< is not */
929 		if (c == c2 || (c == '<' && c2 == '>')) {
930 			iop->flag |= c == c2 ?
931 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
932 			if (iop->flag == IOHERE) {
933 				if ((c2 = getsc()) == '-') {
934 					iop->flag |= IOSKIP;
935 					c2 = getsc();
936 				} else if (c2 == '<')
937 					iop->flag |= IOHERESTR;
938 				ungetsc(c2);
939 				if (c2 == '\n')
940 					iop->flag |= IONDELIM;
941 			}
942 		} else if (c2 == '&')
943 			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
944 		else {
945 			iop->flag |= c == '>' ? IOWRITE : IOREAD;
946 			if (c == '>' && c2 == '|')
947 				iop->flag |= IOCLOB;
948 			else
949 				ungetsc(c2);
950 		}
951 
952 		iop->name = NULL;
953 		iop->delim = NULL;
954 		iop->heredoc = NULL;
955 		/* free word */
956 		Xfree(ws, wp);
957 		yylval.iop = iop;
958 		return (REDIR);
959  no_iop:
960 		afree(iop, ATEMP);
961 	}
962 
963 	if (wp == dp && state == SBASE) {
964 		/* free word */
965 		Xfree(ws, wp);
966 		/* no word, process LEX1 character */
967 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
968 			if ((c2 = getsc()) == c)
969 				c = (c == ';') ? BREAK :
970 				    (c == '|') ? LOGOR :
971 				    (c == '&') ? LOGAND :
972 				    /* c == '(' ) */ MDPAREN;
973 			else if (c == '|' && c2 == '&')
974 				c = COPROC;
975 			else if (c == ';' && c2 == '|')
976 				c = BRKEV;
977 			else if (c == ';' && c2 == '&')
978 				c = BRKFT;
979 			else
980 				ungetsc(c2);
981 #ifndef MKSH_SMALL
982 			if (c == BREAK) {
983 				if ((c2 = getsc()) == '&')
984 					c = BRKEV;
985 				else
986 					ungetsc(c2);
987 			}
988 #endif
989 		} else if (c == '\n') {
990 			gethere(false);
991 			if (cf & CONTIN)
992 				goto Again;
993 		} else if (c == '\0')
994 			/* need here strings at EOF */
995 			gethere(true);
996 		return (c);
997 	}
998 
999 	/* terminate word */
1000 	*wp++ = EOS;
1001 	yylval.cp = Xclose(ws, wp);
1002 	if (state == SWORD || state == SLETPAREN
1003 	    /* XXX ONEWORD? */)
1004 		return (LWORD);
1005 
1006 	/* unget terminator */
1007 	ungetsc(c);
1008 
1009 	/*
1010 	 * note: the alias-vs-function code below depends on several
1011 	 * interna: starting from here, source->str is not modified;
1012 	 * the way getsc() and ungetsc() operate; etc.
1013 	 */
1014 
1015 	/* copy word to unprefixed string ident */
1016 	sp = yylval.cp;
1017 	dp = ident;
1018 	if ((cf & HEREDELIM) && (sp[1] == '<'))
1019 		while ((dp - ident) < IDENT) {
1020 			if ((c = *sp++) == CHAR)
1021 				*dp++ = *sp++;
1022 			else if ((c != OQUOTE) && (c != CQUOTE))
1023 				break;
1024 		}
1025 	else
1026 		while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1027 			*dp++ = *sp++;
1028 	/* Make sure the ident array stays '\0' padded */
1029 	memset(dp, 0, (ident + IDENT) - dp + 1);
1030 	if (c != EOS)
1031 		/* word is not unquoted */
1032 		*ident = '\0';
1033 
1034 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1035 		struct tbl *p;
1036 		uint32_t h = hash(ident);
1037 
1038 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1039 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1040 		    p->val.i == /*{*/ '}')) {
1041 			afree(yylval.cp, ATEMP);
1042 			return (p->val.i);
1043 		}
1044 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1045 		    (p->flag & ISSET)) {
1046 			/*
1047 			 * this still points to the same character as the
1048 			 * ungetsc'd terminator from above
1049 			 */
1050 			const char *cp = source->str;
1051 
1052 			/* prefer POSIX but not Korn functions over aliases */
1053 			while (*cp == ' ' || *cp == '\t')
1054 				/*
1055 				 * this is like getsc() without skipping
1056 				 * over Source boundaries (including not
1057 				 * parsing ungetsc'd characters that got
1058 				 * pushed into an SREREAD) which is what
1059 				 * we want here anyway: find out whether
1060 				 * the alias name is followed by a POSIX
1061 				 * function definition (only the opening
1062 				 * parenthesis is checked though)
1063 				 */
1064 				++cp;
1065 			/* prefer functions over aliases */
1066 			if (cp[0] != '(' || cp[1] != ')') {
1067 				Source *s = source;
1068 
1069 				while (s && (s->flags & SF_HASALIAS))
1070 					if (s->u.tblp == p)
1071 						return (LWORD);
1072 					else
1073 						s = s->next;
1074 				/* push alias expansion */
1075 				s = pushs(SALIAS, source->areap);
1076 				s->start = s->str = p->val.s;
1077 				s->u.tblp = p;
1078 				s->flags |= SF_HASALIAS;
1079 				s->next = source;
1080 				if (source->type == SEOF) {
1081 					/* prevent infinite recursion at EOS */
1082 					source->u.tblp = p;
1083 					source->flags |= SF_HASALIAS;
1084 				}
1085 				source = s;
1086 				afree(yylval.cp, ATEMP);
1087 				goto Again;
1088 			}
1089 		}
1090 	}
1091 
1092 	return (LWORD);
1093 }
1094 
1095 static void
gethere(bool iseof)1096 gethere(bool iseof)
1097 {
1098 	struct ioword **p;
1099 
1100 	for (p = heres; p < herep; p++)
1101 		if (iseof && !((*p)->flag & IOHERESTR))
1102 			/* only here strings at EOF */
1103 			return;
1104 		else
1105 			readhere(*p);
1106 	herep = heres;
1107 }
1108 
1109 /*
1110  * read "<<word" text into temp file
1111  */
1112 
1113 static void
readhere(struct ioword * iop)1114 readhere(struct ioword *iop)
1115 {
1116 	int c;
1117 	const char *eof, *eofp;
1118 	XString xs;
1119 	char *xp;
1120 	int xpos;
1121 
1122 	if (iop->flag & IOHERESTR) {
1123 		/* process the here string */
1124 		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1125 		xpos = strlen(xp) - 1;
1126 		memmove(xp, xp + 1, xpos);
1127 		xp[xpos] = '\n';
1128 		return;
1129 	}
1130 
1131 	eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1132 
1133 	if (!(iop->flag & IOEVAL))
1134 		ignore_backslash_newline++;
1135 
1136 	Xinit(xs, xp, 256, ATEMP);
1137 
1138  heredoc_read_line:
1139 	/* beginning of line */
1140 	eofp = eof;
1141 	xpos = Xsavepos(xs, xp);
1142 	if (iop->flag & IOSKIP) {
1143 		/* skip over leading tabs */
1144 		while ((c = getsc()) == '\t')
1145 			/* nothing */;
1146 		goto heredoc_parse_char;
1147 	}
1148  heredoc_read_char:
1149 	c = getsc();
1150  heredoc_parse_char:
1151 	/* compare with here document marker */
1152 	if (!*eofp) {
1153 		/* end of here document marker, what to do? */
1154 		switch (c) {
1155 		case /*(*/ ')':
1156 			if (!subshell_nesting_type)
1157 				/*-
1158 				 * not allowed outside $(...) or (...)
1159 				 * => mismatch
1160 				 */
1161 				break;
1162 			/* allow $(...) or (...) to close here */
1163 			ungetsc(/*(*/ ')');
1164 			/* FALLTHROUGH */
1165 		case 0:
1166 			/*
1167 			 * Allow EOF here to commands without trailing
1168 			 * newlines (mksh -c '...') will work as well.
1169 			 */
1170 		case '\n':
1171 			/* Newline terminates here document marker */
1172 			goto heredoc_found_terminator;
1173 		}
1174 	} else if (c == *eofp++)
1175 		/* store; then read and compare next character */
1176 		goto heredoc_store_and_loop;
1177 	/* nope, mismatch; read until end of line */
1178 	while (c != '\n') {
1179 		if (!c)
1180 			/* oops, reached EOF */
1181 			yyerror("%s '%s' unclosed\n", "here document", eof);
1182 		/* store character */
1183 		Xcheck(xs, xp);
1184 		Xput(xs, xp, c);
1185 		/* read next character */
1186 		c = getsc();
1187 	}
1188 	/* we read a newline as last character */
1189  heredoc_store_and_loop:
1190 	/* store character */
1191 	Xcheck(xs, xp);
1192 	Xput(xs, xp, c);
1193 	if (c == '\n')
1194 		goto heredoc_read_line;
1195 	goto heredoc_read_char;
1196 
1197  heredoc_found_terminator:
1198 	/* jump back to saved beginning of line */
1199 	xp = Xrestpos(xs, xp, xpos);
1200 	/* terminate, close and store */
1201 	Xput(xs, xp, '\0');
1202 	iop->heredoc = Xclose(xs, xp);
1203 
1204 	if (!(iop->flag & IOEVAL))
1205 		ignore_backslash_newline--;
1206 }
1207 
1208 void
yyerror(const char * fmt,...)1209 yyerror(const char *fmt, ...)
1210 {
1211 	va_list va;
1212 
1213 	/* pop aliases and re-reads */
1214 	while (source->type == SALIAS || source->type == SREREAD)
1215 		source = source->next;
1216 	/* zap pending input */
1217 	source->str = null;
1218 
1219 	error_prefix(true);
1220 	va_start(va, fmt);
1221 	shf_vfprintf(shl_out, fmt, va);
1222 	va_end(va);
1223 	errorfz();
1224 }
1225 
1226 /*
1227  * input for yylex with alias expansion
1228  */
1229 
1230 Source *
pushs(int type,Area * areap)1231 pushs(int type, Area *areap)
1232 {
1233 	Source *s;
1234 
1235 	s = alloc(sizeof(Source), areap);
1236 	memset(s, 0, sizeof(Source));
1237 	s->type = type;
1238 	s->str = null;
1239 	s->areap = areap;
1240 	if (type == SFILE || type == SSTDIN)
1241 		XinitN(s->xs, 256, s->areap);
1242 	return (s);
1243 }
1244 
1245 static int
getsc_uu(void)1246 getsc_uu(void)
1247 {
1248 	Source *s = source;
1249 	int c;
1250 
1251 	while ((c = *s->str++) == 0) {
1252 		/* return 0 for EOF by default */
1253 		s->str = NULL;
1254 		switch (s->type) {
1255 		case SEOF:
1256 			s->str = null;
1257 			return (0);
1258 
1259 		case SSTDIN:
1260 		case SFILE:
1261 			getsc_line(s);
1262 			break;
1263 
1264 		case SWSTR:
1265 			break;
1266 
1267 		case SSTRING:
1268 		case SSTRINGCMDLINE:
1269 			break;
1270 
1271 		case SWORDS:
1272 			s->start = s->str = *s->u.strv++;
1273 			s->type = SWORDSEP;
1274 			break;
1275 
1276 		case SWORDSEP:
1277 			if (*s->u.strv == NULL) {
1278 				s->start = s->str = "\n";
1279 				s->type = SEOF;
1280 			} else {
1281 				s->start = s->str = " ";
1282 				s->type = SWORDS;
1283 			}
1284 			break;
1285 
1286 		case SALIAS:
1287 			if (s->flags & SF_ALIASEND) {
1288 				/* pass on an unused SF_ALIAS flag */
1289 				source = s->next;
1290 				source->flags |= s->flags & SF_ALIAS;
1291 				s = source;
1292 			} else if (*s->u.tblp->val.s &&
1293 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1294 				/* pop source stack */
1295 				source = s = s->next;
1296 				/*
1297 				 * Note that this alias ended with a
1298 				 * space, enabling alias expansion on
1299 				 * the following word.
1300 				 */
1301 				s->flags |= SF_ALIAS;
1302 			} else {
1303 				/*
1304 				 * At this point, we need to keep the current
1305 				 * alias in the source list so recursive
1306 				 * aliases can be detected and we also need to
1307 				 * return the next character. Do this by
1308 				 * temporarily popping the alias to get the
1309 				 * next character and then put it back in the
1310 				 * source list with the SF_ALIASEND flag set.
1311 				 */
1312 				/* pop source stack */
1313 				source = s->next;
1314 				source->flags |= s->flags & SF_ALIAS;
1315 				c = getsc_uu();
1316 				if (c) {
1317 					s->flags |= SF_ALIASEND;
1318 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1319 					s->start = s->str = s->ugbuf;
1320 					s->next = source;
1321 					source = s;
1322 				} else {
1323 					s = source;
1324 					/* avoid reading EOF twice */
1325 					s->str = NULL;
1326 					break;
1327 				}
1328 			}
1329 			continue;
1330 
1331 		case SREREAD:
1332 			if (s->start != s->ugbuf)
1333 				/* yuck */
1334 				afree(s->u.freeme, ATEMP);
1335 			source = s = s->next;
1336 			continue;
1337 		}
1338 		if (s->str == NULL) {
1339 			s->type = SEOF;
1340 			s->start = s->str = null;
1341 			return ('\0');
1342 		}
1343 		if (s->flags & SF_ECHO) {
1344 			shf_puts(s->str, shl_out);
1345 			shf_flush(shl_out);
1346 		}
1347 	}
1348 	return (c);
1349 }
1350 
1351 static void
getsc_line(Source * s)1352 getsc_line(Source *s)
1353 {
1354 	char *xp = Xstring(s->xs, xp), *cp;
1355 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1356 	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1357 
1358 	/* Done here to ensure nothing odd happens when a timeout occurs */
1359 	XcheckN(s->xs, xp, LINE);
1360 	*xp = '\0';
1361 	s->start = s->str = xp;
1362 
1363 	if (have_tty && ksh_tmout) {
1364 		ksh_tmout_state = TMOUT_READING;
1365 		alarm(ksh_tmout);
1366 	}
1367 	if (interactive)
1368 		change_winsz();
1369 #ifndef MKSH_NO_CMDLINE_EDITING
1370 	if (have_tty && (
1371 #if !MKSH_S_NOVI
1372 	    Flag(FVI) ||
1373 #endif
1374 	    Flag(FEMACS) || Flag(FGMACS))) {
1375 		int nread;
1376 
1377 		nread = x_read(xp, LINE);
1378 		if (nread < 0)
1379 			/* read error */
1380 			nread = 0;
1381 		xp[nread] = '\0';
1382 		xp += nread;
1383 	} else
1384 #endif
1385 	  {
1386 		if (interactive)
1387 			pprompt(prompt, 0);
1388 		else
1389 			s->line++;
1390 
1391 		while (/* CONSTCOND */ 1) {
1392 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1393 
1394 			if (!p && shf_error(s->u.shf) &&
1395 			    shf_errno(s->u.shf) == EINTR) {
1396 				shf_clearerr(s->u.shf);
1397 				if (trap)
1398 					runtraps(0);
1399 				continue;
1400 			}
1401 			if (!p || (xp = p, xp[-1] == '\n'))
1402 				break;
1403 			/* double buffer size */
1404 			/* move past NUL so doubling works... */
1405 			xp++;
1406 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1407 			/* ...and move back again */
1408 			xp--;
1409 		}
1410 		/*
1411 		 * flush any unwanted input so other programs/builtins
1412 		 * can read it. Not very optimal, but less error prone
1413 		 * than flushing else where, dealing with redirections,
1414 		 * etc.
1415 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1416 		 */
1417 		if (s->type == SSTDIN)
1418 			shf_flush(s->u.shf);
1419 	}
1420 	/*
1421 	 * XXX: temporary kludge to restore source after a
1422 	 * trap may have been executed.
1423 	 */
1424 	source = s;
1425 	if (have_tty && ksh_tmout) {
1426 		ksh_tmout_state = TMOUT_EXECUTING;
1427 		alarm(0);
1428 	}
1429 	cp = Xstring(s->xs, xp);
1430 	s->start = s->str = cp;
1431 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1432 	/* Note: if input is all nulls, this is not eof */
1433 	if (Xlength(s->xs, xp) == 0) {
1434 		/* EOF */
1435 		if (s->type == SFILE)
1436 			shf_fdclose(s->u.shf);
1437 		s->str = NULL;
1438 	} else if (interactive && *s->str &&
1439 	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1440 		histsave(&s->line, s->str, true, true);
1441 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1442 	} else if (interactive && cur_prompt == PS1) {
1443 		cp = Xstring(s->xs, xp);
1444 		while (*cp && ctype(*cp, C_IFSWS))
1445 			++cp;
1446 		if (!*cp)
1447 			histsync();
1448 #endif
1449 	}
1450 	if (interactive)
1451 		set_prompt(PS2, NULL);
1452 }
1453 
1454 void
set_prompt(int to,Source * s)1455 set_prompt(int to, Source *s)
1456 {
1457 	cur_prompt = to;
1458 
1459 	switch (to) {
1460 	/* command */
1461 	case PS1:
1462 		/*
1463 		 * Substitute ! and !! here, before substitutions are done
1464 		 * so ! in expanded variables are not expanded.
1465 		 * NOTE: this is not what AT&T ksh does (it does it after
1466 		 * substitutions, POSIX doesn't say which is to be done.
1467 		 */
1468 		{
1469 			struct shf *shf;
1470 			char * volatile ps1;
1471 			Area *saved_atemp;
1472 
1473 			ps1 = str_val(global("PS1"));
1474 			shf = shf_sopen(NULL, strlen(ps1) * 2,
1475 			    SHF_WR | SHF_DYNAMIC, NULL);
1476 			while (*ps1)
1477 				if (*ps1 != '!' || *++ps1 == '!')
1478 					shf_putchar(*ps1++, shf);
1479 				else
1480 					shf_fprintf(shf, "%d",
1481 						s ? s->line + 1 : 0);
1482 			ps1 = shf_sclose(shf);
1483 			saved_atemp = ATEMP;
1484 			newenv(E_ERRH);
1485 			if (kshsetjmp(e->jbuf)) {
1486 				prompt = safe_prompt;
1487 				/*
1488 				 * Don't print an error - assume it has already
1489 				 * been printed. Reason is we may have forked
1490 				 * to run a command and the child may be
1491 				 * unwinding its stack through this code as it
1492 				 * exits.
1493 				 */
1494 			} else {
1495 				char *cp = substitute(ps1, 0);
1496 				strdupx(prompt, cp, saved_atemp);
1497 			}
1498 			quitenv(NULL);
1499 		}
1500 		break;
1501 	/* command continuation */
1502 	case PS2:
1503 		prompt = str_val(global("PS2"));
1504 		break;
1505 	}
1506 }
1507 
1508 static int
dopprompt(const char * cp,int ntruncate,bool doprint)1509 dopprompt(const char *cp, int ntruncate, bool doprint)
1510 {
1511 	int columns = 0, lines = 0;
1512 	bool indelimit = false;
1513 	char delimiter = 0;
1514 
1515 	/*
1516 	 * Undocumented AT&T ksh feature:
1517 	 * If the second char in the prompt string is \r then the first
1518 	 * char is taken to be a non-printing delimiter and any chars
1519 	 * between two instances of the delimiter are not considered to
1520 	 * be part of the prompt length
1521 	 */
1522 	if (*cp && cp[1] == '\r') {
1523 		delimiter = *cp;
1524 		cp += 2;
1525 	}
1526 	for (; *cp; cp++) {
1527 		if (indelimit && *cp != delimiter)
1528 			;
1529 		else if (*cp == '\n' || *cp == '\r') {
1530 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1531 			columns = 0;
1532 		} else if (*cp == '\t') {
1533 			columns = (columns | 7) + 1;
1534 		} else if (*cp == '\b') {
1535 			if (columns > 0)
1536 				columns--;
1537 		} else if (*cp == delimiter)
1538 			indelimit = !indelimit;
1539 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1540 			const char *cp2;
1541 			columns += utf_widthadj(cp, &cp2);
1542 			if (doprint && (indelimit ||
1543 			    (ntruncate < (x_cols * lines + columns))))
1544 				shf_write(cp, cp2 - cp, shl_out);
1545 			cp = cp2 - /* loop increment */ 1;
1546 			continue;
1547 		} else
1548 			columns++;
1549 		if (doprint && (*cp != delimiter) &&
1550 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1551 			shf_putc(*cp, shl_out);
1552 	}
1553 	if (doprint)
1554 		shf_flush(shl_out);
1555 	return (x_cols * lines + columns);
1556 }
1557 
1558 
/*
 * Print the prompt string cp to shl_out via dopprompt(), passing
 * ntruncate through unchanged. The position dopprompt() computes is
 * not used.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1564 
/*
 * Measure the prompt string cp without printing it; returns the value
 * computed by dopprompt() with no truncation.
 */
int
promptlen(const char *cp)
{
	int len;

	len = dopprompt(cp, 0, false);
	return (len);
}
1570 
1571 /*
1572  * Read the variable part of a ${...} expression (i.e. up to but not
1573  * including the :[-+?=#%] or close-brace).
1574  */
1575 static char *
get_brace_var(XString * wsp,char * wp)1576 get_brace_var(XString *wsp, char *wp)
1577 {
1578 	char c;
1579 	enum parse_state {
1580 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1581 		PS_NUMBER, PS_VAR1
1582 	} state = PS_INITIAL;
1583 
1584 	while (/* CONSTCOND */ 1) {
1585 		c = getsc();
1586 		/* State machine to figure out where the variable part ends. */
1587 		switch (state) {
1588 		case PS_INITIAL:
1589 			if (c == '#' || c == '!' || c == '%') {
1590 				state = PS_SAW_HASH;
1591 				break;
1592 			}
1593 			/* FALLTHROUGH */
1594 		case PS_SAW_HASH:
1595 			if (ksh_isalphx(c))
1596 				state = PS_IDENT;
1597 			else if (ksh_isdigit(c))
1598 				state = PS_NUMBER;
1599 			else if (c == '#') {
1600 				if (state == PS_SAW_HASH) {
1601 					char c2;
1602 
1603 					c2 = getsc();
1604 					ungetsc(c2);
1605 					if (c2 != /*{*/ '}') {
1606 						ungetsc(c);
1607 						goto out;
1608 					}
1609 				}
1610 				state = PS_VAR1;
1611 			} else if (ctype(c, C_VAR1))
1612 				state = PS_VAR1;
1613 			else
1614 				goto out;
1615 			break;
1616 		case PS_IDENT:
1617 			if (!ksh_isalnux(c)) {
1618 				if (c == '[') {
1619 					char *tmp, *p;
1620 
1621 					if (!arraysub(&tmp))
1622 						yyerror("missing ]\n");
1623 					*wp++ = c;
1624 					for (p = tmp; *p; ) {
1625 						Xcheck(*wsp, wp);
1626 						*wp++ = *p++;
1627 					}
1628 					afree(tmp, ATEMP);
1629 					/* the ] */
1630 					c = getsc();
1631 				}
1632 				goto out;
1633 			}
1634 			break;
1635 		case PS_NUMBER:
1636 			if (!ksh_isdigit(c))
1637 				goto out;
1638 			break;
1639 		case PS_VAR1:
1640 			goto out;
1641 		}
1642 		Xcheck(*wsp, wp);
1643 		*wp++ = c;
1644 	}
1645  out:
1646 	/* end of variable part */
1647 	*wp++ = '\0';
1648 	ungetsc(c);
1649 	return (wp);
1650 }
1651 
1652 /*
1653  * Save an array subscript - returns true if matching bracket found, false
1654  * if eof or newline was found.
1655  * (Returned string double null terminated)
1656  */
1657 static bool
arraysub(char ** strp)1658 arraysub(char **strp)
1659 {
1660 	XString ws;
1661 	char *wp, c;
1662 	/* we are just past the initial [ */
1663 	unsigned int depth = 1;
1664 
1665 	Xinit(ws, wp, 32, ATEMP);
1666 
1667 	do {
1668 		c = getsc();
1669 		Xcheck(ws, wp);
1670 		*wp++ = c;
1671 		if (c == '[')
1672 			depth++;
1673 		else if (c == ']')
1674 			depth--;
1675 	} while (depth > 0 && c && c != '\n');
1676 
1677 	*wp++ = '\0';
1678 	*strp = Xclose(ws, wp);
1679 
1680 	return (tobool(depth == 0));
1681 }
1682 
1683 /* Unget a char: handles case when we are already at the start of the buffer */
1684 static void
ungetsc(int c)1685 ungetsc(int c)
1686 {
1687 	struct sretrace_info *rp = retrace_info;
1688 
1689 	if (backslash_skip)
1690 		backslash_skip--;
1691 	/* Don't unget EOF... */
1692 	if (source->str == null && c == '\0')
1693 		return;
1694 	while (rp) {
1695 		if (Xlength(rp->xs, rp->xp))
1696 			rp->xp--;
1697 		rp = rp->next;
1698 	}
1699 	ungetsc_i(c);
1700 }
1701 static void
ungetsc_i(int c)1702 ungetsc_i(int c)
1703 {
1704 	if (source->str > source->start)
1705 		source->str--;
1706 	else {
1707 		Source *s;
1708 
1709 		s = pushs(SREREAD, source->areap);
1710 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1711 		s->start = s->str = s->ugbuf;
1712 		s->next = source;
1713 		source = s;
1714 	}
1715 }
1716 
1717 
1718 /* Called to get a char that isn't a \newline sequence. */
1719 static int
getsc_bn(void)1720 getsc_bn(void)
1721 {
1722 	int c, c2;
1723 
1724 	if (ignore_backslash_newline)
1725 		return (o_getsc_u());
1726 
1727 	if (backslash_skip == 1) {
1728 		backslash_skip = 2;
1729 		return (o_getsc_u());
1730 	}
1731 
1732 	backslash_skip = 0;
1733 
1734 	while (/* CONSTCOND */ 1) {
1735 		c = o_getsc_u();
1736 		if (c == '\\') {
1737 			if ((c2 = o_getsc_u()) == '\n')
1738 				/* ignore the \newline; get the next char... */
1739 				continue;
1740 			ungetsc_i(c2);
1741 			backslash_skip = 1;
1742 		}
1743 		return (c);
1744 	}
1745 }
1746 
1747 void
yyskiputf8bom(void)1748 yyskiputf8bom(void)
1749 {
1750 	int c;
1751 
1752 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1753 		ungetsc_i(c);
1754 		return;
1755 	}
1756 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1757 		ungetsc_i(c);
1758 		ungetsc_i(0xEF);
1759 		return;
1760 	}
1761 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1762 		ungetsc_i(c);
1763 		ungetsc_i(0xBB);
1764 		ungetsc_i(0xEF);
1765 		return;
1766 	}
1767 	UTFMODE |= 8;
1768 }
1769 
1770 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1771 push_state_i(State_info *si, Lex_state *old_end)
1772 {
1773 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1774 
1775 	news[0].ls_base = old_end;
1776 	si->base = &news[0];
1777 	si->end = &news[STATE_BSIZE];
1778 	return (&news[1]);
1779 }
1780 
1781 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1782 pop_state_i(State_info *si, Lex_state *old_end)
1783 {
1784 	Lex_state *old_base = si->base;
1785 
1786 	si->base = old_end->ls_base - STATE_BSIZE;
1787 	si->end = old_end->ls_base;
1788 
1789 	afree(old_base, ATEMP);
1790 
1791 	return (si->base + STATE_BSIZE - 1);
1792 }
1793 
/* Function form of getsc(): returns the next input character. */
static int
s_get(void)
{
	int c;

	c = getsc();
	return (c);
}
1799 
/* Counterpart of s_get(): hands the character c back via ungetsc(). */
static void
s_put(int c)
{
	ungetsc(c);
	return;
}
1805