1 /* $OpenBSD: lex.c,v 1.45 2011/03/09 09:30:39 okan Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 * Thorsten Glaser <tg@mirbsd.org>
6 *
7 * Provided that these terms and disclaimer and all copyright notices
8 * are retained or reproduced in an accompanying document, permission
9 * is granted to deal in this work without restriction, including un-
10 * limited rights to use, publicly perform, distribute, sell, modify,
11 * merge, give away, or sublicence.
12 *
13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
14 * the utmost extent permitted by applicable law, neither express nor
15 * implied; without malicious intent or gross negligence. In no event
16 * may a licensor, author or contributor be held liable for indirect,
17 * direct, other damage, loss, or other issues arising in any way out
18 * of dealing in the work, even if advised of the possibility of such
19 * damage or existence of a defect, except proven that it results out
20 * of said person's immediate fault when using the work as intended.
21 */
22
23 #include "sh.h"
24
25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.156 2011/09/07 15:24:16 tg Exp $");
26
27 /*
28 * states while lexing word
 *
 * One of these values is stored in Lex_state.type for every entry of
 * the state stack (see PUSH_STATE); yylex() switches on the current
 * value for each character it reads. SINVALID is only used for the
 * sentinel entry states[0] at the bottom of the stack.
29 */
30 #define SBASE 0 /* outside any lexical constructs */
31 #define SWORD 1 /* implicit quoting for substitute() */
32 #define SLETPAREN 2 /* inside (( )), implicit quoting */
33 #define SSQUOTE 3 /* inside '' */
34 #define SDQUOTE 4 /* inside "" */
35 #define SEQUOTE 5 /* inside $'' */
36 #define SBRACE 6 /* inside ${} */
37 #define SQBRACE 7 /* inside "${}" */
38 #define SBQUOTE 8 /* inside `` */
39 #define SASPAREN 9 /* inside $(( )) */
40 #define SHEREDELIM 10 /* parsing <<,<<- delimiter */
41 #define SHEREDQUOTE 11 /* parsing " in <<,<<- delimiter */
42 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
43 #define SADELIM 13 /* like SBASE, looking for delimiter */
44 #define SHERESTRING 14 /* parsing <<< string */
45 #define STBRACEKORN 15 /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE 16 /* parsing ${...[#%]...} FSH */
47 #define SINVALID 255 /* invalid state */
48
/*
 * One active "retrace" recording. While records are linked on the
 * retrace_info list, o_getsc_r() appends every character returned by
 * getsc() to the buffer of each record, so that the raw input text can
 * be recovered later (see PUSH_SRETRACE / POP_SRETRACE).
 */
49 struct sretrace_info {
/* previously pushed (outer) record, or NULL at the end of the list */
50 struct sretrace_info *next;
/* expandable string collecting the recorded characters */
51 XString xs;
/* current write position inside xs */
52 char *xp;
53 };
54
55 /*
56 * Structure to keep track of the lexing state and the various pieces of info
57 * needed for each particular state.
 *
 * The union member in use depends on the entry: the sentinel entry at
 * the start of a state block uses "base" (NULL for the first block in
 * yylex()), PUSH_SRETRACE stores the output position in "start", and
 * the remaining members are selected by the state type as noted below.
58 */
59 typedef struct lex_state {
60 union {
61 /* point to the next state block */
62 struct lex_state *base;
63 /* marks start of state output in output string */
64 int start;
65 /* SBQUOTE: true if in double quotes: "`...`" */
66 /* SEQUOTE: got NUL, ignore rest of string */
67 bool abool;
68 /* SADELIM information */
69 struct {
70 /* character to search for */
71 unsigned char delimiter;
72 /* max. number of delimiters */
73 unsigned char num;
74 } adelim;
75 } u;
76 /* count open parentheses */
/* (maintained by the SADELIM, SASPAREN and SLETPAREN states) */
77 short nparen;
78 /* type of this state */
/* (one of the S* state constants) */
79 uint8_t type;
80 } Lex_state;
/* shorthand accessors for the union members */
81 #define ls_base u.base
82 #define ls_start u.start
83 #define ls_bool u.abool
84 #define ls_adelim u.adelim
85
/*
 * Bounds of the state block currently in use by the state stack.
 * PUSH_STATE asks push_state_() for help when the stack pointer reaches
 * "end"; POP_STATE calls pop_state_() when it drops back to "base".
 */
86 typedef struct {
/* first entry of the current block (the sentinel slot) */
87 Lex_state *base;
/* one past the last entry of the current block */
88 Lex_state *end;
89 } State_info;
90
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_(State_info *, Lex_state *);
103 static Lex_state *pop_state_(State_info *, Lex_state *);
104
105 static int dopprompt(const char *, int, bool);
106 void yyskiputf8bom(void);
107
/*
 * When nonzero, o_getsc() bypasses its fast path and always calls
 * getsc_bn(). Reset to 0 at the start of every yylex() token.
 * NOTE(review): presumably maintained by getsc_bn()/ungetsc(), which
 * are outside this view - confirm.
 */
108 static int backslash_skip;
/*
 * Nesting counter; incremented while lexing comments, single-quoted
 * and $'...' strings and here documents read without IOEVAL.
 * NOTE(review): presumably makes getsc_bn() keep backslash-newline
 * pairs instead of joining lines - confirm against getsc_bn().
 */
109 static int ignore_backslash_newline;
/* head of the list of active retrace recordings (NULL if none) */
110 static struct sretrace_info *retrace_info;
/*
 * Nonzero allows readhere() to accept a closing parenthesis directly
 * after a here document delimiter; not modified in the visible part
 * of this file.
 */
111 short subshell_nesting_level = 0;
112
113 /* optimised getsc_bn() */
/*
 * Fast path: if the next input byte is neither NUL nor a backslash and
 * backslash_skip is zero, consume it straight from source->str;
 * otherwise fall back to getsc_bn().
 */
114 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \
115 !backslash_skip ? *source->str++ : getsc_bn())
116 /* optimised getsc_uu() */
/* Fast path: consume the next byte unless it is NUL, else getsc_uu(). */
117 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu())
118
119 /* retrace helper */
/*
 * Expands to a complete function body: evaluates carg once, appends the
 * resulting character to the buffer of every record on the retrace_info
 * list and returns it. Because it contains a return statement it can
 * only be used as the body of a function returning int.
 */
120 #define o_getsc_r(carg) { \
121 int cev = (carg); \
122 struct sretrace_info *rp = retrace_info; \
123 \
124 while (rp) { \
125 Xcheck(rp->xs, rp->xp); \
126 *rp->xp++ = cev; \
127 rp = rp->next; \
128 } \
129 \
130 return (cev); \
131 }
132
/*
 * getsc(): read the next input character and record it in all active
 * retrace buffers. With MKSH_SMALL it is a real function wrapping
 * o_getsc(); otherwise it is a macro so that the o_getsc() fast path is
 * expanded at each call site and only the recording step, getsc_r(),
 * is a function call.
 */
133 #ifdef MKSH_SMALL
134 static int getsc(void);
135
/* Fetch one character via o_getsc(), record it, and return it. */
136 static int
getsc(void)137 getsc(void)
138 {
139 o_getsc_r(o_getsc());
140 }
141 #else
142 static int getsc_r(int);
143
/* Record the already fetched character c and return it unchanged. */
144 static int
getsc_r(int c)145 getsc_r(int c)
146 {
147 o_getsc_r(c);
148 }
149
150 #define getsc() getsc_r(o_getsc())
151 #endif
152
/* number of Lex_state entries per state block (size of states[] in yylex) */
153 #define STATE_BSIZE 8
154
/*
 * Enter lexer state s: advance the state stack pointer, letting
 * push_state_() supply a new position when the current block is full,
 * and make s the current state. Requires the yylex() locals statep,
 * state_info and state to be in scope.
 */
155 #define PUSH_STATE(s) do { \
156 if (++statep == state_info.end) \
157 statep = push_state_(&state_info, statep); \
158 state = statep->type = (s); \
159 } while (/* CONSTCOND */ 0)
160
/*
 * Leave the current lexer state: step the state stack pointer back,
 * letting pop_state_() switch blocks when the block's base entry is
 * reached, and reload state from the entry now on top.
 */
161 #define POP_STATE() do { \
162 if (--statep == state_info.base) \
163 statep = pop_state_(&state_info, statep); \
164 state = statep->type; \
165 } while (/* CONSTCOND */ 0)
166
/*
 * Start recording raw input: remember the current output position in
 * statep->ls_start and push a fresh sretrace_info record (allocated
 * from ATEMP) onto the retrace_info list. Uses the yylex() locals ws,
 * wp and statep.
 */
167 #define PUSH_SRETRACE() do { \
168 struct sretrace_info *ri; \
169 \
170 statep->ls_start = Xsavepos(ws, wp); \
171 ri = alloc(sizeof(struct sretrace_info), ATEMP); \
172 Xinit(ri->xs, ri->xp, 64, ATEMP); \
173 ri->next = retrace_info; \
174 retrace_info = ri; \
175 } while (/* CONSTCOND */ 0)
176
/*
 * Stop the innermost recording: rewind wp to the position saved by
 * PUSH_SRETRACE, NUL-terminate the recorded text and leave it in sp,
 * then unlink and free the record itself. The text in sp is not freed
 * here. Uses the yylex() locals ws, wp, statep, sp and dp (dp is
 * clobbered).
 */
177 #define POP_SRETRACE() do { \
178 wp = Xrestpos(ws, wp, statep->ls_start); \
179 *retrace_info->xp = '\0'; \
180 sp = Xstring(retrace_info->xs, retrace_info->xp); \
181 dp = (void *)retrace_info; \
182 retrace_info = retrace_info->next; \
183 afree(dp, ATEMP); \
184 } while (/* CONSTCOND */ 0)
185
186 /**
187 * Lexical analyser
188 *
189 * tokens are not regular expressions, they are LL(1).
190 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
191 * hence the state stack. Note "$(...)" are now parsed recursively.
 *
 * cf is a bit mask of context flags; this function tests ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, VARASN, ARRAYVAR, LQCHAR, CONTIN,
 * KEYWORD, ESACONLY and ALIAS.
 *
 * Returns the token code:
 * - REDIR, with the redirection stored in yylval.iop
 * - LWORD, with the encoded word stored in yylval.cp
 * - a keyword value from the keywords table (cf & KEYWORD)
 * - an operator: the character itself or one of LOGOR, LOGAND, BREAK,
 *   MDPAREN, COPROC, BRKEV, BRKFT
 * - a newline or 0 (end of input), after reading pending here documents
 *
 * Words are built in the expandable string ws as a sequence of marker
 * bytes (CHAR, QCHAR, OQUOTE, CQUOTE, OSUBST, CSUBST, COMSUB, EXPRSUB,
 * OPAT, SPAT, CPAT, ADELIM) with their payload, terminated by EOS.
192 * hence the state stack. Note "$(...)" are now parsed recursively.
193 */
194 int
yylex(int cf)195 yylex(int cf)
196 {
197 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
198 State_info state_info;
199 int c, c2, state;
200 size_t cz;
201 XString ws; /* expandable output word */
202 char *wp; /* output word pointer */
203 char *sp, *dp;
204
/*
 * (Re)start lexing one token. Also reached by goto after a newline
 * with CONTIN set and after an alias expansion has been pushed.
 */
205 Again:
/* states[0] is a sentinel; the first real state lives in states[1] */
206 states[0].type = SINVALID;
207 states[0].ls_base = NULL;
208 statep = &states[1];
209 state_info.base = states;
210 state_info.end = &state_info.base[STATE_BSIZE];
211
212 Xinit(ws, wp, 64, ATEMP);
213
214 backslash_skip = 0;
215 ignore_backslash_newline = 0;
216
217 if (cf & ONEWORD)
218 state = SWORD;
219 else if (cf & LETEXPR) {
220 /* enclose arguments in (double) quotes */
221 *wp++ = OQUOTE;
222 state = SLETPAREN;
223 statep->nparen = 0;
224 } else {
225 /* normal lexing */
226 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
/* skip leading blanks */
227 while ((c = getsc()) == ' ' || c == '\t')
228 ;
/* skip a comment; the terminating newline/EOF is pushed back below */
229 if (c == '#') {
230 ignore_backslash_newline++;
231 while ((c = getsc()) != '\0' && c != '\n')
232 ;
233 ignore_backslash_newline--;
234 }
235 ungetsc(c);
236 }
/*
 * getsc_uu() sets SF_ALIAS on the source when an alias ending in
 * white space was just consumed; turn that into alias expansion for
 * this word.
 */
237 if (source->flags & SF_ALIAS) {
238 /* trailing ' ' in alias definition */
239 source->flags &= ~SF_ALIAS;
240 cf |= ALIAS;
241 }
242
243 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
244 statep->type = state;
245
246 /* check for here string */
247 if (state == SHEREDELIM) {
248 c = getsc();
249 if (c == '<') {
250 state = SHERESTRING;
251 while ((c = getsc()) == ' ' || c == '\t')
252 ;
253 ungetsc(c);
/* store the '<' itself as first character of the word */
254 c = '<';
255 goto accept_nonword;
256 }
257 ungetsc(c);
258 }
259
260 /* collect non-special or quoted characters to form word */
261 while (!((c = getsc()) == 0 ||
262 ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
263 ctype(c, C_LEX1)))) {
264 accept_nonword:
265 Xcheck(ws, wp);
266 switch (state) {
267 case SADELIM:
268 if (c == '(')
269 statep->nparen++;
270 else if (c == ')')
271 statep->nparen--;
272 else if (statep->nparen == 0 &&
273 (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) {
274 *wp++ = ADELIM;
275 *wp++ = c;
/* leave SADELIM at the closing brace or after the last delimiter */
276 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
277 POP_STATE();
/* a closing brace additionally leaves the state pushed beneath it */
278 if (c == /*{*/ '}')
279 POP_STATE();
280 break;
281 }
282 /* FALLTHROUGH */
283 case SBASE:
284 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
285 /* temporary */
286 *wp = EOS;
287 if (is_wdvarname(Xstring(ws, wp), false)) {
288 char *p, *tmp;
289
290 if (arraysub(&tmp)) {
291 *wp++ = CHAR;
292 *wp++ = c;
293 for (p = tmp; *p; ) {
294 Xcheck(ws, wp);
295 *wp++ = CHAR;
296 *wp++ = *p++;
297 }
298 afree(tmp, ATEMP);
299 break;
300 } else {
301 Source *s;
302
/* no subscript: push the text read by arraysub() back for re-reading */
303 s = pushs(SREREAD,
304 source->areap);
305 s->start = s->str =
306 s->u.freeme = tmp;
307 s->next = source;
308 source = s;
309 }
310 }
311 *wp++ = CHAR;
312 *wp++ = c;
313 break;
314 }
315 /* FALLTHROUGH */
316 Sbase1: /* includes *(...|...) pattern (*+?@!) */
317 if (c == '*' || c == '@' || c == '+' || c == '?' ||
318 c == '!') {
319 c2 = getsc();
320 if (c2 == '(' /*)*/ ) {
321 *wp++ = OPAT;
322 *wp++ = c;
323 PUSH_STATE(SPATTERN);
324 break;
325 }
326 ungetsc(c2);
327 }
328 /* FALLTHROUGH */
329 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
330 switch (c) {
331 case '\\':
332 getsc_qchar:
333 if ((c = getsc())) {
334 /* trailing \ is lost */
335 *wp++ = QCHAR;
336 *wp++ = c;
337 }
338 break;
339 case '\'':
340 open_ssquote:
341 *wp++ = OQUOTE;
342 ignore_backslash_newline++;
343 PUSH_STATE(SSQUOTE);
344 break;
345 case '"':
346 open_sdquote:
347 *wp++ = OQUOTE;
348 PUSH_STATE(SDQUOTE);
349 break;
350 default:
351 goto Subst;
352 }
353 break;
354
/* substitution handling; jumped to from several states */
355 Subst:
356 switch (c) {
357 case '\\':
358 c = getsc();
359 switch (c) {
360 case '"':
361 if ((cf & HEREDOC))
362 goto heredocquote;
363 /* FALLTHROUGH */
364 case '\\':
365 case '$': case '`':
366 store_qchar:
367 *wp++ = QCHAR;
368 *wp++ = c;
369 break;
370 default:
371 heredocquote:
372 Xcheck(ws, wp);
373 if (c) {
374 /* trailing \ is lost */
375 *wp++ = CHAR;
376 *wp++ = '\\';
377 *wp++ = CHAR;
378 *wp++ = c;
379 }
380 break;
381 }
382 break;
383 case '$':
384 subst_dollar:
385 c = getsc();
386 if (c == '(') /*)*/ {
387 c = getsc();
388 if (c == '(') /*)*/ {
389 *wp++ = EXPRSUB;
390 PUSH_STATE(SASPAREN);
391 statep->nparen = 2;
/*
 * record the raw input so that SASPAREN can either copy it out as
 * the expression or re-read it as a command substitution
 */
392 PUSH_SRETRACE();
393 *retrace_info->xp++ = '(';
394 } else {
395 ungetsc(c);
396 subst_command:
397 sp = yyrecursive();
398 cz = strlen(sp) + 1;
399 XcheckN(ws, wp, cz);
400 *wp++ = COMSUB;
401 memcpy(wp, sp, cz);
402 wp += cz;
403 }
404 } else if (c == '{') /*}*/ {
405 *wp++ = OSUBST;
406 *wp++ = '{'; /*}*/
407 wp = get_brace_var(&ws, wp);
408 c = getsc();
409 /* allow :# and :% (ksh88 compat) */
410 if (c == ':') {
411 *wp++ = CHAR;
412 *wp++ = c;
413 c = getsc();
414 if (c == ':') {
415 *wp++ = CHAR;
416 *wp++ = '0';
417 *wp++ = ADELIM;
418 *wp++ = ':';
419 PUSH_STATE(SBRACE);
420 PUSH_STATE(SADELIM);
421 statep->ls_adelim.delimiter = ':';
422 statep->ls_adelim.num = 1;
423 statep->nparen = 0;
424 break;
425 } else if (ksh_isdigit(c) ||
426 c == '('/*)*/ || c == ' ' ||
427 /*XXX what else? */
428 c == '$') {
429 /* substring subst. */
430 if (c != ' ') {
431 *wp++ = CHAR;
432 *wp++ = ' ';
433 }
434 ungetsc(c);
435 PUSH_STATE(SBRACE);
436 PUSH_STATE(SADELIM);
437 statep->ls_adelim.delimiter = ':';
438 statep->ls_adelim.num = 2;
439 statep->nparen = 0;
440 break;
441 }
442 } else if (c == '/') {
443 *wp++ = CHAR;
444 *wp++ = c;
445 if ((c = getsc()) == '/') {
446 *wp++ = ADELIM;
447 *wp++ = c;
448 } else
449 ungetsc(c);
450 PUSH_STATE(SBRACE);
451 PUSH_STATE(SADELIM);
452 statep->ls_adelim.delimiter = '/';
453 statep->ls_adelim.num = 1;
454 statep->nparen = 0;
455 break;
456 }
457 /*
458 * If this is a trim operation,
459 * treat (,|,) specially in STBRACE.
460 */
461 if (ctype(c, C_SUBOP2)) {
462 ungetsc(c);
463 if (Flag(FSH))
464 PUSH_STATE(STBRACEBOURNE);
465 else
466 PUSH_STATE(STBRACEKORN);
467 } else {
468 ungetsc(c);
469 if (state == SDQUOTE)
470 PUSH_STATE(SQBRACE);
471 else
472 PUSH_STATE(SBRACE);
473 }
474 } else if (ksh_isalphx(c)) {
/* $name: emitted as OSUBST 'X' name NUL CSUBST 'X' */
475 *wp++ = OSUBST;
476 *wp++ = 'X';
477 do {
478 Xcheck(ws, wp);
479 *wp++ = c;
480 c = getsc();
481 } while (ksh_isalnux(c));
482 *wp++ = '\0';
483 *wp++ = CSUBST;
484 *wp++ = 'X';
485 ungetsc(c);
486 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
487 Xcheck(ws, wp);
488 *wp++ = OSUBST;
489 *wp++ = 'X';
490 *wp++ = c;
491 *wp++ = '\0';
492 *wp++ = CSUBST;
493 *wp++ = 'X';
494 } else if (c == '\'' && (state == SBASE)) {
495 /* XXX which other states are valid? */
496 *wp++ = OQUOTE;
497 ignore_backslash_newline++;
498 PUSH_STATE(SEQUOTE);
499 statep->ls_bool = false;
500 break;
501 } else if (c == '"' && (state == SBASE)) {
502 /* XXX which other states are valid? */
/* $"...": drop the dollar sign and lex the string as a plain "..." */
503 goto DEQUOTE;
504 } else {
505 *wp++ = CHAR;
506 *wp++ = '$';
507 DEQUOTE:
508 ungetsc(c);
509 }
510 break;
511 case '`':
512 subst_gravis:
513 PUSH_STATE(SBQUOTE);
514 *wp++ = COMSUB;
515 /*
516 * Need to know if we are inside double quotes
517 * since sh/AT&T-ksh translate the \" to " in
518 * "`...\"...`".
519 * This is not done in POSIX mode (section
520 * 3.2.3, Double Quotes: "The backquote shall
521 * retain its special meaning introducing the
522 * other form of command substitution (see
523 * 3.6.3). The portion of the quoted string
524 * from the initial backquote and the
525 * characters up to the next backquote that
526 * is not preceded by a backslash (having
527 * escape characters removed) defines that
528 * command whose output replaces `...` when
529 * the word is expanded."
530 * Section 3.6.3, Command Substitution:
531 * "Within the backquoted style of command
532 * substitution, backslash shall retain its
533 * literal meaning, except when followed by
534 * $ ` \.").
535 */
536 statep->ls_bool = false;
/*
 * Walk the state stack from the top towards the bottom, following
 * ls_base links from one state block to the next, looking for an
 * enclosing SDQUOTE state.
 */
537 s2 = statep;
538 base = state_info.base;
539 while (/* CONSTCOND */ 1) {
540 for (; s2 != base; s2--) {
541 if (s2->type == SDQUOTE) {
542 statep->ls_bool = true;
543 break;
544 }
545 }
546 if (s2 != base)
547 break;
548 if (!(s2 = s2->ls_base))
549 break;
550 base = s2-- - STATE_BSIZE;
551 }
552 break;
553 case QCHAR:
554 if (cf & LQCHAR) {
555 *wp++ = QCHAR;
556 *wp++ = getsc();
557 break;
558 }
559 /* FALLTHROUGH */
560 default:
561 store_char:
562 *wp++ = CHAR;
563 *wp++ = c;
564 }
565 break;
566
567 case SEQUOTE:
568 if (c == '\'') {
569 POP_STATE();
570 *wp++ = CQUOTE;
571 ignore_backslash_newline--;
572 } else if (c == '\\') {
573 if ((c2 = unbksl(true, s_get, s_put)) == -1)
574 c2 = s_get();
/* an escape yielding NUL makes the rest of the string be ignored */
575 if (c2 == 0)
576 statep->ls_bool = true;
577 if (!statep->ls_bool) {
578 char ts[4];
579
580 if ((unsigned int)c2 < 0x100) {
581 *wp++ = QCHAR;
582 *wp++ = c2;
583 } else {
/* values from 0x100 up carry a wide character offset by 0x100 */
584 cz = utf_wctomb(ts, c2 - 0x100);
585 ts[cz] = 0;
586 for (cz = 0; ts[cz]; ++cz) {
587 *wp++ = QCHAR;
588 *wp++ = ts[cz];
589 }
590 }
591 }
592 } else if (!statep->ls_bool) {
593 *wp++ = QCHAR;
594 *wp++ = c;
595 }
596 break;
597
598 case SSQUOTE:
599 if (c == '\'') {
600 POP_STATE();
601 *wp++ = CQUOTE;
602 ignore_backslash_newline--;
603 } else {
604 *wp++ = QCHAR;
605 *wp++ = c;
606 }
607 break;
608
609 case SDQUOTE:
610 if (c == '"') {
611 POP_STATE();
612 *wp++ = CQUOTE;
613 } else
614 goto Subst;
615 break;
616
617 /* $(( ... )) */
618 case SASPAREN:
619 if (c == '(')
620 statep->nparen++;
621 else if (c == ')') {
622 statep->nparen--;
623 if (statep->nparen == 1) {
624 /* end of EXPRSUB */
/* sp now holds the recorded raw text, starting with '(' */
625 POP_SRETRACE();
626 POP_STATE();
627
628 if ((c2 = getsc()) == /*(*/ ')') {
/* copy the text between the outer parentheses after EXPRSUB */
629 cz = strlen(sp) - 2;
630 XcheckN(ws, wp, cz);
631 memcpy(wp, sp + 1, cz);
632 wp += cz;
633 afree(sp, ATEMP);
634 *wp++ = '\0';
635 break;
636 } else {
637 Source *s;
638
639 ungetsc(c2);
640 /*
641 * mismatched parenthesis -
642 * assume we were really
643 * parsing a $(...) expression
644 */
/* drop the EXPRSUB marker and re-read the recorded text */
645 --wp;
646 s = pushs(SREREAD,
647 source->areap);
648 s->start = s->str =
649 s->u.freeme = sp;
650 s->next = source;
651 source = s;
652 goto subst_command;
653 }
654 }
655 }
656 /* reuse existing state machine */
657 goto Sbase2;
658
659 case SQBRACE:
660 if (c == '\\') {
661 /*
662 * perform POSIX "quote removal" if the back-
663 * slash is "special", i.e. same cases as the
664 * {case '\\':} in Subst: plus closing brace;
665 * in mksh code "quote removal" on '\c' means
666 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
667 * emitted (in heredocquote:)
668 */
669 if ((c = getsc()) == '"' || c == '\\' ||
670 c == '$' || c == '`' || c == /*{*/'}')
671 goto store_qchar;
672 goto heredocquote;
673 }
674 goto common_SQBRACE;
675
676 case SBRACE:
677 if (c == '\'')
678 goto open_ssquote;
679 else if (c == '\\')
680 goto getsc_qchar;
681 common_SQBRACE:
682 if (c == '"')
683 goto open_sdquote;
684 else if (c == '$')
685 goto subst_dollar;
686 else if (c == '`')
687 goto subst_gravis;
688 else if (c != /*{*/ '}')
689 goto store_char;
690 POP_STATE();
691 *wp++ = CSUBST;
692 *wp++ = /*{*/ '}';
693 break;
694
695 /* Same as SBASE, except (,|,) treated specially */
696 case STBRACEKORN:
697 if (c == '|')
698 *wp++ = SPAT;
699 else if (c == '(') {
700 *wp++ = OPAT;
701 /* simile for @ */
702 *wp++ = ' ';
703 PUSH_STATE(SPATTERN);
704 } else /* FALLTHROUGH */
705 case STBRACEBOURNE:
706 if (c == /*{*/ '}') {
707 POP_STATE();
708 *wp++ = CSUBST;
709 *wp++ = /*{*/ '}';
710 } else
711 goto Sbase1;
712 break;
713
714 case SBQUOTE:
715 if (c == '`') {
716 *wp++ = 0;
717 POP_STATE();
718 } else if (c == '\\') {
719 switch (c = getsc()) {
720 case 0:
721 /* trailing \ is lost */
722 break;
723 case '\\':
724 case '$': case '`':
725 *wp++ = c;
726 break;
727 case '"':
/* ls_bool was set on entry if an enclosing SDQUOTE state exists */
728 if (statep->ls_bool) {
729 *wp++ = c;
730 break;
731 }
732 /* FALLTHROUGH */
733 default:
734 *wp++ = '\\';
735 *wp++ = c;
736 break;
737 }
738 } else
739 *wp++ = c;
740 break;
741
742 /* ONEWORD */
743 case SWORD:
744 goto Subst;
745
746 /* LETEXPR: (( ... )) */
747 case SLETPAREN:
748 if (c == /*(*/ ')') {
749 if (statep->nparen > 0)
750 --statep->nparen;
751 else if ((c2 = getsc()) == /*(*/ ')') {
752 c = 0;
753 *wp++ = CQUOTE;
754 goto Done;
755 } else {
756 Source *s;
757
758 ungetsc(c2);
759 /*
760 * mismatched parenthesis -
761 * assume we were really
762 * parsing a (...) expression
763 */
/* turn the word read so far back into text and re-read it */
764 *wp = EOS;
765 sp = Xstring(ws, wp);
766 dp = wdstrip(sp, WDS_KEEPQ);
767 s = pushs(SREREAD, source->areap);
768 s->start = s->str = s->u.freeme = dp;
769 s->next = source;
770 source = s;
771 return ('('/*)*/);
772 }
773 } else if (c == '(')
774 /*
775 * parentheses inside quotes and
776 * backslashes are lost, but AT&T ksh
777 * doesn't count them either
778 */
779 ++statep->nparen;
780 goto Sbase2;
781
782 /* <<< delimiter */
783 case SHERESTRING:
784 if (c == '\\') {
785 c = getsc();
786 if (c) {
787 /* trailing \ is lost */
788 *wp++ = QCHAR;
789 *wp++ = c;
790 }
791 } else if (c == '$') {
792 if ((c2 = getsc()) == '\'') {
793 PUSH_STATE(SEQUOTE);
794 statep->ls_bool = false;
795 goto sherestring_quoted;
796 } else if (c2 == '"')
797 goto sherestring_dquoted;
798 ungetsc(c2);
799 goto sherestring_regular;
800 } else if (c == '\'') {
801 PUSH_STATE(SSQUOTE);
802 sherestring_quoted:
803 *wp++ = OQUOTE;
804 ignore_backslash_newline++;
805 } else if (c == '"') {
806 sherestring_dquoted:
/* replaces the current state instead of pushing a new one */
807 state = statep->type = SHEREDQUOTE;
808 *wp++ = OQUOTE;
809 /* just don't IFS split; no quoting mode */
810 } else {
811 sherestring_regular:
812 *wp++ = CHAR;
813 *wp++ = c;
814 }
815 break;
816
817 /* <<,<<- delimiter */
818 case SHEREDELIM:
819 /*
820 * XXX chuck this state (and the next) - use
821 * the existing states ($ and \`...` should be
822 * stripped of their specialness after the
823 * fact).
824 */
825 /*
826 * here delimiters need a special case since
827 * $ and `...` are not to be treated specially
828 */
829 if (c == '\\') {
830 c = getsc();
831 if (c) {
832 /* trailing \ is lost */
833 *wp++ = QCHAR;
834 *wp++ = c;
835 }
836 } else if (c == '$') {
837 if ((c2 = getsc()) == '\'') {
838 PUSH_STATE(SEQUOTE);
839 statep->ls_bool = false;
840 goto sheredelim_quoted;
841 } else if (c2 == '"')
842 goto sheredelim_dquoted;
843 ungetsc(c2);
844 goto sheredelim_regular;
845 } else if (c == '\'') {
846 PUSH_STATE(SSQUOTE);
847 sheredelim_quoted:
848 *wp++ = OQUOTE;
849 ignore_backslash_newline++;
850 } else if (c == '"') {
851 sheredelim_dquoted:
852 state = statep->type = SHEREDQUOTE;
853 *wp++ = OQUOTE;
854 } else {
855 sheredelim_regular:
856 *wp++ = CHAR;
857 *wp++ = c;
858 }
859 break;
860
861 /* " in <<,<<- delimiter */
862 case SHEREDQUOTE:
863 if (c == '"') {
864 *wp++ = CQUOTE;
/* switch back to whichever delimiter state we came from */
865 state = statep->type =
866 /* dp[1] == '<' means here string */
867 Xstring(ws, wp)[1] == '<' ?
868 SHERESTRING : SHEREDELIM;
869 } else {
870 if (c == '\\') {
871 switch (c = getsc()) {
872 case 0:
873 /* trailing \ is lost */
874 case '\\':
875 case '"':
876 case '$':
877 case '`':
878 break;
879 default:
880 *wp++ = CHAR;
881 *wp++ = '\\';
882 break;
883 }
884 }
885 *wp++ = CHAR;
886 *wp++ = c;
887 }
888 break;
889
890 /* in *(...|...) pattern (*+?@!) */
891 case SPATTERN:
892 if (c == /*(*/ ')') {
893 *wp++ = CPAT;
894 POP_STATE();
895 } else if (c == '|') {
896 *wp++ = SPAT;
897 } else if (c == '(') {
898 *wp++ = OPAT;
899 /* simile for @ */
900 *wp++ = ' ';
901 PUSH_STATE(SPATTERN);
902 } else
903 goto Sbase1;
904 break;
905 }
906 }
/* word collection finished; c holds the terminating character (or 0) */
907 Done:
908 Xcheck(ws, wp);
/* any state still pushed means some construct was left open */
909 if (statep != &states[1])
910 /* XXX figure out what is missing */
911 yyerror("no closing quote\n");
912
913 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
914 if (state == SHEREDELIM || state == SHERESTRING)
915 state = SBASE;
916
917 dp = Xstring(ws, wp);
/*
 * Redirection: the terminator is one of < > & and the word read so
 * far is empty or consists only of unquoted digits (the fd number).
 */
918 if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
919 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
920
921 if (Xlength(ws, wp) == 0)
922 iop->unit = c == '<' ? 0 : 1;
923 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
924 if (dp[c2] != CHAR)
925 goto no_iop;
926 if (!ksh_isdigit(dp[c2 + 1]))
927 goto no_iop;
928 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
929 }
930
931 if (iop->unit >= FDBASE)
932 goto no_iop;
933
/* a leading & is only a redirection when followed by > */
934 if (c == '&') {
935 if ((c2 = getsc()) != '>') {
936 ungetsc(c2);
937 goto no_iop;
938 }
939 c = c2;
940 iop->flag = IOBASH;
941 } else
942 iop->flag = 0;
943
944 c2 = getsc();
945 /* <<, >>, <> are ok, >< is not */
946 if (c == c2 || (c == '<' && c2 == '>')) {
947 iop->flag |= c == c2 ?
948 (c == '>' ? IOCAT : IOHERE) : IORDWR;
949 if (iop->flag == IOHERE) {
950 if ((c2 = getsc()) == '-') {
951 iop->flag |= IOSKIP;
952 c2 = getsc();
953 } else if (c2 == '<')
954 iop->flag |= IOHERESTR;
955 ungetsc(c2);
/* newline right after the operator: no delimiter was given */
956 if (c2 == '\n')
957 iop->flag |= IONDELIM;
958 }
959 } else if (c2 == '&')
960 iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
961 else {
962 iop->flag |= c == '>' ? IOWRITE : IOREAD;
963 if (c == '>' && c2 == '|')
964 iop->flag |= IOCLOB;
965 else
966 ungetsc(c2);
967 }
968
969 iop->name = NULL;
970 iop->delim = NULL;
971 iop->heredoc = NULL;
972 /* free word */
973 Xfree(ws, wp);
974 yylval.iop = iop;
975 return (REDIR);
976 no_iop:
977 afree(iop, ATEMP);
978 }
979
980 if (wp == dp && state == SBASE) {
981 /* free word */
982 Xfree(ws, wp);
983 /* no word, process LEX1 character */
984 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
/* doubled character, or one of the |& ;| ;& combinations */
985 if ((c2 = getsc()) == c)
986 c = (c == ';') ? BREAK :
987 (c == '|') ? LOGOR :
988 (c == '&') ? LOGAND :
989 /* c == '(' ) */ MDPAREN;
990 else if (c == '|' && c2 == '&')
991 c = COPROC;
992 else if (c == ';' && c2 == '|')
993 c = BRKEV;
994 else if (c == ';' && c2 == '&')
995 c = BRKFT;
996 else
997 ungetsc(c2);
998 #ifndef MKSH_SMALL
/* ;;& is accepted as another spelling of BRKEV */
999 if (c == BREAK) {
1000 if ((c2 = getsc()) == '&')
1001 c = BRKEV;
1002 else
1003 ungetsc(c2);
1004 }
1005 #endif
1006 } else if (c == '\n') {
/* read the bodies of all here documents queued on this line */
1007 gethere(false);
1008 if (cf & CONTIN)
1009 goto Again;
1010 } else if (c == '\0')
1011 /* need here strings at EOF */
1012 gethere(true);
1013 return (c);
1014 }
1015
1016 /* terminate word */
1017 *wp++ = EOS;
1018 yylval.cp = Xclose(ws, wp);
1019 if (state == SWORD || state == SLETPAREN
1020 /* XXX ONEWORD? */)
1021 return (LWORD);
1022
1023 /* unget terminator */
1024 ungetsc(c);
1025
1026 /*
1027 * note: the alias-vs-function code below depends on several
1028 * interna: starting from here, source->str is not modified;
1029 * the way getsc() and ungetsc() operate; etc.
1030 */
1031
1032 /* copy word to unprefixed string ident */
1033 sp = yylval.cp;
1034 dp = ident;
/* here string delimiter: also step over quote markers while copying */
1035 if ((cf & HEREDELIM) && (sp[1] == '<'))
1036 while (dp < ident+IDENT) {
1037 if ((c = *sp++) == CHAR)
1038 *dp++ = *sp++;
1039 else if ((c != OQUOTE) && (c != CQUOTE))
1040 break;
1041 }
1042 else
1043 while (dp < ident+IDENT && (c = *sp++) == CHAR)
1044 *dp++ = *sp++;
1045 /* Make sure the ident array stays '\0' padded */
1046 memset(dp, 0, (ident+IDENT) - dp + 1);
/* copying stopped before EOS: ident is not a plain word */
1047 if (c != EOS)
1048 /* word is not unquoted */
1049 *ident = '\0';
1050
1051 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1052 struct tbl *p;
1053 uint32_t h = hash(ident);
1054
/* with ESACONLY set, only esac and the closing brace are recognised */
1055 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1056 (!(cf & ESACONLY) || p->val.i == ESAC ||
1057 p->val.i == /*{*/ '}')) {
1058 afree(yylval.cp, ATEMP);
1059 return (p->val.i);
1060 }
1061 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1062 (p->flag & ISSET)) {
1063 /*
1064 * this still points to the same character as the
1065 * ungetsc'd terminator from above
1066 */
1067 const char *cp = source->str;
1068
1069 /* prefer POSIX but not Korn functions over aliases */
1070 while (*cp == ' ' || *cp == '\t')
1071 /*
1072 * this is like getsc() without skipping
1073 * over Source boundaries (including not
1074 * parsing ungetsc'd characters that got
1075 * pushed into an SREREAD) which is what
1076 * we want here anyway: find out whether
1077 * the alias name is followed by a POSIX
1078 * function definition (only the opening
1079 * parenthesis is checked though)
1080 */
1081 ++cp;
1082 /* prefer functions over aliases */
1083 if (cp[0] != '(' || cp[1] != ')') {
1084 Source *s = source;
1085
/*
 * recursion guard: an alias that is already being expanded
 * further up the source stack is returned as a plain word
 */
1086 while (s && (s->flags & SF_HASALIAS))
1087 if (s->u.tblp == p)
1088 return (LWORD);
1089 else
1090 s = s->next;
1091 /* push alias expansion */
1092 s = pushs(SALIAS, source->areap);
1093 s->start = s->str = p->val.s;
1094 s->u.tblp = p;
1095 s->flags |= SF_HASALIAS;
1096 s->next = source;
1097 if (source->type == SEOF) {
1098 /* prevent infinite recursion at EOS */
1099 source->u.tblp = p;
1100 source->flags |= SF_HASALIAS;
1101 }
1102 source = s;
1103 afree(yylval.cp, ATEMP);
/* lex again, now reading from the alias text */
1104 goto Again;
1105 }
1106 }
1107 }
1108
1109 return (LWORD);
1110 }
1111
1112 static void
gethere(bool iseof)1113 gethere(bool iseof)
1114 {
1115 struct ioword **p;
1116
1117 for (p = heres; p < herep; p++)
1118 if (iseof && !((*p)->flag & IOHERESTR))
1119 /* only here strings at EOF */
1120 return;
1121 else
1122 readhere(*p);
1123 herep = heres;
1124 }
1125
1126 /*
1127 * read "<<word" text into temp file
 *
 * As visible here, the text is collected in memory and stored in
 * iop->heredoc rather than written to a file.
 *
 * For a here string (IOHERESTR) the delimiter word is expanded with
 * evalstr(), its first character is dropped and a newline is appended.
 *
 * For a here document, input lines are read with getsc() until a line
 * consisting of the delimiter is found (leading tabs are skipped first
 * if IOSKIP is set). With IONDELIM the delimiter is the fixed string
 * "<<". Reaching end of input before the delimiter is reported through
 * yyerror().
1128 */
1129
1130 static void
readhere(struct ioword * iop)1131 readhere(struct ioword *iop)
1132 {
1133 int c;
1134 const char *eof, *eofp;
1135 XString xs;
1136 char *xp;
1137 int xpos;
1138
1139 if (iop->flag & IOHERESTR) {
1140 /* process the here string */
1141 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
/* shift the text left by one and put a newline in the freed slot */
1142 xpos = strlen(xp) - 1;
1143 memmove(xp, xp + 1, xpos);
1144 xp[xpos] = '\n';
1145 return;
1146 }
1147
1148 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1149
/* balanced by the decrement at the end of this function */
1150 if (!(iop->flag & IOEVAL))
1151 ignore_backslash_newline++;
1152
1153 Xinit(xs, xp, 256, ATEMP);
1154
1155 heredoc_read_line:
1156 /* beginning of line */
1157 eofp = eof;
/* remember where this line starts so the delimiter line can be cut off */
1158 xpos = Xsavepos(xs, xp);
1159 if (iop->flag & IOSKIP) {
1160 /* skip over leading tabs */
1161 while ((c = getsc()) == '\t')
1162 /* nothing */;
1163 goto heredoc_parse_char;
1164 }
1165 heredoc_read_char:
1166 c = getsc();
1167 heredoc_parse_char:
1168 /* compare with here document marker */
1169 if (!*eofp) {
1170 /* end of here document marker, what to do? */
1171 switch (c) {
1172 case /*(*/ ')':
1173 if (!subshell_nesting_level)
1174 /*-
1175 * not allowed outside $(...) or (...)
1176 * => mismatch
1177 */
1178 break;
1179 /* allow $(...) or (...) to close here */
1180 ungetsc(/*(*/ ')');
1181 /* FALLTHROUGH */
1182 case 0:
1183 /*
1184 * Allow EOF here to commands without trailing
1185 * newlines (mksh -c '...') will work as well.
1186 */
1187 case '\n':
1188 /* Newline terminates here document marker */
1189 goto heredoc_found_terminator;
1190 }
1191 } else if (c == *eofp++)
1192 /* store; then read and compare next character */
1193 goto heredoc_store_and_loop;
1194 /* nope, mismatch; read until end of line */
1195 while (c != '\n') {
1196 if (!c)
1197 /* oops, reached EOF */
1198 yyerror("%s '%s' unclosed\n", "here document", eof);
1199 /* store character */
1200 Xcheck(xs, xp);
1201 Xput(xs, xp, c);
1202 /* read next character */
1203 c = getsc();
1204 }
1205 /* we read a newline as last character */
1206 heredoc_store_and_loop:
1207 /* store character */
1208 Xcheck(xs, xp);
1209 Xput(xs, xp, c);
1210 if (c == '\n')
1211 goto heredoc_read_line;
1212 goto heredoc_read_char;
1213
1214 heredoc_found_terminator:
1215 /* jump back to saved beginning of line */
1216 xp = Xrestpos(xs, xp, xpos);
1217 /* terminate, close and store */
1218 Xput(xs, xp, '\0');
1219 iop->heredoc = Xclose(xs, xp);
1220
1221 if (!(iop->flag & IOEVAL))
1222 ignore_backslash_newline--;
1223 }
1224
1225 void
yyerror(const char * fmt,...)1226 yyerror(const char *fmt, ...)
1227 {
1228 va_list va;
1229
1230 /* pop aliases and re-reads */
1231 while (source->type == SALIAS || source->type == SREREAD)
1232 source = source->next;
1233 /* zap pending input */
1234 source->str = null;
1235
1236 error_prefix(true);
1237 va_start(va, fmt);
1238 shf_vfprintf(shl_out, fmt, va);
1239 va_end(va);
1240 errorfz();
1241 }
1242
1243 /*
1244 * input for yylex with alias expansion
1245 */
1246
1247 Source *
pushs(int type,Area * areap)1248 pushs(int type, Area *areap)
1249 {
1250 Source *s;
1251
1252 s = alloc(sizeof(Source), areap);
1253 memset(s, 0, sizeof(Source));
1254 s->type = type;
1255 s->str = null;
1256 s->areap = areap;
1257 if (type == SFILE || type == SSTDIN)
1258 XinitN(s->xs, 256, s->areap);
1259 return (s);
1260 }
1261
/*
 * Return the next character from the current source, refilling or
 * popping sources when the current input string is exhausted:
 * - SEOF: keep returning 0
 * - SSTDIN, SFILE: read another line with getsc_line()
 * - SWSTR, SSTRING: nothing more to read; the source becomes SEOF
 * - SWORDS, SWORDSEP: hand out the strings of u.strv separated by a
 *   single space, followed by a final newline
 * - SALIAS: pop the finished alias (see the comments in that case)
 * - SREREAD: free the re-read buffer and pop the source
 * Freshly obtained input is echoed to shl_out when SF_ECHO is set.
 */
1262 static int
getsc_uu(void)1263 getsc_uu(void)
1264 {
1265 Source *s = source;
1266 int c;
1267
1268 while ((c = *s->str++) == 0) {
1269 /* return 0 for EOF by default */
1270 s->str = NULL;
1271 switch (s->type) {
1272 case SEOF:
1273 s->str = null;
1274 return (0);
1275
1276 case SSTDIN:
1277 case SFILE:
1278 getsc_line(s);
1279 break;
1280
1281 case SWSTR:
1282 break;
1283
1284 case SSTRING:
1285 break;
1286
1287 case SWORDS:
/* next word; afterwards a separator (or the final newline) is due */
1288 s->start = s->str = *s->u.strv++;
1289 s->type = SWORDSEP;
1290 break;
1291
1292 case SWORDSEP:
1293 if (*s->u.strv == NULL) {
1294 s->start = s->str = "\n";
1295 s->type = SEOF;
1296 } else {
1297 s->start = s->str = " ";
1298 s->type = SWORDS;
1299 }
1300 break;
1301
1302 case SALIAS:
1303 if (s->flags & SF_ALIASEND) {
1304 /* pass on an unused SF_ALIAS flag */
1305 source = s->next;
1306 source->flags |= s->flags & SF_ALIAS;
1307 s = source;
1308 } else if (*s->u.tblp->val.s &&
1309 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1310 /* pop source stack */
1311 source = s = s->next;
1312 /*
1313 * Note that this alias ended with a
1314 * space, enabling alias expansion on
1315 * the following word.
1316 */
1317 s->flags |= SF_ALIAS;
1318 } else {
1319 /*
1320 * At this point, we need to keep the current
1321 * alias in the source list so recursive
1322 * aliases can be detected and we also need to
1323 * return the next character. Do this by
1324 * temporarily popping the alias to get the
1325 * next character and then put it back in the
1326 * source list with the SF_ALIASEND flag set.
1327 */
1328 /* pop source stack */
1329 source = s->next;
1330 source->flags |= s->flags & SF_ALIAS;
1331 c = getsc_uu();
1332 if (c) {
1333 s->flags |= SF_ALIASEND;
/* re-push the alias with the look-ahead character as its only input */
1334 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1335 s->start = s->str = s->ugbuf;
1336 s->next = source;
1337 source = s;
1338 } else {
1339 s = source;
1340 /* avoid reading EOF twice */
1341 s->str = NULL;
1342 break;
1343 }
1344 }
1345 continue;
1346
1347 case SREREAD:
1348 if (s->start != s->ugbuf)
1349 /* yuck */
1350 afree(s->u.freeme, ATEMP);
1351 source = s = s->next;
1352 continue;
1353 }
/* no new input was provided above: this source is at end of input */
1354 if (s->str == NULL) {
1355 s->type = SEOF;
1356 s->start = s->str = null;
1357 return ('\0');
1358 }
1359 if (s->flags & SF_ECHO) {
1360 shf_puts(s->str, shl_out);
1361 shf_flush(shl_out);
1362 }
1363 }
1364 return (c);
1365 }
1366
1367 static void
getsc_line(Source * s)1368 getsc_line(Source *s)
1369 {
1370 char *xp = Xstring(s->xs, xp), *cp;
1371 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1372 int have_tty = interactive && (s->flags & SF_TTY);
1373
1374 /* Done here to ensure nothing odd happens when a timeout occurs */
1375 XcheckN(s->xs, xp, LINE);
1376 *xp = '\0';
1377 s->start = s->str = xp;
1378
1379 if (have_tty && ksh_tmout) {
1380 ksh_tmout_state = TMOUT_READING;
1381 alarm(ksh_tmout);
1382 }
1383 if (interactive)
1384 change_winsz();
1385 if (have_tty && (
1386 #if !MKSH_S_NOVI
1387 Flag(FVI) ||
1388 #endif
1389 Flag(FEMACS) || Flag(FGMACS))) {
1390 int nread;
1391
1392 nread = x_read(xp, LINE);
1393 if (nread < 0)
1394 /* read error */
1395 nread = 0;
1396 xp[nread] = '\0';
1397 xp += nread;
1398 } else {
1399 if (interactive)
1400 pprompt(prompt, 0);
1401 else
1402 s->line++;
1403
1404 while (/* CONSTCOND */ 1) {
1405 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1406
1407 if (!p && shf_error(s->u.shf) &&
1408 shf_errno(s->u.shf) == EINTR) {
1409 shf_clearerr(s->u.shf);
1410 if (trap)
1411 runtraps(0);
1412 continue;
1413 }
1414 if (!p || (xp = p, xp[-1] == '\n'))
1415 break;
1416 /* double buffer size */
1417 /* move past NUL so doubling works... */
1418 xp++;
1419 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1420 /* ...and move back again */
1421 xp--;
1422 }
1423 /*
1424 * flush any unwanted input so other programs/builtins
1425 * can read it. Not very optimal, but less error prone
1426 * than flushing else where, dealing with redirections,
1427 * etc.
1428 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1429 */
1430 if (s->type == SSTDIN)
1431 shf_flush(s->u.shf);
1432 }
1433 /*
1434 * XXX: temporary kludge to restore source after a
1435 * trap may have been executed.
1436 */
1437 source = s;
1438 if (have_tty && ksh_tmout) {
1439 ksh_tmout_state = TMOUT_EXECUTING;
1440 alarm(0);
1441 }
1442 cp = Xstring(s->xs, xp);
1443 #ifndef MKSH_SMALL
1444 if (interactive && *cp == '!' && cur_prompt == PS1) {
1445 int linelen;
1446
1447 linelen = Xlength(s->xs, xp);
1448 XcheckN(s->xs, xp, Zfc_e_dash + /* NUL */ 1);
1449 /* reload after potential realloc */
1450 cp = Xstring(s->xs, xp);
1451 /* change initial '!' into space */
1452 *cp = ' ';
1453 /* NUL terminate the current string */
1454 *xp = '\0';
1455 /* move the actual string forward */
1456 memmove(cp + Zfc_e_dash, cp, linelen + /* NUL */ 1);
1457 xp += Zfc_e_dash;
1458 /* prepend it with "fc -e -" */
1459 memcpy(cp, Tfc_e_dash, Zfc_e_dash);
1460 }
1461 #endif
1462 s->start = s->str = cp;
1463 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1464 /* Note: if input is all nulls, this is not eof */
1465 if (Xlength(s->xs, xp) == 0) {
1466 /* EOF */
1467 if (s->type == SFILE)
1468 shf_fdclose(s->u.shf);
1469 s->str = NULL;
1470 } else if (interactive && *s->str &&
1471 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1472 histsave(&s->line, s->str, true, true);
1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1474 } else if (interactive && cur_prompt == PS1) {
1475 cp = Xstring(s->xs, xp);
1476 while (*cp && ctype(*cp, C_IFSWS))
1477 ++cp;
1478 if (!*cp)
1479 histsync();
1480 #endif
1481 }
1482 if (interactive)
1483 set_prompt(PS2, NULL);
1484 }
1485
1486 void
set_prompt(int to,Source * s)1487 set_prompt(int to, Source *s)
1488 {
1489 cur_prompt = to;
1490
1491 switch (to) {
1492 /* command */
1493 case PS1:
1494 /*
1495 * Substitute ! and !! here, before substitutions are done
1496 * so ! in expanded variables are not expanded.
1497 * NOTE: this is not what AT&T ksh does (it does it after
1498 * substitutions, POSIX doesn't say which is to be done.
1499 */
1500 {
1501 struct shf *shf;
1502 char * volatile ps1;
1503 Area *saved_atemp;
1504
1505 ps1 = str_val(global("PS1"));
1506 shf = shf_sopen(NULL, strlen(ps1) * 2,
1507 SHF_WR | SHF_DYNAMIC, NULL);
1508 while (*ps1)
1509 if (*ps1 != '!' || *++ps1 == '!')
1510 shf_putchar(*ps1++, shf);
1511 else
1512 shf_fprintf(shf, "%d",
1513 s ? s->line + 1 : 0);
1514 ps1 = shf_sclose(shf);
1515 saved_atemp = ATEMP;
1516 newenv(E_ERRH);
1517 if (sigsetjmp(e->jbuf, 0)) {
1518 prompt = safe_prompt;
1519 /*
1520 * Don't print an error - assume it has already
1521 * been printed. Reason is we may have forked
1522 * to run a command and the child may be
1523 * unwinding its stack through this code as it
1524 * exits.
1525 */
1526 } else {
1527 char *cp = substitute(ps1, 0);
1528 strdupx(prompt, cp, saved_atemp);
1529 }
1530 quitenv(NULL);
1531 }
1532 break;
1533 /* command continuation */
1534 case PS2:
1535 prompt = str_val(global("PS2"));
1536 break;
1537 }
1538 }
1539
1540 static int
dopprompt(const char * cp,int ntruncate,bool doprint)1541 dopprompt(const char *cp, int ntruncate, bool doprint)
1542 {
1543 int columns = 0, lines = 0, indelimit = 0;
1544 char delimiter = 0;
1545
1546 /*
1547 * Undocumented AT&T ksh feature:
1548 * If the second char in the prompt string is \r then the first
1549 * char is taken to be a non-printing delimiter and any chars
1550 * between two instances of the delimiter are not considered to
1551 * be part of the prompt length
1552 */
1553 if (*cp && cp[1] == '\r') {
1554 delimiter = *cp;
1555 cp += 2;
1556 }
1557 for (; *cp; cp++) {
1558 if (indelimit && *cp != delimiter)
1559 ;
1560 else if (*cp == '\n' || *cp == '\r') {
1561 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1562 columns = 0;
1563 } else if (*cp == '\t') {
1564 columns = (columns | 7) + 1;
1565 } else if (*cp == '\b') {
1566 if (columns > 0)
1567 columns--;
1568 } else if (*cp == delimiter)
1569 indelimit = !indelimit;
1570 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1571 const char *cp2;
1572 columns += utf_widthadj(cp, &cp2);
1573 if (doprint && (indelimit ||
1574 (ntruncate < (x_cols * lines + columns))))
1575 shf_write(cp, cp2 - cp, shl_out);
1576 cp = cp2 - /* loop increment */ 1;
1577 continue;
1578 } else
1579 columns++;
1580 if (doprint && (*cp != delimiter) &&
1581 (indelimit || (ntruncate < (x_cols * lines + columns))))
1582 shf_putc(*cp, shl_out);
1583 }
1584 if (doprint)
1585 shf_flush(shl_out);
1586 return (x_cols * lines + columns);
1587 }
1588
1589
/* Print the prompt cp via dopprompt(), passing ntruncate through. */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1595
/* Measure the prompt cp via dopprompt() without printing anything. */
int
promptlen(const char *cp)
{
	int width = dopprompt(cp, 0, false);

	return (width);
}
1601
1602 /*
1603 * Read the variable part of a ${...} expression (i.e. up to but not
1604 * including the :[-+?=#%] or close-brace).
1605 */
1606 static char *
get_brace_var(XString * wsp,char * wp)1607 get_brace_var(XString *wsp, char *wp)
1608 {
1609 char c;
1610 enum parse_state {
1611 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1612 PS_NUMBER, PS_VAR1
1613 } state = PS_INITIAL;
1614
1615 while (/* CONSTCOND */ 1) {
1616 c = getsc();
1617 /* State machine to figure out where the variable part ends. */
1618 switch (state) {
1619 case PS_INITIAL:
1620 if (c == '#' || c == '!' || c == '%') {
1621 state = PS_SAW_HASH;
1622 break;
1623 }
1624 /* FALLTHROUGH */
1625 case PS_SAW_HASH:
1626 if (ksh_isalphx(c))
1627 state = PS_IDENT;
1628 else if (ksh_isdigit(c))
1629 state = PS_NUMBER;
1630 else if (c == '#') {
1631 if (state == PS_SAW_HASH) {
1632 char c2;
1633
1634 c2 = getsc();
1635 ungetsc(c2);
1636 if (c2 != '}') {
1637 ungetsc(c);
1638 goto out;
1639 }
1640 }
1641 state = PS_VAR1;
1642 } else if (ctype(c, C_VAR1))
1643 state = PS_VAR1;
1644 else
1645 goto out;
1646 break;
1647 case PS_IDENT:
1648 if (!ksh_isalnux(c)) {
1649 if (c == '[') {
1650 char *tmp, *p;
1651
1652 if (!arraysub(&tmp))
1653 yyerror("missing ]\n");
1654 *wp++ = c;
1655 for (p = tmp; *p; ) {
1656 Xcheck(*wsp, wp);
1657 *wp++ = *p++;
1658 }
1659 afree(tmp, ATEMP);
1660 /* the ] */
1661 c = getsc();
1662 }
1663 goto out;
1664 }
1665 break;
1666 case PS_NUMBER:
1667 if (!ksh_isdigit(c))
1668 goto out;
1669 break;
1670 case PS_VAR1:
1671 goto out;
1672 }
1673 Xcheck(*wsp, wp);
1674 *wp++ = c;
1675 }
1676 out:
1677 /* end of variable part */
1678 *wp++ = '\0';
1679 ungetsc(c);
1680 return (wp);
1681 }
1682
1683 /*
1684 * Save an array subscript - returns true if matching bracket found, false
1685 * if eof or newline was found.
1686 * (Returned string double null terminated)
1687 */
1688 static bool
arraysub(char ** strp)1689 arraysub(char **strp)
1690 {
1691 XString ws;
1692 char *wp, c;
1693 /* we are just past the initial [ */
1694 int depth = 1;
1695
1696 Xinit(ws, wp, 32, ATEMP);
1697
1698 do {
1699 c = getsc();
1700 Xcheck(ws, wp);
1701 *wp++ = c;
1702 if (c == '[')
1703 depth++;
1704 else if (c == ']')
1705 depth--;
1706 } while (depth > 0 && c && c != '\n');
1707
1708 *wp++ = '\0';
1709 *strp = Xclose(ws, wp);
1710
1711 return (tobool(depth == 0));
1712 }
1713
1714 /* Unget a char: handles case when we are already at the start of the buffer */
1715 static void
ungetsc(int c)1716 ungetsc(int c)
1717 {
1718 struct sretrace_info *rp = retrace_info;
1719
1720 if (backslash_skip)
1721 backslash_skip--;
1722 /* Don't unget EOF... */
1723 if (source->str == null && c == '\0')
1724 return;
1725 while (rp) {
1726 if (Xlength(rp->xs, rp->xp))
1727 rp->xp--;
1728 rp = rp->next;
1729 }
1730 ungetsc_(c);
1731 }
1732 static void
ungetsc_(int c)1733 ungetsc_(int c)
1734 {
1735 if (source->str > source->start)
1736 source->str--;
1737 else {
1738 Source *s;
1739
1740 s = pushs(SREREAD, source->areap);
1741 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1742 s->start = s->str = s->ugbuf;
1743 s->next = source;
1744 source = s;
1745 }
1746 }
1747
1748
1749 /* Called to get a char that isn't a \newline sequence. */
1750 static int
getsc_bn(void)1751 getsc_bn(void)
1752 {
1753 int c, c2;
1754
1755 if (ignore_backslash_newline)
1756 return (o_getsc_u());
1757
1758 if (backslash_skip == 1) {
1759 backslash_skip = 2;
1760 return (o_getsc_u());
1761 }
1762
1763 backslash_skip = 0;
1764
1765 while (/* CONSTCOND */ 1) {
1766 c = o_getsc_u();
1767 if (c == '\\') {
1768 if ((c2 = o_getsc_u()) == '\n')
1769 /* ignore the \newline; get the next char... */
1770 continue;
1771 ungetsc_(c2);
1772 backslash_skip = 1;
1773 }
1774 return (c);
1775 }
1776 }
1777
1778 void
yyskiputf8bom(void)1779 yyskiputf8bom(void)
1780 {
1781 int c;
1782
1783 if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1784 ungetsc_(c);
1785 return;
1786 }
1787 if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1788 ungetsc_(c);
1789 ungetsc_(0xEF);
1790 return;
1791 }
1792 if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1793 ungetsc_(c);
1794 ungetsc_(0xBB);
1795 ungetsc_(0xEF);
1796 return;
1797 }
1798 UTFMODE |= 8;
1799 }
1800
1801 static Lex_state *
push_state_(State_info * si,Lex_state * old_end)1802 push_state_(State_info *si, Lex_state *old_end)
1803 {
1804 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1805
1806 news[0].ls_base = old_end;
1807 si->base = &news[0];
1808 si->end = &news[STATE_BSIZE];
1809 return (&news[1]);
1810 }
1811
1812 static Lex_state *
pop_state_(State_info * si,Lex_state * old_end)1813 pop_state_(State_info *si, Lex_state *old_end)
1814 {
1815 Lex_state *old_base = si->base;
1816
1817 si->base = old_end->ls_base - STATE_BSIZE;
1818 si->end = old_end->ls_base;
1819
1820 afree(old_base, ATEMP);
1821
1822 return (si->base + STATE_BSIZE - 1);
1823 }
1824
/* Function wrapper around getsc(): returns the character it yields. */
static int
s_get(void)
{
	int c = getsc();

	return (c);
}
1830
/* Function wrapper around ungetsc(): pushes c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
}
1836