1 /* $OpenBSD: lex.c,v 1.46 2013/01/20 14:47:46 stsp Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013
6 * Thorsten Glaser <tg@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.182 2013/02/19 18:45:20 tg Exp $");
27
28 /*
29 * states while lexing word
 *
 * One of these values is held in the "type" member of each Lex_state
 * entry and mirrored in yylex()'s local "state" variable; PUSH_STATE()
 * and POP_STATE() switch between them. "type" is a uint8_t, so every
 * value defined here has to fit into eight bits.
30 */
31 #define SBASE 0 /* outside any lexical constructs */
32 #define SWORD 1 /* implicit quoting for substitute() */
33 #define SLETPAREN 2 /* inside (( )), implicit quoting */
34 #define SSQUOTE 3 /* inside '' */
35 #define SDQUOTE 4 /* inside "" */
36 #define SEQUOTE 5 /* inside $'' */
37 #define SBRACE 6 /* inside ${} */
38 #define SQBRACE 7 /* inside "${}" */
39 #define SBQUOTE 8 /* inside `` */
40 #define SASPAREN 9 /* inside $(( )) */
41 #define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */
42 #define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */
43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM 13 /* like SBASE, looking for delimiter */
45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */
/* yylex() stores this in states[0], the slot below the first real state */
47 #define SINVALID 255 /* invalid state */
48
/*
 * One entry of the "retrace" list. While an entry is linked into
 * retrace_info, the o_getsc_r() helper appends every character it
 * returns to the entry's string. Entries are created by PUSH_SRETRACE()
 * and removed again by POP_SRETRACE().
 */
49 struct sretrace_info {
/* next entry in the list headed by retrace_info */
50 struct sretrace_info *next;
/* growing buffer holding the characters read so far */
51 XString xs;
/* write position inside xs */
52 char *xp;
53 };
54
55 /*
56 * Structure to keep track of the lexing state and the various pieces of info
57 * needed for each particular state.
 *
 * The union members are alternatives; which one is meaningful depends
 * on the state type (and, for ls_base, on the slot: yylex() sets
 * states[0].ls_base to NULL and follows ls_base when it walks from one
 * block of states to another).
58 */
59 typedef struct lex_state {
60 union {
61 /* point to the next state block */
62 struct lex_state *base;
63 /* marks start of state output in output string */
/* (written by PUSH_SRETRACE(), read back by POP_SRETRACE()) */
64 int start;
65 /* SBQUOTE: true if in double quotes: "`...`" */
66 /* SEQUOTE: got NUL, ignore rest of string */
67 bool abool;
68 /* SADELIM information */
69 struct {
70 /* character to search for */
71 unsigned char delimiter;
72 /* max. number of delimiters */
73 unsigned char num;
74 } adelim;
75 } u;
76 /* count open parentheses */
/* (used by the SADELIM, SASPAREN and SLETPAREN states in yylex()) */
77 short nparen;
78 /* type of this state */
/* (one of the S* constants defined above) */
79 uint8_t type;
80 } Lex_state;
/* shorthands for the union members */
81 #define ls_base u.base
82 #define ls_start u.start
83 #define ls_bool u.abool
84 #define ls_adelim u.adelim
85
/*
 * Bounds of the block of Lex_state entries currently in use.
 * yylex() initialises it to its on-stack array; PUSH_STATE() calls
 * push_state_i() when the state pointer reaches "end", POP_STATE()
 * calls pop_state_i() when it drops back to "base".
 */
86 typedef struct {
/* first entry of the current block */
87 Lex_state *base;
/* one past the last entry of the current block */
88 Lex_state *end;
89 } State_info;
90
/* forward declarations of the file-local helpers */
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);
103 static Lex_state *pop_state_i(State_info *, Lex_state *);
104
105 static int dopprompt(const char *, int, bool);
106
/*
 * Both counters are reset to 0 at the start of every yylex() call.
 * backslash_skip is tested by the o_getsc() fast path; a non-zero value
 * forces the slow path through getsc_bn().
 * ignore_backslash_newline is raised by yylex() and readhere() while
 * they are inside a comment, '...' or $'...' quotes, or a here document
 * without IOEVAL, and lowered again afterwards.
 * NOTE(review): both are presumably consumed by getsc_bn(), which is
 * not part of this excerpt - confirm there.
 */
107 static int backslash_skip;
108 static int ignore_backslash_newline;
109
110 /* optimised getsc_bn() */
/*
 * Fast path: hand out the next character of the current source directly
 * as long as it is neither NUL nor a backslash and backslash_skip is
 * zero; everything else goes through getsc_bn().
 */
111 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \
112 !backslash_skip ? *source->str++ : getsc_bn())
113 /* optimised getsc_uu() */
/* Fast path: only call getsc_uu() once the current string is exhausted. */
114 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu())
115
116 /* retrace helper */
/*
 * Expands to a complete brace block ending in a return statement, so it
 * can only be used as the whole body of a function returning int.
 * carg is evaluated exactly once; the resulting character is appended
 * to the buffer of every entry on the retrace_info list and returned.
 */
117 #define o_getsc_r(carg) { \
118 int cev = (carg); \
119 struct sretrace_info *rp = retrace_info; \
120 \
121 while (rp) { \
122 Xcheck(rp->xs, rp->xp); \
123 *rp->xp++ = cev; \
124 rp = rp->next; \
125 } \
126 \
127 return (cev); \
128 }
129
130 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
131 static int getsc(void);
132
133 static int
getsc(void)134 getsc(void)
135 {
136 o_getsc_r(o_getsc());
137 }
138 #else
139 static int getsc_r(int);
140
141 static int
getsc_r(int c)142 getsc_r(int c)
143 {
144 o_getsc_r(c);
145 }
146
147 #define getsc() getsc_r(o_getsc())
148 #endif
149
/*
 * Number of Lex_state entries in one block; yylex() keeps the first
 * block as an on-stack array of this size.
 */
150 #define STATE_BSIZE 8
151
/*
 * Enter lexer state s. Works on the locals statep, state_info and state
 * of the calling function: advances statep, lets push_state_i() provide
 * a new position when the end of the current block is reached, then
 * records s both in the new entry and in "state".
 */
152 #define PUSH_STATE(s) do { \
153 if (++statep == state_info.end) \
154 statep = push_state_i(&state_info, statep); \
155 state = statep->type = (s); \
156 } while (/* CONSTCOND */ 0)
157
/*
 * Leave the current lexer state: steps statep back, lets pop_state_i()
 * provide the position when the base of the current block is reached,
 * and reloads "state" from the entry that is now current.
 */
158 #define POP_STATE() do { \
159 if (--statep == state_info.base) \
160 statep = pop_state_i(&state_info, statep); \
161 state = statep->type; \
162 } while (/* CONSTCOND */ 0)
163
/*
 * Start recording the raw input: remembers the current output position
 * of the word (ws/wp) in statep->ls_start and pushes a fresh, empty
 * retrace buffer (allocated from ATEMP) onto the retrace_info list.
 */
164 #define PUSH_SRETRACE() do { \
165 struct sretrace_info *ri; \
166 \
167 statep->ls_start = Xsavepos(ws, wp); \
168 ri = alloc(sizeof(struct sretrace_info), ATEMP); \
169 Xinit(ri->xs, ri->xp, 64, ATEMP); \
170 ri->next = retrace_info; \
171 retrace_info = ri; \
172 } while (/* CONSTCOND */ 0)
173
/*
 * Stop recording: rewinds wp to the position saved by PUSH_SRETRACE(),
 * NUL-terminates the recorded text and leaves a pointer to it in sp,
 * then unlinks and frees the list entry (dp is used as scratch). The
 * recorded string itself is not freed here; the callers in yylex()
 * either afree() sp or hand it to a Source as u.freeme.
 */
174 #define POP_SRETRACE() do { \
175 wp = Xrestpos(ws, wp, statep->ls_start); \
176 *retrace_info->xp = '\0'; \
177 sp = Xstring(retrace_info->xs, retrace_info->xp); \
178 dp = (void *)retrace_info; \
179 retrace_info = retrace_info->next; \
180 afree(dp, ATEMP); \
181 } while (/* CONSTCOND */ 0)
182
183 /**
184 * Lexical analyser
185 *
186 * tokens are not regular expressions, they are LL(1).
187 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
188 * hence the state stack. Note "$(...)" are now parsed recursively.
189 */
190
191 int
yylex(int cf)192 yylex(int cf)
193 {
194 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
195 State_info state_info;
196 int c, c2, state;
197 size_t cz;
198 XString ws; /* expandable output word */
199 char *wp; /* output word pointer */
200 char *sp, *dp;
201
202 Again:
203 states[0].type = SINVALID;
204 states[0].ls_base = NULL;
205 statep = &states[1];
206 state_info.base = states;
207 state_info.end = &state_info.base[STATE_BSIZE];
208
209 Xinit(ws, wp, 64, ATEMP);
210
211 backslash_skip = 0;
212 ignore_backslash_newline = 0;
213
214 if (cf & ONEWORD)
215 state = SWORD;
216 else if (cf & LETEXPR) {
217 /* enclose arguments in (double) quotes */
218 *wp++ = OQUOTE;
219 state = SLETPAREN;
220 statep->nparen = 0;
221 } else {
222 /* normal lexing */
223 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
224 while ((c = getsc()) == ' ' || c == '\t')
225 ;
226 if (c == '#') {
227 ignore_backslash_newline++;
228 while ((c = getsc()) != '\0' && c != '\n')
229 ;
230 ignore_backslash_newline--;
231 }
232 ungetsc(c);
233 }
234 if (source->flags & SF_ALIAS) {
235 /* trailing ' ' in alias definition */
236 source->flags &= ~SF_ALIAS;
237 cf |= ALIAS;
238 }
239
240 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
241 statep->type = state;
242
243 /* check for here string */
244 if (state == SHEREDELIM) {
245 c = getsc();
246 if (c == '<') {
247 state = SHEREDELIM;
248 while ((c = getsc()) == ' ' || c == '\t')
249 ;
250 ungetsc(c);
251 c = '<';
252 goto accept_nonword;
253 }
254 ungetsc(c);
255 }
256
257 /* collect non-special or quoted characters to form word */
258 while (!((c = getsc()) == 0 ||
259 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
260 if (state == SBASE &&
261 subshell_nesting_type == /*{*/ '}' &&
262 c == /*{*/ '}')
263 /* possibly end ${ :;} */
264 break;
265 accept_nonword:
266 Xcheck(ws, wp);
267 switch (state) {
268 case SADELIM:
269 if (c == '(')
270 statep->nparen++;
271 else if (c == ')')
272 statep->nparen--;
273 else if (statep->nparen == 0 && (c == /*{*/ '}' ||
274 c == (int)statep->ls_adelim.delimiter)) {
275 *wp++ = ADELIM;
276 *wp++ = c;
277 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
278 POP_STATE();
279 if (c == /*{*/ '}')
280 POP_STATE();
281 break;
282 }
283 /* FALLTHROUGH */
284 case SBASE:
285 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286 /* temporary */
287 *wp = EOS;
288 if (is_wdvarname(Xstring(ws, wp), false)) {
289 char *p, *tmp;
290
291 if (arraysub(&tmp)) {
292 *wp++ = CHAR;
293 *wp++ = c;
294 for (p = tmp; *p; ) {
295 Xcheck(ws, wp);
296 *wp++ = CHAR;
297 *wp++ = *p++;
298 }
299 afree(tmp, ATEMP);
300 break;
301 } else {
302 Source *s;
303
304 s = pushs(SREREAD,
305 source->areap);
306 s->start = s->str =
307 s->u.freeme = tmp;
308 s->next = source;
309 source = s;
310 }
311 }
312 *wp++ = CHAR;
313 *wp++ = c;
314 break;
315 }
316 /* FALLTHROUGH */
317 Sbase1: /* includes *(...|...) pattern (*+?@!) */
318 if (c == '*' || c == '@' || c == '+' || c == '?' ||
319 c == '!') {
320 c2 = getsc();
321 if (c2 == '(' /*)*/ ) {
322 *wp++ = OPAT;
323 *wp++ = c;
324 PUSH_STATE(SPATTERN);
325 break;
326 }
327 ungetsc(c2);
328 }
329 /* FALLTHROUGH */
330 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
331 switch (c) {
332 case '\\':
333 getsc_qchar:
334 if ((c = getsc())) {
335 /* trailing \ is lost */
336 *wp++ = QCHAR;
337 *wp++ = c;
338 }
339 break;
340 case '\'':
341 open_ssquote:
342 *wp++ = OQUOTE;
343 ignore_backslash_newline++;
344 PUSH_STATE(SSQUOTE);
345 break;
346 case '"':
347 open_sdquote:
348 *wp++ = OQUOTE;
349 PUSH_STATE(SDQUOTE);
350 break;
351 case '$':
352 /*
353 * processing of dollar sign belongs into
354 * Subst, except for those which can open
355 * a string: $'…' and $"…"
356 */
357 subst_dollar_ex:
358 c = getsc();
359 switch (c) {
360 case '"':
361 goto open_sdquote;
362 case '\'':
363 goto open_sequote;
364 default:
365 goto SubstS;
366 }
367 default:
368 goto Subst;
369 }
370 break;
371
372 Subst:
373 switch (c) {
374 case '\\':
375 c = getsc();
376 switch (c) {
377 case '"':
378 if ((cf & HEREDOC))
379 goto heredocquote;
380 /* FALLTHROUGH */
381 case '\\':
382 case '$': case '`':
383 store_qchar:
384 *wp++ = QCHAR;
385 *wp++ = c;
386 break;
387 default:
388 heredocquote:
389 Xcheck(ws, wp);
390 if (c) {
391 /* trailing \ is lost */
392 *wp++ = CHAR;
393 *wp++ = '\\';
394 *wp++ = CHAR;
395 *wp++ = c;
396 }
397 break;
398 }
399 break;
400 case '$':
401 c = getsc();
402 SubstS:
403 if (c == '(') /*)*/ {
404 c = getsc();
405 if (c == '(') /*)*/ {
406 *wp++ = EXPRSUB;
407 PUSH_STATE(SASPAREN);
408 statep->nparen = 2;
409 PUSH_SRETRACE();
410 *retrace_info->xp++ = '(';
411 } else {
412 ungetsc(c);
413 subst_command:
414 c = COMSUB;
415 subst_command2:
416 sp = yyrecursive(c);
417 cz = strlen(sp) + 1;
418 XcheckN(ws, wp, cz);
419 *wp++ = c;
420 memcpy(wp, sp, cz);
421 wp += cz;
422 }
423 } else if (c == '{') /*}*/ {
424 c = getsc();
425 if (ctype(c, C_IFSWS)) {
426 /*
427 * non-subenvironment
428 * "command" substitution
429 */
430 c = FUNSUB;
431 goto subst_command2;
432 }
433 ungetsc(c);
434 *wp++ = OSUBST;
435 *wp++ = '{'; /*}*/
436 wp = get_brace_var(&ws, wp);
437 c = getsc();
438 /* allow :# and :% (ksh88 compat) */
439 if (c == ':') {
440 *wp++ = CHAR;
441 *wp++ = c;
442 c = getsc();
443 if (c == ':') {
444 *wp++ = CHAR;
445 *wp++ = '0';
446 *wp++ = ADELIM;
447 *wp++ = ':';
448 PUSH_STATE(SBRACE);
449 PUSH_STATE(SADELIM);
450 statep->ls_adelim.delimiter = ':';
451 statep->ls_adelim.num = 1;
452 statep->nparen = 0;
453 break;
454 } else if (ksh_isdigit(c) ||
455 c == '('/*)*/ || c == ' ' ||
456 /*XXX what else? */
457 c == '$') {
458 /* substring subst. */
459 if (c != ' ') {
460 *wp++ = CHAR;
461 *wp++ = ' ';
462 }
463 ungetsc(c);
464 PUSH_STATE(SBRACE);
465 PUSH_STATE(SADELIM);
466 statep->ls_adelim.delimiter = ':';
467 statep->ls_adelim.num = 2;
468 statep->nparen = 0;
469 break;
470 }
471 } else if (c == '/') {
472 *wp++ = CHAR;
473 *wp++ = c;
474 if ((c = getsc()) == '/') {
475 *wp++ = ADELIM;
476 *wp++ = c;
477 } else
478 ungetsc(c);
479 PUSH_STATE(SBRACE);
480 PUSH_STATE(SADELIM);
481 statep->ls_adelim.delimiter = '/';
482 statep->ls_adelim.num = 1;
483 statep->nparen = 0;
484 break;
485 }
486 /*
487 * If this is a trim operation,
488 * treat (,|,) specially in STBRACE.
489 */
490 if (ctype(c, C_SUBOP2)) {
491 ungetsc(c);
492 if (Flag(FSH))
493 PUSH_STATE(STBRACEBOURNE);
494 else
495 PUSH_STATE(STBRACEKORN);
496 } else {
497 ungetsc(c);
498 if (state == SDQUOTE)
499 PUSH_STATE(SQBRACE);
500 else
501 PUSH_STATE(SBRACE);
502 }
503 } else if (ksh_isalphx(c)) {
504 *wp++ = OSUBST;
505 *wp++ = 'X';
506 do {
507 Xcheck(ws, wp);
508 *wp++ = c;
509 c = getsc();
510 } while (ksh_isalnux(c));
511 *wp++ = '\0';
512 *wp++ = CSUBST;
513 *wp++ = 'X';
514 ungetsc(c);
515 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
516 Xcheck(ws, wp);
517 *wp++ = OSUBST;
518 *wp++ = 'X';
519 *wp++ = c;
520 *wp++ = '\0';
521 *wp++ = CSUBST;
522 *wp++ = 'X';
523 } else {
524 *wp++ = CHAR;
525 *wp++ = '$';
526 ungetsc(c);
527 }
528 break;
529 case '`':
530 subst_gravis:
531 PUSH_STATE(SBQUOTE);
532 *wp++ = COMSUB;
533 /*
534 * Need to know if we are inside double quotes
535 * since sh/AT&T-ksh translate the \" to " in
536 * "`...\"...`".
537 * This is not done in POSIX mode (section
538 * 3.2.3, Double Quotes: "The backquote shall
539 * retain its special meaning introducing the
540 * other form of command substitution (see
541 * 3.6.3). The portion of the quoted string
542 * from the initial backquote and the
543 * characters up to the next backquote that
544 * is not preceded by a backslash (having
545 * escape characters removed) defines that
546 * command whose output replaces `...` when
547 * the word is expanded."
548 * Section 3.6.3, Command Substitution:
549 * "Within the backquoted style of command
550 * substitution, backslash shall retain its
551 * literal meaning, except when followed by
552 * $ ` \.").
553 */
554 statep->ls_bool = false;
555 s2 = statep;
556 base = state_info.base;
557 while (/* CONSTCOND */ 1) {
558 for (; s2 != base; s2--) {
559 if (s2->type == SDQUOTE) {
560 statep->ls_bool = true;
561 break;
562 }
563 }
564 if (s2 != base)
565 break;
566 if (!(s2 = s2->ls_base))
567 break;
568 base = s2-- - STATE_BSIZE;
569 }
570 break;
571 case QCHAR:
572 if (cf & LQCHAR) {
573 *wp++ = QCHAR;
574 *wp++ = getsc();
575 break;
576 }
577 /* FALLTHROUGH */
578 default:
579 store_char:
580 *wp++ = CHAR;
581 *wp++ = c;
582 }
583 break;
584
585 case SEQUOTE:
586 if (c == '\'') {
587 POP_STATE();
588 *wp++ = CQUOTE;
589 ignore_backslash_newline--;
590 } else if (c == '\\') {
591 if ((c2 = unbksl(true, s_get, s_put)) == -1)
592 c2 = s_get();
593 if (c2 == 0)
594 statep->ls_bool = true;
595 if (!statep->ls_bool) {
596 char ts[4];
597
598 if ((unsigned int)c2 < 0x100) {
599 *wp++ = QCHAR;
600 *wp++ = c2;
601 } else {
602 cz = utf_wctomb(ts, c2 - 0x100);
603 ts[cz] = 0;
604 for (cz = 0; ts[cz]; ++cz) {
605 *wp++ = QCHAR;
606 *wp++ = ts[cz];
607 }
608 }
609 }
610 } else if (!statep->ls_bool) {
611 *wp++ = QCHAR;
612 *wp++ = c;
613 }
614 break;
615
616 case SSQUOTE:
617 if (c == '\'') {
618 POP_STATE();
619 *wp++ = CQUOTE;
620 ignore_backslash_newline--;
621 } else {
622 *wp++ = QCHAR;
623 *wp++ = c;
624 }
625 break;
626
627 case SDQUOTE:
628 if (c == '"') {
629 POP_STATE();
630 *wp++ = CQUOTE;
631 } else
632 goto Subst;
633 break;
634
635 /* $(( ... )) */
636 case SASPAREN:
637 if (c == '(')
638 statep->nparen++;
639 else if (c == ')') {
640 statep->nparen--;
641 if (statep->nparen == 1) {
642 /* end of EXPRSUB */
643 POP_SRETRACE();
644 POP_STATE();
645
646 if ((c2 = getsc()) == /*(*/ ')') {
647 cz = strlen(sp) - 2;
648 XcheckN(ws, wp, cz);
649 memcpy(wp, sp + 1, cz);
650 wp += cz;
651 afree(sp, ATEMP);
652 *wp++ = '\0';
653 break;
654 } else {
655 Source *s;
656
657 ungetsc(c2);
658 /*
659 * mismatched parenthesis -
660 * assume we were really
661 * parsing a $(...) expression
662 */
663 --wp;
664 s = pushs(SREREAD,
665 source->areap);
666 s->start = s->str =
667 s->u.freeme = sp;
668 s->next = source;
669 source = s;
670 goto subst_command;
671 }
672 }
673 }
674 /* reuse existing state machine */
675 goto Sbase2;
676
677 case SQBRACE:
678 if (c == '\\') {
679 /*
680 * perform POSIX "quote removal" if the back-
681 * slash is "special", i.e. same cases as the
682 * {case '\\':} in Subst: plus closing brace;
683 * in mksh code "quote removal" on '\c' means
684 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
685 * emitted (in heredocquote:)
686 */
687 if ((c = getsc()) == '"' || c == '\\' ||
688 c == '$' || c == '`' || c == /*{*/'}')
689 goto store_qchar;
690 goto heredocquote;
691 }
692 goto common_SQBRACE;
693
694 case SBRACE:
695 if (c == '\'')
696 goto open_ssquote;
697 else if (c == '\\')
698 goto getsc_qchar;
699 common_SQBRACE:
700 if (c == '"')
701 goto open_sdquote;
702 else if (c == '$')
703 goto subst_dollar_ex;
704 else if (c == '`')
705 goto subst_gravis;
706 else if (c != /*{*/ '}')
707 goto store_char;
708 POP_STATE();
709 *wp++ = CSUBST;
710 *wp++ = /*{*/ '}';
711 break;
712
713 /* Same as SBASE, except (,|,) treated specially */
714 case STBRACEKORN:
715 if (c == '|')
716 *wp++ = SPAT;
717 else if (c == '(') {
718 *wp++ = OPAT;
719 /* simile for @ */
720 *wp++ = ' ';
721 PUSH_STATE(SPATTERN);
722 } else /* FALLTHROUGH */
723 case STBRACEBOURNE:
724 if (c == /*{*/ '}') {
725 POP_STATE();
726 *wp++ = CSUBST;
727 *wp++ = /*{*/ '}';
728 } else
729 goto Sbase1;
730 break;
731
732 case SBQUOTE:
733 if (c == '`') {
734 *wp++ = 0;
735 POP_STATE();
736 } else if (c == '\\') {
737 switch (c = getsc()) {
738 case 0:
739 /* trailing \ is lost */
740 break;
741 case '\\':
742 case '$': case '`':
743 *wp++ = c;
744 break;
745 case '"':
746 if (statep->ls_bool) {
747 *wp++ = c;
748 break;
749 }
750 /* FALLTHROUGH */
751 default:
752 *wp++ = '\\';
753 *wp++ = c;
754 break;
755 }
756 } else
757 *wp++ = c;
758 break;
759
760 /* ONEWORD */
761 case SWORD:
762 goto Subst;
763
764 /* LETEXPR: (( ... )) */
765 case SLETPAREN:
766 if (c == /*(*/ ')') {
767 if (statep->nparen > 0)
768 --statep->nparen;
769 else if ((c2 = getsc()) == /*(*/ ')') {
770 c = 0;
771 *wp++ = CQUOTE;
772 goto Done;
773 } else {
774 Source *s;
775
776 ungetsc(c2);
777 /*
778 * mismatched parenthesis -
779 * assume we were really
780 * parsing a (...) expression
781 */
782 *wp = EOS;
783 sp = Xstring(ws, wp);
784 dp = wdstrip(sp, WDS_KEEPQ);
785 s = pushs(SREREAD, source->areap);
786 s->start = s->str = s->u.freeme = dp;
787 s->next = source;
788 source = s;
789 return ('('/*)*/);
790 }
791 } else if (c == '(')
792 /*
793 * parentheses inside quotes and
794 * backslashes are lost, but AT&T ksh
795 * doesn't count them either
796 */
797 ++statep->nparen;
798 goto Sbase2;
799
800 /* <<, <<-, <<< delimiter */
801 case SHEREDELIM:
802 /*
803 * here delimiters need a special case since
804 * $ and `...` are not to be treated specially
805 */
806 switch (c) {
807 case '\\':
808 if ((c = getsc())) {
809 /* trailing \ is lost */
810 *wp++ = QCHAR;
811 *wp++ = c;
812 }
813 break;
814 case '\'':
815 goto open_ssquote;
816 case '$':
817 if ((c2 = getsc()) == '\'') {
818 open_sequote:
819 *wp++ = OQUOTE;
820 ignore_backslash_newline++;
821 PUSH_STATE(SEQUOTE);
822 statep->ls_bool = false;
823 break;
824 } else if (c2 == '"') {
825 /* FALLTHROUGH */
826 case '"':
827 state = statep->type = SHEREDQUOTE;
828 PUSH_SRETRACE();
829 break;
830 }
831 ungetsc(c2);
832 /* FALLTHROUGH */
833 default:
834 *wp++ = CHAR;
835 *wp++ = c;
836 }
837 break;
838
839 /* " in <<, <<-, <<< delimiter */
840 case SHEREDQUOTE:
841 if (c != '"')
842 goto Subst;
843 POP_SRETRACE();
844 dp = strnul(sp) - 1;
845 /* remove the trailing double quote */
846 *dp = '\0';
847 /* store the quoted string */
848 *wp++ = OQUOTE;
849 XcheckN(ws, wp, (dp - sp));
850 dp = sp;
851 while ((c = *dp++)) {
852 if (c == '\\') {
853 switch ((c = *dp++)) {
854 case '\\':
855 case '"':
856 case '$':
857 case '`':
858 break;
859 default:
860 *wp++ = CHAR;
861 *wp++ = '\\';
862 break;
863 }
864 }
865 *wp++ = CHAR;
866 *wp++ = c;
867 }
868 afree(sp, ATEMP);
869 *wp++ = CQUOTE;
870 state = statep->type = SHEREDELIM;
871 break;
872
873 /* in *(...|...) pattern (*+?@!) */
874 case SPATTERN:
875 if (c == /*(*/ ')') {
876 *wp++ = CPAT;
877 POP_STATE();
878 } else if (c == '|') {
879 *wp++ = SPAT;
880 } else if (c == '(') {
881 *wp++ = OPAT;
882 /* simile for @ */
883 *wp++ = ' ';
884 PUSH_STATE(SPATTERN);
885 } else
886 goto Sbase1;
887 break;
888 }
889 }
890 Done:
891 Xcheck(ws, wp);
892 if (statep != &states[1])
893 /* XXX figure out what is missing */
894 yyerror("no closing quote\n");
895
896 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
897 if (state == SHEREDELIM)
898 state = SBASE;
899
900 dp = Xstring(ws, wp);
901 if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
902 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
903
904 if (Xlength(ws, wp) == 0)
905 iop->unit = c == '<' ? 0 : 1;
906 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
907 if (dp[c2] != CHAR)
908 goto no_iop;
909 if (!ksh_isdigit(dp[c2 + 1]))
910 goto no_iop;
911 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
912 }
913
914 if (iop->unit >= FDBASE)
915 goto no_iop;
916
917 if (c == '&') {
918 if ((c2 = getsc()) != '>') {
919 ungetsc(c2);
920 goto no_iop;
921 }
922 c = c2;
923 iop->flag = IOBASH;
924 } else
925 iop->flag = 0;
926
927 c2 = getsc();
928 /* <<, >>, <> are ok, >< is not */
929 if (c == c2 || (c == '<' && c2 == '>')) {
930 iop->flag |= c == c2 ?
931 (c == '>' ? IOCAT : IOHERE) : IORDWR;
932 if (iop->flag == IOHERE) {
933 if ((c2 = getsc()) == '-') {
934 iop->flag |= IOSKIP;
935 c2 = getsc();
936 } else if (c2 == '<')
937 iop->flag |= IOHERESTR;
938 ungetsc(c2);
939 if (c2 == '\n')
940 iop->flag |= IONDELIM;
941 }
942 } else if (c2 == '&')
943 iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
944 else {
945 iop->flag |= c == '>' ? IOWRITE : IOREAD;
946 if (c == '>' && c2 == '|')
947 iop->flag |= IOCLOB;
948 else
949 ungetsc(c2);
950 }
951
952 iop->name = NULL;
953 iop->delim = NULL;
954 iop->heredoc = NULL;
955 /* free word */
956 Xfree(ws, wp);
957 yylval.iop = iop;
958 return (REDIR);
959 no_iop:
960 afree(iop, ATEMP);
961 }
962
963 if (wp == dp && state == SBASE) {
964 /* free word */
965 Xfree(ws, wp);
966 /* no word, process LEX1 character */
967 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
968 if ((c2 = getsc()) == c)
969 c = (c == ';') ? BREAK :
970 (c == '|') ? LOGOR :
971 (c == '&') ? LOGAND :
972 /* c == '(' ) */ MDPAREN;
973 else if (c == '|' && c2 == '&')
974 c = COPROC;
975 else if (c == ';' && c2 == '|')
976 c = BRKEV;
977 else if (c == ';' && c2 == '&')
978 c = BRKFT;
979 else
980 ungetsc(c2);
981 #ifndef MKSH_SMALL
982 if (c == BREAK) {
983 if ((c2 = getsc()) == '&')
984 c = BRKEV;
985 else
986 ungetsc(c2);
987 }
988 #endif
989 } else if (c == '\n') {
990 gethere(false);
991 if (cf & CONTIN)
992 goto Again;
993 } else if (c == '\0')
994 /* need here strings at EOF */
995 gethere(true);
996 return (c);
997 }
998
999 /* terminate word */
1000 *wp++ = EOS;
1001 yylval.cp = Xclose(ws, wp);
1002 if (state == SWORD || state == SLETPAREN
1003 /* XXX ONEWORD? */)
1004 return (LWORD);
1005
1006 /* unget terminator */
1007 ungetsc(c);
1008
1009 /*
1010 * note: the alias-vs-function code below depends on several
1011 * interna: starting from here, source->str is not modified;
1012 * the way getsc() and ungetsc() operate; etc.
1013 */
1014
1015 /* copy word to unprefixed string ident */
1016 sp = yylval.cp;
1017 dp = ident;
1018 if ((cf & HEREDELIM) && (sp[1] == '<'))
1019 while ((dp - ident) < IDENT) {
1020 if ((c = *sp++) == CHAR)
1021 *dp++ = *sp++;
1022 else if ((c != OQUOTE) && (c != CQUOTE))
1023 break;
1024 }
1025 else
1026 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1027 *dp++ = *sp++;
1028 /* Make sure the ident array stays '\0' padded */
1029 memset(dp, 0, (ident + IDENT) - dp + 1);
1030 if (c != EOS)
1031 /* word is not unquoted */
1032 *ident = '\0';
1033
1034 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1035 struct tbl *p;
1036 uint32_t h = hash(ident);
1037
1038 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1039 (!(cf & ESACONLY) || p->val.i == ESAC ||
1040 p->val.i == /*{*/ '}')) {
1041 afree(yylval.cp, ATEMP);
1042 return (p->val.i);
1043 }
1044 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1045 (p->flag & ISSET)) {
1046 /*
1047 * this still points to the same character as the
1048 * ungetsc'd terminator from above
1049 */
1050 const char *cp = source->str;
1051
1052 /* prefer POSIX but not Korn functions over aliases */
1053 while (*cp == ' ' || *cp == '\t')
1054 /*
1055 * this is like getsc() without skipping
1056 * over Source boundaries (including not
1057 * parsing ungetsc'd characters that got
1058 * pushed into an SREREAD) which is what
1059 * we want here anyway: find out whether
1060 * the alias name is followed by a POSIX
1061 * function definition (only the opening
1062 * parenthesis is checked though)
1063 */
1064 ++cp;
1065 /* prefer functions over aliases */
1066 if (cp[0] != '(' || cp[1] != ')') {
1067 Source *s = source;
1068
1069 while (s && (s->flags & SF_HASALIAS))
1070 if (s->u.tblp == p)
1071 return (LWORD);
1072 else
1073 s = s->next;
1074 /* push alias expansion */
1075 s = pushs(SALIAS, source->areap);
1076 s->start = s->str = p->val.s;
1077 s->u.tblp = p;
1078 s->flags |= SF_HASALIAS;
1079 s->next = source;
1080 if (source->type == SEOF) {
1081 /* prevent infinite recursion at EOS */
1082 source->u.tblp = p;
1083 source->flags |= SF_HASALIAS;
1084 }
1085 source = s;
1086 afree(yylval.cp, ATEMP);
1087 goto Again;
1088 }
1089 }
1090 }
1091
1092 return (LWORD);
1093 }
1094
1095 static void
gethere(bool iseof)1096 gethere(bool iseof)
1097 {
1098 struct ioword **p;
1099
1100 for (p = heres; p < herep; p++)
1101 if (iseof && !((*p)->flag & IOHERESTR))
1102 /* only here strings at EOF */
1103 return;
1104 else
1105 readhere(*p);
1106 herep = heres;
1107 }
1108
1109 /*
1110 * read "<<word" text into temp file
1111 */
1112
1113 static void
readhere(struct ioword * iop)1114 readhere(struct ioword *iop)
1115 {
1116 int c;
1117 const char *eof, *eofp;
1118 XString xs;
1119 char *xp;
1120 int xpos;
1121
1122 if (iop->flag & IOHERESTR) {
1123 /* process the here string */
1124 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1125 xpos = strlen(xp) - 1;
1126 memmove(xp, xp + 1, xpos);
1127 xp[xpos] = '\n';
1128 return;
1129 }
1130
1131 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1132
1133 if (!(iop->flag & IOEVAL))
1134 ignore_backslash_newline++;
1135
1136 Xinit(xs, xp, 256, ATEMP);
1137
1138 heredoc_read_line:
1139 /* beginning of line */
1140 eofp = eof;
1141 xpos = Xsavepos(xs, xp);
1142 if (iop->flag & IOSKIP) {
1143 /* skip over leading tabs */
1144 while ((c = getsc()) == '\t')
1145 /* nothing */;
1146 goto heredoc_parse_char;
1147 }
1148 heredoc_read_char:
1149 c = getsc();
1150 heredoc_parse_char:
1151 /* compare with here document marker */
1152 if (!*eofp) {
1153 /* end of here document marker, what to do? */
1154 switch (c) {
1155 case /*(*/ ')':
1156 if (!subshell_nesting_type)
1157 /*-
1158 * not allowed outside $(...) or (...)
1159 * => mismatch
1160 */
1161 break;
1162 /* allow $(...) or (...) to close here */
1163 ungetsc(/*(*/ ')');
1164 /* FALLTHROUGH */
1165 case 0:
1166 /*
1167 * Allow EOF here to commands without trailing
1168 * newlines (mksh -c '...') will work as well.
1169 */
1170 case '\n':
1171 /* Newline terminates here document marker */
1172 goto heredoc_found_terminator;
1173 }
1174 } else if (c == *eofp++)
1175 /* store; then read and compare next character */
1176 goto heredoc_store_and_loop;
1177 /* nope, mismatch; read until end of line */
1178 while (c != '\n') {
1179 if (!c)
1180 /* oops, reached EOF */
1181 yyerror("%s '%s' unclosed\n", "here document", eof);
1182 /* store character */
1183 Xcheck(xs, xp);
1184 Xput(xs, xp, c);
1185 /* read next character */
1186 c = getsc();
1187 }
1188 /* we read a newline as last character */
1189 heredoc_store_and_loop:
1190 /* store character */
1191 Xcheck(xs, xp);
1192 Xput(xs, xp, c);
1193 if (c == '\n')
1194 goto heredoc_read_line;
1195 goto heredoc_read_char;
1196
1197 heredoc_found_terminator:
1198 /* jump back to saved beginning of line */
1199 xp = Xrestpos(xs, xp, xpos);
1200 /* terminate, close and store */
1201 Xput(xs, xp, '\0');
1202 iop->heredoc = Xclose(xs, xp);
1203
1204 if (!(iop->flag & IOEVAL))
1205 ignore_backslash_newline--;
1206 }
1207
1208 void
yyerror(const char * fmt,...)1209 yyerror(const char *fmt, ...)
1210 {
1211 va_list va;
1212
1213 /* pop aliases and re-reads */
1214 while (source->type == SALIAS || source->type == SREREAD)
1215 source = source->next;
1216 /* zap pending input */
1217 source->str = null;
1218
1219 error_prefix(true);
1220 va_start(va, fmt);
1221 shf_vfprintf(shl_out, fmt, va);
1222 va_end(va);
1223 errorfz();
1224 }
1225
1226 /*
1227 * input for yylex with alias expansion
1228 */
1229
1230 Source *
pushs(int type,Area * areap)1231 pushs(int type, Area *areap)
1232 {
1233 Source *s;
1234
1235 s = alloc(sizeof(Source), areap);
1236 memset(s, 0, sizeof(Source));
1237 s->type = type;
1238 s->str = null;
1239 s->areap = areap;
1240 if (type == SFILE || type == SSTDIN)
1241 XinitN(s->xs, 256, s->areap);
1242 return (s);
1243 }
1244
/*
 * Slow path of o_getsc_u(): return the next character of the input.
 * As long as the current Source yields NUL, it is refilled, switched to
 * its next phase or popped from the source stack, depending on its
 * type. Returns 0 once the input is exhausted.
 */
1245 static int
getsc_uu(void)1246 getsc_uu(void)
1247 {
1248 Source *s = source;
1249 int c;
1250
1251 while ((c = *s->str++) == 0) {
1252 /* return 0 for EOF by default */
/* (a case that leaves s->str NULL is turned into SEOF after the switch) */
1253 s->str = NULL;
1254 switch (s->type) {
1255 case SEOF:
/* stay at EOF: point back at the empty string and keep returning 0 */
1256 s->str = null;
1257 return (0);
1258
1259 case SSTDIN:
1260 case SFILE:
/* try to read another line into the buffer of this source */
1261 getsc_line(s);
1262 break;
1263
1264 case SWSTR:
/* nothing to refill from: s->str stays NULL, i.e. EOF */
1265 break;
1266
1267 case SSTRING:
1268 case SSTRINGCMDLINE:
/* nothing to refill from: s->str stays NULL, i.e. EOF */
1269 break;
1270
1271 case SWORDS:
/* hand out the next word; a separator follows it */
1272 s->start = s->str = *s->u.strv++;
1273 s->type = SWORDSEP;
1274 break;
1275
1276 case SWORDSEP:
/* newline after the last word, a space between words */
1277 if (*s->u.strv == NULL) {
1278 s->start = s->str = "\n";
1279 s->type = SEOF;
1280 } else {
1281 s->start = s->str = " ";
1282 s->type = SWORDS;
1283 }
1284 break;
1285
1286 case SALIAS:
1287 if (s->flags & SF_ALIASEND) {
1288 /* pass on an unused SF_ALIAS flag */
1289 source = s->next;
1290 source->flags |= s->flags & SF_ALIAS;
1291 s = source;
1292 } else if (*s->u.tblp->val.s &&
1293 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1294 /* pop source stack */
1295 source = s = s->next;
1296 /*
1297 * Note that this alias ended with a
1298 * space, enabling alias expansion on
1299 * the following word.
1300 */
1301 s->flags |= SF_ALIAS;
1302 } else {
1303 /*
1304 * At this point, we need to keep the current
1305 * alias in the source list so recursive
1306 * aliases can be detected and we also need to
1307 * return the next character. Do this by
1308 * temporarily popping the alias to get the
1309 * next character and then put it back in the
1310 * source list with the SF_ALIASEND flag set.
1311 */
1312 /* pop source stack */
1313 source = s->next;
1314 source->flags |= s->flags & SF_ALIAS;
/* recursive call: reads from the source below the alias */
1315 c = getsc_uu();
1316 if (c) {
1317 s->flags |= SF_ALIASEND;
/* re-deliver that character from the alias source's own ugbuf */
1318 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1319 s->start = s->str = s->ugbuf;
1320 s->next = source;
1321 source = s;
1322 } else {
1323 s = source;
1324 /* avoid reading EOF twice */
1325 s->str = NULL;
1326 break;
1327 }
1328 }
/* re-test the loop condition with the source that is current now */
1329 continue;
1330
1331 case SREREAD:
/* re-read buffer is used up: release it and go on below it */
1332 if (s->start != s->ugbuf)
1333 /* yuck */
1334 afree(s->u.freeme, ATEMP);
1335 source = s = s->next;
1336 continue;
1337 }
/* nothing new was provided: this source is at EOF from now on */
1338 if (s->str == NULL) {
1339 s->type = SEOF;
1340 s->start = s->str = null;
1341 return ('\0');
1342 }
/* echo the freshly provided input if SF_ECHO is set on the source */
1343 if (s->flags & SF_ECHO) {
1344 shf_puts(s->str, shl_out);
1345 shf_flush(shl_out);
1346 }
1347 }
1348 return (c);
1349 }
1350
1351 static void
getsc_line(Source * s)1352 getsc_line(Source *s)
1353 {
1354 char *xp = Xstring(s->xs, xp), *cp;
1355 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1356 bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1357
1358 /* Done here to ensure nothing odd happens when a timeout occurs */
1359 XcheckN(s->xs, xp, LINE);
1360 *xp = '\0';
1361 s->start = s->str = xp;
1362
1363 if (have_tty && ksh_tmout) {
1364 ksh_tmout_state = TMOUT_READING;
1365 alarm(ksh_tmout);
1366 }
1367 if (interactive)
1368 change_winsz();
1369 #ifndef MKSH_NO_CMDLINE_EDITING
1370 if (have_tty && (
1371 #if !MKSH_S_NOVI
1372 Flag(FVI) ||
1373 #endif
1374 Flag(FEMACS) || Flag(FGMACS))) {
1375 int nread;
1376
1377 nread = x_read(xp, LINE);
1378 if (nread < 0)
1379 /* read error */
1380 nread = 0;
1381 xp[nread] = '\0';
1382 xp += nread;
1383 } else
1384 #endif
1385 {
1386 if (interactive)
1387 pprompt(prompt, 0);
1388 else
1389 s->line++;
1390
1391 while (/* CONSTCOND */ 1) {
1392 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1393
1394 if (!p && shf_error(s->u.shf) &&
1395 shf_errno(s->u.shf) == EINTR) {
1396 shf_clearerr(s->u.shf);
1397 if (trap)
1398 runtraps(0);
1399 continue;
1400 }
1401 if (!p || (xp = p, xp[-1] == '\n'))
1402 break;
1403 /* double buffer size */
1404 /* move past NUL so doubling works... */
1405 xp++;
1406 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1407 /* ...and move back again */
1408 xp--;
1409 }
1410 /*
1411 * flush any unwanted input so other programs/builtins
1412 * can read it. Not very optimal, but less error prone
1413 * than flushing else where, dealing with redirections,
1414 * etc.
1415 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1416 */
1417 if (s->type == SSTDIN)
1418 shf_flush(s->u.shf);
1419 }
1420 /*
1421 * XXX: temporary kludge to restore source after a
1422 * trap may have been executed.
1423 */
1424 source = s;
1425 if (have_tty && ksh_tmout) {
1426 ksh_tmout_state = TMOUT_EXECUTING;
1427 alarm(0);
1428 }
1429 cp = Xstring(s->xs, xp);
1430 s->start = s->str = cp;
1431 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1432 /* Note: if input is all nulls, this is not eof */
1433 if (Xlength(s->xs, xp) == 0) {
1434 /* EOF */
1435 if (s->type == SFILE)
1436 shf_fdclose(s->u.shf);
1437 s->str = NULL;
1438 } else if (interactive && *s->str &&
1439 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1440 histsave(&s->line, s->str, true, true);
1441 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1442 } else if (interactive && cur_prompt == PS1) {
1443 cp = Xstring(s->xs, xp);
1444 while (*cp && ctype(*cp, C_IFSWS))
1445 ++cp;
1446 if (!*cp)
1447 histsync();
1448 #endif
1449 }
1450 if (interactive)
1451 set_prompt(PS2, NULL);
1452 }
1453
1454 void
set_prompt(int to,Source * s)1455 set_prompt(int to, Source *s)
1456 {
1457 cur_prompt = to;
1458
1459 switch (to) {
1460 /* command */
1461 case PS1:
1462 /*
1463 * Substitute ! and !! here, before substitutions are done
1464 * so ! in expanded variables are not expanded.
1465 * NOTE: this is not what AT&T ksh does (it does it after
1466 * substitutions, POSIX doesn't say which is to be done.
1467 */
1468 {
1469 struct shf *shf;
1470 char * volatile ps1;
1471 Area *saved_atemp;
1472
1473 ps1 = str_val(global("PS1"));
1474 shf = shf_sopen(NULL, strlen(ps1) * 2,
1475 SHF_WR | SHF_DYNAMIC, NULL);
1476 while (*ps1)
1477 if (*ps1 != '!' || *++ps1 == '!')
1478 shf_putchar(*ps1++, shf);
1479 else
1480 shf_fprintf(shf, "%d",
1481 s ? s->line + 1 : 0);
1482 ps1 = shf_sclose(shf);
1483 saved_atemp = ATEMP;
1484 newenv(E_ERRH);
1485 if (kshsetjmp(e->jbuf)) {
1486 prompt = safe_prompt;
1487 /*
1488 * Don't print an error - assume it has already
1489 * been printed. Reason is we may have forked
1490 * to run a command and the child may be
1491 * unwinding its stack through this code as it
1492 * exits.
1493 */
1494 } else {
1495 char *cp = substitute(ps1, 0);
1496 strdupx(prompt, cp, saved_atemp);
1497 }
1498 quitenv(NULL);
1499 }
1500 break;
1501 /* command continuation */
1502 case PS2:
1503 prompt = str_val(global("PS2"));
1504 break;
1505 }
1506 }
1507
1508 static int
dopprompt(const char * cp,int ntruncate,bool doprint)1509 dopprompt(const char *cp, int ntruncate, bool doprint)
1510 {
1511 int columns = 0, lines = 0;
1512 bool indelimit = false;
1513 char delimiter = 0;
1514
1515 /*
1516 * Undocumented AT&T ksh feature:
1517 * If the second char in the prompt string is \r then the first
1518 * char is taken to be a non-printing delimiter and any chars
1519 * between two instances of the delimiter are not considered to
1520 * be part of the prompt length
1521 */
1522 if (*cp && cp[1] == '\r') {
1523 delimiter = *cp;
1524 cp += 2;
1525 }
1526 for (; *cp; cp++) {
1527 if (indelimit && *cp != delimiter)
1528 ;
1529 else if (*cp == '\n' || *cp == '\r') {
1530 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1531 columns = 0;
1532 } else if (*cp == '\t') {
1533 columns = (columns | 7) + 1;
1534 } else if (*cp == '\b') {
1535 if (columns > 0)
1536 columns--;
1537 } else if (*cp == delimiter)
1538 indelimit = !indelimit;
1539 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1540 const char *cp2;
1541 columns += utf_widthadj(cp, &cp2);
1542 if (doprint && (indelimit ||
1543 (ntruncate < (x_cols * lines + columns))))
1544 shf_write(cp, cp2 - cp, shl_out);
1545 cp = cp2 - /* loop increment */ 1;
1546 continue;
1547 } else
1548 columns++;
1549 if (doprint && (*cp != delimiter) &&
1550 (indelimit || (ntruncate < (x_cols * lines + columns))))
1551 shf_putc(*cp, shl_out);
1552 }
1553 if (doprint)
1554 shf_flush(shl_out);
1555 return (x_cols * lines + columns);
1556 }
1557
1558
/*
 * Print the prompt string cp via dopprompt() in printing mode, passing
 * ntruncate through unchanged; the computed length is discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1564
/*
 * Measure the prompt string cp without printing it: returns what
 * dopprompt() computes with ntruncate 0 and printing disabled.
 */
int
promptlen(const char *cp)
{
	int len;

	len = dopprompt(cp, 0, false);
	return (len);
}
1570
1571 /*
1572 * Read the variable part of a ${...} expression (i.e. up to but not
1573 * including the :[-+?=#%] or close-brace).
1574 */
1575 static char *
get_brace_var(XString * wsp,char * wp)1576 get_brace_var(XString *wsp, char *wp)
1577 {
1578 char c;
1579 enum parse_state {
1580 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1581 PS_NUMBER, PS_VAR1
1582 } state = PS_INITIAL;
1583
1584 while (/* CONSTCOND */ 1) {
1585 c = getsc();
1586 /* State machine to figure out where the variable part ends. */
1587 switch (state) {
1588 case PS_INITIAL:
1589 if (c == '#' || c == '!' || c == '%') {
1590 state = PS_SAW_HASH;
1591 break;
1592 }
1593 /* FALLTHROUGH */
1594 case PS_SAW_HASH:
1595 if (ksh_isalphx(c))
1596 state = PS_IDENT;
1597 else if (ksh_isdigit(c))
1598 state = PS_NUMBER;
1599 else if (c == '#') {
1600 if (state == PS_SAW_HASH) {
1601 char c2;
1602
1603 c2 = getsc();
1604 ungetsc(c2);
1605 if (c2 != /*{*/ '}') {
1606 ungetsc(c);
1607 goto out;
1608 }
1609 }
1610 state = PS_VAR1;
1611 } else if (ctype(c, C_VAR1))
1612 state = PS_VAR1;
1613 else
1614 goto out;
1615 break;
1616 case PS_IDENT:
1617 if (!ksh_isalnux(c)) {
1618 if (c == '[') {
1619 char *tmp, *p;
1620
1621 if (!arraysub(&tmp))
1622 yyerror("missing ]\n");
1623 *wp++ = c;
1624 for (p = tmp; *p; ) {
1625 Xcheck(*wsp, wp);
1626 *wp++ = *p++;
1627 }
1628 afree(tmp, ATEMP);
1629 /* the ] */
1630 c = getsc();
1631 }
1632 goto out;
1633 }
1634 break;
1635 case PS_NUMBER:
1636 if (!ksh_isdigit(c))
1637 goto out;
1638 break;
1639 case PS_VAR1:
1640 goto out;
1641 }
1642 Xcheck(*wsp, wp);
1643 *wp++ = c;
1644 }
1645 out:
1646 /* end of variable part */
1647 *wp++ = '\0';
1648 ungetsc(c);
1649 return (wp);
1650 }
1651
1652 /*
1653 * Save an array subscript - returns true if matching bracket found, false
1654 * if eof or newline was found.
1655 * (Returned string double null terminated)
1656 */
1657 static bool
arraysub(char ** strp)1658 arraysub(char **strp)
1659 {
1660 XString ws;
1661 char *wp, c;
1662 /* we are just past the initial [ */
1663 unsigned int depth = 1;
1664
1665 Xinit(ws, wp, 32, ATEMP);
1666
1667 do {
1668 c = getsc();
1669 Xcheck(ws, wp);
1670 *wp++ = c;
1671 if (c == '[')
1672 depth++;
1673 else if (c == ']')
1674 depth--;
1675 } while (depth > 0 && c && c != '\n');
1676
1677 *wp++ = '\0';
1678 *strp = Xclose(ws, wp);
1679
1680 return (tobool(depth == 0));
1681 }
1682
1683 /* Unget a char: handles case when we are already at the start of the buffer */
1684 static void
ungetsc(int c)1685 ungetsc(int c)
1686 {
1687 struct sretrace_info *rp = retrace_info;
1688
1689 if (backslash_skip)
1690 backslash_skip--;
1691 /* Don't unget EOF... */
1692 if (source->str == null && c == '\0')
1693 return;
1694 while (rp) {
1695 if (Xlength(rp->xs, rp->xp))
1696 rp->xp--;
1697 rp = rp->next;
1698 }
1699 ungetsc_i(c);
1700 }
1701 static void
ungetsc_i(int c)1702 ungetsc_i(int c)
1703 {
1704 if (source->str > source->start)
1705 source->str--;
1706 else {
1707 Source *s;
1708
1709 s = pushs(SREREAD, source->areap);
1710 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1711 s->start = s->str = s->ugbuf;
1712 s->next = source;
1713 source = s;
1714 }
1715 }
1716
1717
1718 /* Called to get a char that isn't a \newline sequence. */
1719 static int
getsc_bn(void)1720 getsc_bn(void)
1721 {
1722 int c, c2;
1723
1724 if (ignore_backslash_newline)
1725 return (o_getsc_u());
1726
1727 if (backslash_skip == 1) {
1728 backslash_skip = 2;
1729 return (o_getsc_u());
1730 }
1731
1732 backslash_skip = 0;
1733
1734 while (/* CONSTCOND */ 1) {
1735 c = o_getsc_u();
1736 if (c == '\\') {
1737 if ((c2 = o_getsc_u()) == '\n')
1738 /* ignore the \newline; get the next char... */
1739 continue;
1740 ungetsc_i(c2);
1741 backslash_skip = 1;
1742 }
1743 return (c);
1744 }
1745 }
1746
1747 void
yyskiputf8bom(void)1748 yyskiputf8bom(void)
1749 {
1750 int c;
1751
1752 if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1753 ungetsc_i(c);
1754 return;
1755 }
1756 if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1757 ungetsc_i(c);
1758 ungetsc_i(0xEF);
1759 return;
1760 }
1761 if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1762 ungetsc_i(c);
1763 ungetsc_i(0xBB);
1764 ungetsc_i(0xEF);
1765 return;
1766 }
1767 UTFMODE |= 8;
1768 }
1769
1770 static Lex_state *
push_state_i(State_info * si,Lex_state * old_end)1771 push_state_i(State_info *si, Lex_state *old_end)
1772 {
1773 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1774
1775 news[0].ls_base = old_end;
1776 si->base = &news[0];
1777 si->end = &news[STATE_BSIZE];
1778 return (&news[1]);
1779 }
1780
1781 static Lex_state *
pop_state_i(State_info * si,Lex_state * old_end)1782 pop_state_i(State_info *si, Lex_state *old_end)
1783 {
1784 Lex_state *old_base = si->base;
1785
1786 si->base = old_end->ls_base - STATE_BSIZE;
1787 si->end = old_end->ls_base;
1788
1789 afree(old_base, ATEMP);
1790
1791 return (si->base + STATE_BSIZE - 1);
1792 }
1793
/* Function-form wrapper around getsc(): returns the next input char. */
static int
s_get(void)
{
	int c;

	c = getsc();
	return (c);
}
1799
/* Function-form wrapper around ungetsc(): pushes c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
	return;
}
1805