1 /* $OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5 * Thorsten Glaser <tg@mirbsd.org>
6 *
7 * Provided that these terms and disclaimer and all copyright notices
8 * are retained or reproduced in an accompanying document, permission
9 * is granted to deal in this work without restriction, including un-
10 * limited rights to use, publicly perform, distribute, sell, modify,
11 * merge, give away, or sublicence.
12 *
13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
14 * the utmost extent permitted by applicable law, neither express nor
15 * implied; without malicious intent or gross negligence. In no event
16 * may a licensor, author or contributor be held liable for indirect,
17 * direct, other damage, loss, or other issues arising in any way out
18 * of dealing in the work, even if advised of the possibility of such
19 * damage or existence of a defect, except proven that it results out
20 * of said person's immediate fault when using the work as intended.
21 */
22
23 #include "sh.h"
24
25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.118 2010/07/25 11:35:41 tg Exp $");
26
/*
 * states while lexing word
 *
 * These numbers are stored in the ls_state member of a Lex_state and
 * mirrored in the local variable "state" of yylex(), whose main loop
 * switches on them for every input character.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SCSPAREN	8	/* inside $() */
#define SBQUOTE		9	/* inside `` */
#define SASPAREN	10	/* inside $(( )) */
#define SHEREDELIM	11	/* parsing <<,<<- delimiter */
#define SHEREDQUOTE	12	/* parsing " in <<,<<- delimiter */
#define SPATTERN	13	/* parsing *(...|...) pattern (*+?@!) */
#define STBRACE		14	/* parsing ${...[#%]...} */
#define SLETARRAY	15	/* inside =( ), just copy */
#define SADELIM		16	/* like SBASE, looking for delimiter */
#define SHERESTRING	17	/* parsing <<< string */
48
/*
 * One slot of the lexer's state stack: the state number together with
 * whatever bookkeeping that particular state needs.  Only the union
 * member belonging to the slot's state is meaningful; the "base"
 * member is used by the bottom slot of a block to link to the
 * previous state block.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* S* state number of this slot */
	union {
		/* SCSPAREN: $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
		} u_scsparen;

		/* SASPAREN: $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		/* SLETPAREN: ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
		} u_sletparen;

		/* SBQUOTE: `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

#ifndef MKSH_SMALL
		/* SLETARRAY: =(...) */
		struct sletarray_info {
			int nparen;	/* count open parentheses */
		} u_sletarray;
#endif

		/* SADELIM */
		struct sadelim_info {
			unsigned char nparen;	/* count open parentheses */
			unsigned char style;	/* SADELIM_BASH or SADELIM_MAKE */
			unsigned char delimiter;
			unsigned char num;
			unsigned char flags;	/* ofs. into sadelim_flags[] */
		} u_sadelim;

		/* SEQUOTE: $'...' */
		struct sequote_info {
			bool got_NUL;	/* ignore rest of string */
		} u_sequote;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};

/* values for the "style" member of struct sadelim_info */
#define SADELIM_BASH	0
#define SADELIM_MAKE	1

/* shorthands to reach the per-state information of a Lex_state */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
#ifndef MKSH_SMALL
#define ls_sletarray	ls_info.u_sletarray
#endif
#define ls_sadelim	ls_info.u_sadelim
#define ls_sequote	ls_info.u_sequote
110
/*
 * Bounds of the block of Lex_state slots currently in use: yylex()
 * sets base to the first slot of the block and end to one past its
 * last slot; PUSH_STATE/POP_STATE compare the state pointer against
 * these to detect when a block boundary is reached.
 */
typedef struct {
	Lex_state *base;
	Lex_state *end;
} State_info;
115
/* file-local helpers; several of them are defined further down in this file */
static void readhere(struct ioword *);
static int getsc__(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int s_get(void);
static void s_put(int);
static char *get_brace_var(XString *, char *);
static int arraysub(char **);
static const char *ungetsc(int);
static void gethere(bool);
static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *);

static int dopprompt(const char *, int, bool);

/*
 * Both are reset to 0 at the start of every yylex() run.
 * backslash_skip: when non-zero, the _getsc() fast path is bypassed in
 * favour of getsc_bn().
 * ignore_backslash_newline: counter raised while reading comments,
 * single-quoted text and here documents without IOEVAL -- presumably
 * consulted by getsc_bn() to leave backslash-newline alone (the
 * consumer is not visible here; confirm there).
 */
static int backslash_skip;
static int ignore_backslash_newline;
133
/*
 * optimised getsc_bn(): take the next byte directly from source->str
 * as long as it is neither NUL nor a backslash, no backslash skip is
 * pending and SF_FIRST is not set on the source; in every other case
 * fall back to the real getsc_bn()
 */
#define _getsc()	(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc_bn())
/*
 * optimised getsc__(): same idea without the backslash handling; only
 * a NUL byte or a set SF_FIRST flag forces the call to getsc__()
 */
#define _getsc_()	((*source->str != '\0') && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc__())
141
/*
 * getsc() and getsc_() are what the rest of this file calls to read a
 * character.  With MKSH_SMALL they are real functions wrapping the
 * _getsc()/_getsc_() macros, so the macro body is expanded only once;
 * otherwise they are macros themselves and get expanded at each use.
 */
#ifdef MKSH_SMALL
static int getsc(void);
static int getsc_(void);

/* function form of _getsc() */
static int
getsc(void)
{
	return (_getsc());
}

/* function form of _getsc_() */
static int
getsc_(void)
{
	return (_getsc_());
}
#else
/* !MKSH_SMALL: use them inline */
#define getsc()		_getsc()
#define getsc_()	_getsc_()
#endif
162
/* number of Lex_state slots in one state block */
#define STATE_BSIZE	32

/*
 * Enter lexer state (s).  Relies on the caller's locals statep,
 * state_info and state: advances statep, asks push_state_() for a
 * usable slot when the end of the current block is reached, and
 * records the new state both in that slot and in "state".
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_(&state_info, statep);	\
	state = statep->ls_state = (s);				\
} while (0)

/*
 * Leave the current lexer state.  Steps statep back, asks
 * pop_state_() for the slot to continue with when the base slot of
 * the current block is reached, and reloads "state" from the slot
 * that is now on top.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_(&state_info, statep);	\
	state = statep->ls_state;				\
} while (0)
176
/**
 * Lexical analyser
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack.
 *
 * cf is a set of flag bits selecting the lexing mode; the bits tested
 * here are ONEWORD, LETEXPR, LETARRAY, HEREDELIM, HEREDOC, VARASN,
 * ARRAYVAR, LQCHAR, CONTIN, KEYWORD, ESACONLY and ALIAS.
 *
 * Returns the token:
 * - REDIR for a redirection operator, with yylval.iop pointing to a
 *   newly allocated struct ioword;
 * - LWORD for a word, with yylval.cp holding the encoded word (pairs
 *   such as CHAR/QCHAR + byte, quote and substitution markers,
 *   terminated by EOS);
 * - the keyword's token value if the word is an unquoted keyword and
 *   KEYWORD is set in cf (the word is freed in that case);
 * - otherwise the terminating character itself, or BREAK, LOGOR,
 *   LOGAND, MDPAREN or COPROC for the two-character operators.
 * An unquoted word naming a set alias (with ALIAS in effect) is not
 * returned: its expansion is pushed as a new Source and lexing
 * restarts at Again.
 */

int
yylex(int cf)
{
	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
	State_info state_info;
	int c, c2, state;
	XString ws;		/* expandable output word */
	char *wp;		/* output word pointer */
	char *sp, *dp;

 Again:
	/* slot 0 is a sentinel (no previous block); slot 1 is the initial state */
	states[0].ls_state = -1;
	states[0].ls_info.base = NULL;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &state_info.base[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf&ONEWORD)
		state = SWORD;
	else if (cf&LETEXPR) {
		/* enclose arguments in (double) quotes */
		*wp++ = OQUOTE;
		state = SLETPAREN;
		statep->ls_sletparen.nparen = 0;
#ifndef MKSH_SMALL
	} else if (cf&LETARRAY) {
		state = SLETARRAY;
		statep->ls_sletarray.nparen = 0;
#endif
	} else {		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		while ((c = getsc()) == ' ' || c == '\t')
			;
		/* skip a comment up to, but not including, the newline */
		if (c == '#') {
			ignore_backslash_newline++;
			while ((c = getsc()) != '\0' && c != '\n')
				;
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		cf |= ALIAS;
	}

	/* Initial state: one of SWORD SLETPAREN SLETARRAY SHEREDELIM SBASE */
	statep->ls_state = state;

	/* check for here string */
	if (state == SHEREDELIM) {
		c = getsc();
		if (c == '<') {
			state = SHERESTRING;
			while ((c = getsc()) == ' ' || c == '\t')
				;
			ungetsc(c);
			/* store the '<' as first character of the word */
			c = '<';
			goto accept_nonword;
		}
		ungetsc(c);
	}

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0 ||
	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
	    ctype(c, C_LEX1)))) {
 accept_nonword:
		Xcheck(ws, wp);
		switch (state) {
		case SADELIM:
			if (c == '(')
				statep->ls_sadelim.nparen++;
			else if (c == ')')
				statep->ls_sadelim.nparen--;
			else if (statep->ls_sadelim.nparen == 0 &&
			    (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
				*wp++ = ADELIM;
				*wp++ = c;
				/* leave SADELIM at the brace or after the last delimiter */
				if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
					POP_STATE();
				/* a closing brace also leaves the state below SADELIM */
				if (c == /*{*/ '}')
					POP_STATE();
				break;
			}
			/* FALLTHROUGH */
		case SBASE:
			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
				*wp = EOS;	/* temporary */
				if (is_wdvarname(Xstring(ws, wp), false)) {
					char *p, *tmp;

					if (arraysub(&tmp)) {
						/* copy '[' and the subscript text as CHARs */
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					} else {
						Source *s;

						/* not a subscript: re-read the text from tmp */
						s = pushs(SREREAD,
						    source->areap);
						s->start = s->str =
						    s->u.freeme = tmp;
						s->next = source;
						source = s;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* FALLTHROUGH */
 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
			if (c == '*' || c == '@' || c == '+' || c == '?' ||
			    c == '!') {
				c2 = getsc();
				if (c2 == '(' /*)*/ ) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
			/* FALLTHROUGH */
 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			case '\\':
 getsc_qchar:
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case '\'':
 open_ssquote:
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			case '"':
 open_sdquote:
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			default:
				goto Subst;
			}
			break;

 Subst:
			/* shared by SDQUOTE and SWORD: backslash, $ and ` handling */
			switch (c) {
			case '\\':
				c = getsc();
				switch (c) {
				case '"':
					if ((cf & HEREDOC))
						goto heredocquote;
					/* FALLTHROUGH */
				case '\\':
				case '$': case '`':
 store_qchar:
					*wp++ = QCHAR;
					*wp++ = c;
					break;
				default:
 heredocquote:
					Xcheck(ws, wp);
					if (c) {
						/* trailing \ is lost */
						*wp++ = CHAR;
						*wp++ = '\\';
						*wp++ = CHAR;
						*wp++ = c;
					}
					break;
				}
				break;
			case '$':
 subst_dollar:
				c = getsc();
				if (c == '(') /*)*/ {
					c = getsc();
					if (c == '(') /*)*/ {
						PUSH_STATE(SASPAREN);
						statep->ls_sasparen.nparen = 2;
						/* remembered in case this turns out to be $( ( */
						statep->ls_sasparen.start =
						    Xsavepos(ws, wp);
						*wp++ = EXPRSUB;
					} else {
						ungetsc(c);
						PUSH_STATE(SCSPAREN);
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						*wp++ = COMSUB;
					}
				} else if (c == '{') /*}*/ {
					*wp++ = OSUBST;
					*wp++ = '{'; /*}*/
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if (c == ':') {
						*wp++ = CHAR;
						*wp++ = c;
						c = getsc();
						if (c == ':') {
							/* "::" is stored as ":0" followed by a delimiter */
							*wp++ = CHAR;
							*wp++ = '0';
							*wp++ = ADELIM;
							*wp++ = ':';
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_sadelim.style = SADELIM_BASH;
							statep->ls_sadelim.delimiter = ':';
							statep->ls_sadelim.num = 1;
							statep->ls_sadelim.nparen = 0;
							break;
						} else if (ksh_isdigit(c) ||
						    c == '('/*)*/ || c == ' ' ||
						    c == '$' /* XXX what else? */) {
							/* substring subst. */
							if (c != ' ') {
								*wp++ = CHAR;
								*wp++ = ' ';
							}
							ungetsc(c);
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_sadelim.style = SADELIM_BASH;
							statep->ls_sadelim.delimiter = ':';
							statep->ls_sadelim.num = 2;
							statep->ls_sadelim.nparen = 0;
							break;
						}
					} else if (c == '/') {
						*wp++ = CHAR;
						*wp++ = c;
						/* a second slash is recorded as a delimiter */
						if ((c = getsc()) == '/') {
							*wp++ = ADELIM;
							*wp++ = c;
						} else
							ungetsc(c);
						PUSH_STATE(SBRACE);
						PUSH_STATE(SADELIM);
						statep->ls_sadelim.style = SADELIM_BASH;
						statep->ls_sadelim.delimiter = '/';
						statep->ls_sadelim.num = 1;
						statep->ls_sadelim.nparen = 0;
						break;
					}
					/* If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (ctype(c, C_SUBOP2)) {
						ungetsc(c);
						PUSH_STATE(STBRACE);
					} else {
						ungetsc(c);
						if (state == SDQUOTE)
							PUSH_STATE(SQBRACE);
						else
							PUSH_STATE(SBRACE);
					}
				} else if (ksh_isalphx(c)) {
					/* $name: OSUBST 'X' name NUL CSUBST 'X' */
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ksh_isalnux(c));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
					/* single-character parameter, same encoding */
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else if (c == '\'' && (state == SBASE)) {
					/* XXX which other states are valid? */
					*wp++ = OQUOTE;
					ignore_backslash_newline++;
					PUSH_STATE(SEQUOTE);
					statep->ls_sequote.got_NUL = false;
					break;
				} else {
					/* plain dollar sign */
					*wp++ = CHAR;
					*wp++ = '$';
					ungetsc(c);
				}
				break;
			case '`':
 subst_gravis:
				PUSH_STATE(SBQUOTE);
				*wp++ = COMSUB;
				/* Need to know if we are inside double quotes
				 * since sh/AT&T-ksh translate the \" to " in
				 * "`...\"...`".
				 * This is not done in POSIX mode (section
				 * 3.2.3, Double Quotes: "The backquote shall
				 * retain its special meaning introducing the
				 * other form of command substitution (see
				 * 3.6.3). The portion of the quoted string
				 * from the initial backquote and the
				 * characters up to the next backquote that
				 * is not preceded by a backslash (having
				 * escape characters removed) defines that
				 * command whose output replaces `...` when
				 * the word is expanded."
				 * Section 3.6.3, Command Substitution:
				 * "Within the backquoted style of command
				 * substitution, backslash shall retain its
				 * literal meaning, except when followed by
				 * $ ` \.").
				 */
				statep->ls_sbquote.indquotes = 0;
				/*
				 * walk down the state stack, following the
				 * base links from one block to the one
				 * before it, looking for an SDQUOTE slot
				 */
				s2 = statep;
				base = state_info.base;
				while (1) {
					for (; s2 != base; s2--) {
						if (s2->ls_state == SDQUOTE) {
							statep->ls_sbquote.indquotes = 1;
							break;
						}
					}
					if (s2 != base)
						break;
					if (!(s2 = s2->ls_info.base))
						break;
					base = s2-- - STATE_BSIZE;
				}
				break;
			case QCHAR:
				if (cf & LQCHAR) {
					*wp++ = QCHAR;
					*wp++ = getsc();
					break;
				}
				/* FALLTHROUGH */
			default:
 store_char:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SEQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else if (c == '\\') {
				/* -1 from unbksl(): take the next character as is */
				if ((c2 = unbksl(true, s_get, s_put)) == -1)
					c2 = s_get();
				if (c2 == 0)
					statep->ls_sequote.got_NUL = true;
				if (!statep->ls_sequote.got_NUL) {
					char ts[4];

					if ((unsigned int)c2 < 0x100) {
						*wp++ = QCHAR;
						*wp++ = c2;
					} else {
						/*
						 * values from 0x100 up are passed
						 * (less 0x100) to utf_wctomb() and
						 * the resulting bytes stored quoted
						 */
						c = utf_wctomb(ts, c2 - 0x100);
						ts[c] = 0;
						for (c = 0; ts[c]; ++c) {
							*wp++ = QCHAR;
							*wp++ = ts[c];
						}
					}
				}
			} else if (!statep->ls_sequote.got_NUL) {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SSQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SDQUOTE:
			if (c == '"') {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		case SCSPAREN:	/* $( ... ) */
			/* todo: deal with $(...) quoting properly
			 * kludge to partly fake quoting inside $(...): doesn't
			 * really work because nested $(...) or ${...} inside
			 * double quotes aren't dealt with.
			 */
			switch (statep->ls_scsparen.csstate) {
			case 0:	/* normal */
				switch (c) {
				case '(':
					statep->ls_scsparen.nparen++;
					break;
				case ')':
					statep->ls_scsparen.nparen--;
					break;
				case '\\':
					statep->ls_scsparen.csstate = 1;
					break;
				case '"':
					statep->ls_scsparen.csstate = 2;
					break;
				case '\'':
					statep->ls_scsparen.csstate = 4;
					ignore_backslash_newline++;
					break;
				}
				break;

			case 1:	/* backslash in normal mode */
			case 3:	/* backslash in double quotes */
				/* back to 0 (normal) or 2 (double quotes) */
				--statep->ls_scsparen.csstate;
				break;

			case 2:	/* double quotes */
				if (c == '"')
					statep->ls_scsparen.csstate = 0;
				else if (c == '\\')
					statep->ls_scsparen.csstate = 3;
				break;

			case 4:	/* single quotes */
				if (c == '\'') {
					statep->ls_scsparen.csstate = 0;
					ignore_backslash_newline--;
				}
				break;
			}
			if (statep->ls_scsparen.nparen == 0) {
				POP_STATE();
				*wp++ = 0;	/* end of COMSUB */
			} else
				*wp++ = c;
			break;

		case SASPAREN:	/* $(( ... )) */
			/* XXX should nest using existing state machine
			 * (embed "...", $(...), etc.) */
			if (c == '(')
				statep->ls_sasparen.nparen++;
			else if (c == ')') {
				statep->ls_sasparen.nparen--;
				if (statep->ls_sasparen.nparen == 1) {
					/*(*/
					if ((c2 = getsc()) == ')') {
						POP_STATE();
						/* end of EXPRSUB */
						*wp++ = 0;
						break;
					} else {
						char *s;

						ungetsc(c2);
						/* mismatched parenthesis -
						 * assume we were really
						 * parsing a $(...) expression
						 */
						/*
						 * turn the EXPRSUB marker at the
						 * saved position into COMSUB plus
						 * an explicit '(' by shifting the
						 * collected text up one byte
						 */
						s = Xrestpos(ws, wp,
						    statep->ls_sasparen.start);
						memmove(s + 1, s, wp - s);
						*s++ = COMSUB;
						*s = '('; /*)*/
						wp++;
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						state = statep->ls_state =
						    SCSPAREN;
					}
				}
			}
			*wp++ = c;
			break;

		case SQBRACE:
			if (c == '\\') {
				/*
				 * perform POSIX "quote removal" if the back-
				 * slash is "special", i.e. same cases as the
				 * {case '\\':} in Subst: plus closing brace;
				 * in mksh code "quote removal" on '\c' means
				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
				 * emitted (in heredocquote:)
				 */
				if ((c = getsc()) == '"' || c == '\\' ||
				    c == '$' || c == '`' || c == /*{*/'}')
					goto store_qchar;
				goto heredocquote;
			}
			goto common_SQBRACE;

		case SBRACE:
			if (c == '\'')
				goto open_ssquote;
			else if (c == '\\')
				goto getsc_qchar;
 common_SQBRACE:
			if (c == '"')
				goto open_sdquote;
			else if (c == '$')
				goto subst_dollar;
			else if (c == '`')
				goto subst_gravis;
			else if (c != /*{*/ '}')
				goto store_char;
			/* closing brace ends the substitution */
			POP_STATE();
			*wp++ = CSUBST;
			*wp++ = /*{*/ '}';
			break;

		case STBRACE:
			/* Same as SBASE, except (,|,) treated specially */
			if (c == /*{*/ '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;

		case SBQUOTE:
			if (c == '`') {
				/* a NUL byte ends the command text */
				*wp++ = 0;
				POP_STATE();
			} else if (c == '\\') {
				switch (c = getsc()) {
				case '\\':
				case '$': case '`':
					/* the backslash is dropped */
					*wp++ = c;
					break;
				case '"':
					if (statep->ls_sbquote.indquotes) {
						*wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				default:
					if (c) {
						/* trailing \ is lost */
						*wp++ = '\\';
						*wp++ = c;
					}
					break;
				}
			} else
				*wp++ = c;
			break;

		case SWORD:	/* ONEWORD */
			goto Subst;

		case SLETPAREN:	/* LETEXPR: (( ... )) */
			/*(*/
			if (c == ')') {
				if (statep->ls_sletparen.nparen > 0)
					--statep->ls_sletparen.nparen;
				else if ((c2 = getsc()) == /*(*/ ')') {
					/* "))" ends the expression */
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else {
					Source *s;

					ungetsc(c2);
					/* mismatched parenthesis -
					 * this was not a (( ... )) expression
					 * after all: push the text collected
					 * so far (run through wdstrip()) back
					 * as input to be re-read and return a
					 * plain '(' token instead
					 */
					*wp = EOS;
					sp = Xstring(ws, wp);
					dp = wdstrip(sp, true, false);
					s = pushs(SREREAD, source->areap);
					s->start = s->str = s->u.freeme = dp;
					s->next = source;
					source = s;
					return ('('/*)*/);
				}
			} else if (c == '(')
				/* parenthesis inside quotes and backslashes
				 * are lost, but AT&T ksh doesn't count them
				 * either
				 */
				++statep->ls_sletparen.nparen;
			goto Sbase2;

#ifndef MKSH_SMALL
		case SLETARRAY:	/* LETARRAY: =( ... ) */
			if (c == '('/*)*/)
				++statep->ls_sletarray.nparen;
			else if (c == /*(*/')')
				/* the unmatched ')' ends it, leaving nparen at -1 */
				if (statep->ls_sletarray.nparen-- == 0) {
					c = 0;
					goto Done;
				}
			*wp++ = CHAR;
			*wp++ = c;
			break;
#endif

		case SHERESTRING:	/* <<< delimiter */
			if (c == '\\') {
				c = getsc();
				if (c) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				/* invoke quoting mode */
				Xstring(ws, wp)[0] = QCHAR;
			} else if (c == '$') {
				if ((c2 = getsc()) == '\'') {
					PUSH_STATE(SEQUOTE);
					statep->ls_sequote.got_NUL = false;
					goto sherestring_quoted;
				}
				ungetsc(c2);
				goto sherestring_regular;
			} else if (c == '\'') {
				PUSH_STATE(SSQUOTE);
 sherestring_quoted:
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				/* invoke quoting mode */
				Xstring(ws, wp)[0] = QCHAR;
			} else if (c == '"') {
				state = statep->ls_state = SHEREDQUOTE;
				*wp++ = OQUOTE;
				/* just don't IFS split; no quoting mode */
			} else {
 sherestring_regular:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SHEREDELIM:	/* <<,<<- delimiter */
			/* XXX chuck this state (and the next) - use
			 * the existing states ($ and \`...` should be
			 * stripped of their specialness after the
			 * fact).
			 */
			/* here delimiters need a special case since
			 * $ and `...` are not to be treated specially
			 */
			if (c == '\\') {
				c = getsc();
				if (c) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
			} else if (c == '$') {
				if ((c2 = getsc()) == '\'') {
					PUSH_STATE(SEQUOTE);
					statep->ls_sequote.got_NUL = false;
					goto sheredelim_quoted;
				}
				ungetsc(c2);
				goto sheredelim_regular;
			} else if (c == '\'') {
				PUSH_STATE(SSQUOTE);
 sheredelim_quoted:
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
			} else if (c == '"') {
				state = statep->ls_state = SHEREDQUOTE;
				*wp++ = OQUOTE;
			} else {
 sheredelim_regular:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
			if (c == '"') {
				*wp++ = CQUOTE;
				state = statep->ls_state =
				    /* dp[1] == '<' means here string */
				    Xstring(ws, wp)[1] == '<' ?
				    SHERESTRING : SHEREDELIM;
			} else {
				if (c == '\\') {
					switch (c = getsc()) {
					case '\\': case '"':
					case '$': case '`':
						/* backslash is dropped */
						break;
					default:
						if (c) {
							/* trailing \ lost */
							*wp++ = CHAR;
							*wp++ = '\\';
						}
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
			if ( /*(*/ c == ')') {
				*wp++ = CPAT;
				POP_STATE();
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
 Done:
	Xcheck(ws, wp);
	/* anything but the initial slot on top means an unterminated construct */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote\n");

#ifndef MKSH_SMALL
	if (state == SLETARRAY && statep->ls_sletarray.nparen != -1)
		yyerror("%s: ')' missing\n", T_synerr);
#endif

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM || state == SHERESTRING)
		state = SBASE;

	dp = Xstring(ws, wp);
	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

		/*
		 * the word collected so far, if any, must consist solely
		 * of unquoted digits: it is the file descriptor number;
		 * without a word the unit is 0 for '<' and 1 otherwise
		 */
		if (Xlength(ws, wp) == 0)
			iop->unit = c == '<' ? 0 : 1;
		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
			if (dp[c2] != CHAR)
				goto no_iop;
			if (!ksh_isdigit(dp[c2 + 1]))
				goto no_iop;
			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
		}

		if (iop->unit >= FDBASE)
			goto no_iop;

		if (c == '&') {
			/* only "&>" is a redirection */
			if ((c2 = getsc()) != '>') {
				ungetsc(c2);
				goto no_iop;
			}
			c = c2;
			iop->flag = IOBASH;
		} else
			iop->flag = 0;

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || (c == '<' && c2 == '>')) {
			iop->flag |= c == c2 ?
			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
			if (iop->flag == IOHERE) {
				if ((c2 = getsc()) == '-')
					iop->flag |= IOSKIP;
				else
					ungetsc(c2);
			}
		} else if (c2 == '&')
			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
		else {
			iop->flag |= c == '>' ? IOWRITE : IOREAD;
			if (c == '>' && c2 == '|')
				iop->flag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->name = NULL;
		iop->delim = NULL;
		iop->heredoc = NULL;
		Xfree(ws, wp);	/* free word */
		yylval.iop = iop;
		return (REDIR);
 no_iop:
		;
	}

	if (wp == dp && state == SBASE) {
		Xfree(ws, wp);	/* free word */
		/* no word, process LEX1 character */
		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
			if ((c2 = getsc()) == c)
				c = (c == ';') ? BREAK :
				    (c == '|') ? LOGOR :
				    (c == '&') ? LOGAND :
				    /* c == '(' ) */ MDPAREN;
			else if (c == '|' && c2 == '&')
				c = COPROC;
			else
				ungetsc(c2);
		} else if (c == '\n') {
			/* read pending here documents */
			gethere(false);
			if (cf & CONTIN)
				goto Again;
		} else if (c == '\0')
			/* need here strings at EOF */
			gethere(true);
		return (c);
	}

	*wp++ = EOS;		/* terminate word */
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD || state == SLETPAREN
	    /* XXX ONEWORD? */
#ifndef MKSH_SMALL
	    || state == SLETARRAY
#endif
	    )
		return (LWORD);

	/* unget terminator */
	ungetsc(c);

	/*
	 * note: the alias-vs-function code below depends on several
	 * interna: starting from here, source->str is not modified;
	 * the way getsc() and ungetsc() operate; etc.
	 */

	/* copy word to unprefixed string ident */
	sp = yylval.cp;
	dp = ident;
	if ((cf & HEREDELIM) && (sp[1] == '<'))
		/* here string: quote markers are skipped while copying */
		while (dp < ident+IDENT) {
			if ((c = *sp++) == CHAR)
				*dp++ = *sp++;
			else if ((c != OQUOTE) && (c != CQUOTE))
				break;
		}
	else
		while (dp < ident+IDENT && (c = *sp++) == CHAR)
			*dp++ = *sp++;
	/* Make sure the ident array stays '\0' padded */
	memset(dp, 0, (ident+IDENT) - dp + 1);
	if (c != EOS)
		*ident = '\0';	/* word is not unquoted */

	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
		struct tbl *p;
		uint32_t h = hash(ident);

		/* { */
		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
			afree(yylval.cp, ATEMP);
			return (p->val.i);
		}
		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
		    (p->flag & ISSET)) {
			/*
			 * this still points to the same character as the
			 * ungetsc'd terminator from above
			 */
			const char *cp = source->str;

			/* prefer POSIX but not Korn functions over aliases */
			while (*cp == ' ' || *cp == '\t')
				/*
				 * this is like getsc() without skipping
				 * over Source boundaries (including not
				 * parsing ungetsc'd characters that got
				 * pushed into an SREREAD) which is what
				 * we want here anyway: find out whether
				 * the alias name is followed by a POSIX
				 * function definition (only the opening
				 * parenthesis is checked though)
				 */
				++cp;
			/* prefer functions over aliases */
			if (*cp == '(' /*)*/)
				/*
				 * delete alias upon encountering function
				 * definition
				 */
				ktdelete(p);
			else {
				Source *s = source;

				/* do not expand an alias that is already being expanded */
				while (s && (s->flags & SF_HASALIAS))
					if (s->u.tblp == p)
						return (LWORD);
					else
						s = s->next;
				/* push alias expansion */
				s = pushs(SALIAS, source->areap);
				s->start = s->str = p->val.s;
				s->u.tblp = p;
				s->flags |= SF_HASALIAS;
				s->next = source;
				if (source->type == SEOF) {
					/* prevent infinite recursion at EOS */
					source->u.tblp = p;
					source->flags |= SF_HASALIAS;
				}
				source = s;
				afree(yylval.cp, ATEMP);
				goto Again;
			}
		}
	}

	return (LWORD);
}
1138
1139 static void
gethere(bool iseof)1140 gethere(bool iseof)
1141 {
1142 struct ioword **p;
1143
1144 for (p = heres; p < herep; p++)
1145 if (iseof && (*p)->delim[1] != '<')
1146 /* only here strings at EOF */
1147 return;
1148 else
1149 readhere(*p);
1150 herep = heres;
1151 }
1152
1153 /*
1154 * read "<<word" text into temp file
1155 */
1156
1157 static void
readhere(struct ioword * iop)1158 readhere(struct ioword *iop)
1159 {
1160 int c;
1161 char *volatile eof;
1162 char *eofp;
1163 int skiptabs;
1164 XString xs;
1165 char *xp;
1166 int xpos;
1167
1168 if (iop->delim[1] == '<') {
1169 /* process the here string */
1170 xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
1171 c = strlen(xp) - 1;
1172 memmove(xp, xp + 1, c);
1173 xp[c] = '\n';
1174 return;
1175 }
1176
1177 eof = evalstr(iop->delim, 0);
1178
1179 if (!(iop->flag & IOEVAL))
1180 ignore_backslash_newline++;
1181
1182 Xinit(xs, xp, 256, ATEMP);
1183
1184 for (;;) {
1185 eofp = eof;
1186 skiptabs = iop->flag & IOSKIP;
1187 xpos = Xsavepos(xs, xp);
1188 while ((c = getsc()) != 0) {
1189 if (skiptabs) {
1190 if (c == '\t')
1191 continue;
1192 skiptabs = 0;
1193 }
1194 if (c != *eofp)
1195 break;
1196 Xcheck(xs, xp);
1197 Xput(xs, xp, c);
1198 eofp++;
1199 }
1200 /* Allow EOF here so commands with out trailing newlines
1201 * will work (eg, ksh -c '...', $(...), etc).
1202 */
1203 if (*eofp == '\0' && (c == 0 || c == '\n')) {
1204 xp = Xrestpos(xs, xp, xpos);
1205 break;
1206 }
1207 ungetsc(c);
1208 while ((c = getsc()) != '\n') {
1209 if (c == 0)
1210 yyerror("here document '%s' unclosed\n", eof);
1211 Xcheck(xs, xp);
1212 Xput(xs, xp, c);
1213 }
1214 Xcheck(xs, xp);
1215 Xput(xs, xp, c);
1216 }
1217 Xput(xs, xp, '\0');
1218 iop->heredoc = Xclose(xs, xp);
1219
1220 if (!(iop->flag & IOEVAL))
1221 ignore_backslash_newline--;
1222 }
1223
1224 void
yyerror(const char * fmt,...)1225 yyerror(const char *fmt, ...)
1226 {
1227 va_list va;
1228
1229 /* pop aliases and re-reads */
1230 while (source->type == SALIAS || source->type == SREREAD)
1231 source = source->next;
1232 source->str = null; /* zap pending input */
1233
1234 error_prefix(true);
1235 va_start(va, fmt);
1236 shf_vfprintf(shl_out, fmt, va);
1237 va_end(va);
1238 errorfz();
1239 }
1240
1241 /*
1242 * input for yylex with alias expansion
1243 */
1244
1245 Source *
pushs(int type,Area * areap)1246 pushs(int type, Area *areap)
1247 {
1248 Source *s;
1249
1250 s = alloc(sizeof(Source), areap);
1251 memset(s, 0, sizeof(Source));
1252 s->type = type;
1253 s->str = null;
1254 s->areap = areap;
1255 if (type == SFILE || type == SSTDIN)
1256 XinitN(s->xs, 256, s->areap);
1257 return (s);
1258 }
1259
/*
 * Return the next input character from the current source, or 0 at
 * end of input.  Whenever the current Source's string is exhausted it
 * is refilled or replaced according to its type: files and stdin read
 * another line, word lists move on to the next word or separator,
 * finished alias and re-read sources are popped off the source stack.
 * On the first character of a source flagged SF_FIRST a UTF-8 byte
 * order mark is skipped and UTFMODE is switched on.
 */
static int
getsc__(void)
{
	Source *s = source;
	int c;

 getsc_again:
	while ((c = *s->str++) == 0) {
		s->str = NULL;		/* return 0 for EOF by default */
		switch (s->type) {
		case SEOF:
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			getsc_line(s);
			break;

		case SWSTR:
			break;

		case SSTRING:
			break;

		case SWORDS:
			/* next word of the list; a separator follows it */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			/* newline after the last word, a space between words */
			if (*s->u.strv == NULL) {
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				s->start = s->str = " ";
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
				source = s = s->next;	/* pop source stack */
				/* Note that this alias ended with a space,
				 * enabling alias expansion on the following
				 * word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/* At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need
				 * to return the next character. Do this
				 * by temporarily popping the alias to get
				 * the next character and then put it back
				 * in the source list with the SF_ALIASEND
				 * flag set.
				 */
				source = s->next;	/* pop source stack */
				source->flags |= s->flags & SF_ALIAS;
				c = getsc__();
				if (c) {
					/* serve that character from the alias' ugbuf */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading eof twice */
					s->str = NULL;
					/* leaves the switch; the NULL check below returns EOF */
					break;
				}
			}
			/* retry with whatever source is current now */
			continue;

		case SREREAD:
			if (s->start != s->ugbuf)	/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing was supplied above: this source is at EOF for good */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		/* echo the newly obtained input if SF_ECHO is set */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	/* check for UTF-8 byte order mark */
	if (s->flags & SF_FIRST) {
		s->flags &= ~SF_FIRST;
		if (((unsigned char)c == 0xEF) &&
		    (((const unsigned char *)(s->str))[0] == 0xBB) &&
		    (((const unsigned char *)(s->str))[1] == 0xBF)) {
			/* skip the remaining two BOM bytes and start over */
			s->str += 2;
			UTFMODE = 1;
			goto getsc_again;
		}
	}
	return (c);
}
1371
1372 static void
getsc_line(Source * s)1373 getsc_line(Source *s)
1374 {
1375 char *xp = Xstring(s->xs, xp), *cp;
1376 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1377 int have_tty = interactive && (s->flags & SF_TTY);
1378
1379 /* Done here to ensure nothing odd happens when a timeout occurs */
1380 XcheckN(s->xs, xp, LINE);
1381 *xp = '\0';
1382 s->start = s->str = xp;
1383
1384 if (have_tty && ksh_tmout) {
1385 ksh_tmout_state = TMOUT_READING;
1386 alarm(ksh_tmout);
1387 }
1388 if (interactive)
1389 change_winsz();
1390 if (have_tty && (
1391 #if !MKSH_S_NOVI
1392 Flag(FVI) ||
1393 #endif
1394 Flag(FEMACS) || Flag(FGMACS))) {
1395 int nread;
1396
1397 nread = x_read(xp, LINE);
1398 if (nread < 0) /* read error */
1399 nread = 0;
1400 xp[nread] = '\0';
1401 xp += nread;
1402 } else {
1403 if (interactive)
1404 pprompt(prompt, 0);
1405 else
1406 s->line++;
1407
1408 while (1) {
1409 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1410
1411 if (!p && shf_error(s->u.shf) &&
1412 shf_errno(s->u.shf) == EINTR) {
1413 shf_clearerr(s->u.shf);
1414 if (trap)
1415 runtraps(0);
1416 continue;
1417 }
1418 if (!p || (xp = p, xp[-1] == '\n'))
1419 break;
1420 /* double buffer size */
1421 xp++; /* move past NUL so doubling works... */
1422 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1423 xp--; /* ...and move back again */
1424 }
1425 /* flush any unwanted input so other programs/builtins
1426 * can read it. Not very optimal, but less error prone
1427 * than flushing else where, dealing with redirections,
1428 * etc.
1429 * todo: reduce size of shf buffer (~128?) if SSTDIN
1430 */
1431 if (s->type == SSTDIN)
1432 shf_flush(s->u.shf);
1433 }
1434 /* XXX: temporary kludge to restore source after a
1435 * trap may have been executed.
1436 */
1437 source = s;
1438 if (have_tty && ksh_tmout) {
1439 ksh_tmout_state = TMOUT_EXECUTING;
1440 alarm(0);
1441 }
1442 cp = Xstring(s->xs, xp);
1443 #ifndef MKSH_SMALL
1444 if (interactive && *cp == '!' && cur_prompt == PS1) {
1445 int linelen;
1446
1447 linelen = Xlength(s->xs, xp);
1448 XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1);
1449 /* reload after potential realloc */
1450 cp = Xstring(s->xs, xp);
1451 /* change initial '!' into space */
1452 *cp = ' ';
1453 /* NUL terminate the current string */
1454 *xp = '\0';
1455 /* move the actual string forward */
1456 memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1);
1457 xp += fc_e_n;
1458 /* prepend it with "fc -e -" */
1459 memcpy(cp, fc_e_, fc_e_n);
1460 }
1461 #endif
1462 s->start = s->str = cp;
1463 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1464 /* Note: if input is all nulls, this is not eof */
1465 if (Xlength(s->xs, xp) == 0) {
1466 /* EOF */
1467 if (s->type == SFILE)
1468 shf_fdclose(s->u.shf);
1469 s->str = NULL;
1470 } else if (interactive && *s->str &&
1471 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1472 histsave(&s->line, s->str, true, true);
1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1474 } else if (interactive && cur_prompt == PS1) {
1475 cp = Xstring(s->xs, xp);
1476 while (*cp && ctype(*cp, C_IFSWS))
1477 ++cp;
1478 if (!*cp)
1479 histsync();
1480 #endif
1481 }
1482 if (interactive)
1483 set_prompt(PS2, NULL);
1484 }
1485
1486 void
set_prompt(int to,Source * s)1487 set_prompt(int to, Source *s)
1488 {
1489 cur_prompt = to;
1490
1491 switch (to) {
1492 case PS1: /* command */
1493 /* Substitute ! and !! here, before substitutions are done
1494 * so ! in expanded variables are not expanded.
1495 * NOTE: this is not what AT&T ksh does (it does it after
1496 * substitutions, POSIX doesn't say which is to be done.
1497 */
1498 {
1499 struct shf *shf;
1500 char * volatile ps1;
1501 Area *saved_atemp;
1502
1503 ps1 = str_val(global("PS1"));
1504 shf = shf_sopen(NULL, strlen(ps1) * 2,
1505 SHF_WR | SHF_DYNAMIC, NULL);
1506 while (*ps1)
1507 if (*ps1 != '!' || *++ps1 == '!')
1508 shf_putchar(*ps1++, shf);
1509 else
1510 shf_fprintf(shf, "%d",
1511 s ? s->line + 1 : 0);
1512 ps1 = shf_sclose(shf);
1513 saved_atemp = ATEMP;
1514 newenv(E_ERRH);
1515 if (sigsetjmp(e->jbuf, 0)) {
1516 prompt = safe_prompt;
1517 /* Don't print an error - assume it has already
1518 * been printed. Reason is we may have forked
1519 * to run a command and the child may be
1520 * unwinding its stack through this code as it
1521 * exits.
1522 */
1523 } else {
1524 char *cp = substitute(ps1, 0);
1525 strdupx(prompt, cp, saved_atemp);
1526 }
1527 quitenv(NULL);
1528 }
1529 break;
1530 case PS2: /* command continuation */
1531 prompt = str_val(global("PS2"));
1532 break;
1533 }
1534 }
1535
1536 static int
dopprompt(const char * cp,int ntruncate,bool doprint)1537 dopprompt(const char *cp, int ntruncate, bool doprint)
1538 {
1539 int columns = 0, lines = 0, indelimit = 0;
1540 char delimiter = 0;
1541
1542 /* Undocumented AT&T ksh feature:
1543 * If the second char in the prompt string is \r then the first char
1544 * is taken to be a non-printing delimiter and any chars between two
1545 * instances of the delimiter are not considered to be part of the
1546 * prompt length
1547 */
1548 if (*cp && cp[1] == '\r') {
1549 delimiter = *cp;
1550 cp += 2;
1551 }
1552 for (; *cp; cp++) {
1553 if (indelimit && *cp != delimiter)
1554 ;
1555 else if (*cp == '\n' || *cp == '\r') {
1556 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1557 columns = 0;
1558 } else if (*cp == '\t') {
1559 columns = (columns | 7) + 1;
1560 } else if (*cp == '\b') {
1561 if (columns > 0)
1562 columns--;
1563 } else if (*cp == delimiter)
1564 indelimit = !indelimit;
1565 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1566 const char *cp2;
1567 columns += utf_widthadj(cp, &cp2);
1568 if (doprint && (indelimit ||
1569 (ntruncate < (x_cols * lines + columns))))
1570 shf_write(cp, cp2 - cp, shl_out);
1571 cp = cp2 - /* loop increment */ 1;
1572 continue;
1573 } else
1574 columns++;
1575 if (doprint && (*cp != delimiter) &&
1576 (indelimit || (ntruncate < (x_cols * lines + columns))))
1577 shf_putc(*cp, shl_out);
1578 }
1579 if (doprint)
1580 shf_flush(shl_out);
1581 return (x_cols * lines + columns);
1582 }
1583
1584
/*
 * Print the prompt string cp via dopprompt() with printing enabled,
 * passing ntruncate through; the computed length is discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1590
/*
 * Return the length dopprompt() computes for the prompt string cp,
 * without printing anything.
 */
int
promptlen(const char *cp)
{
	int len = dopprompt(cp, 0, false);

	return (len);
}
1596
1597 /* Read the variable part of a ${...} expression (ie, up to but not including
1598 * the :[-+?=#%] or close-brace.
1599 */
1600 static char *
get_brace_var(XString * wsp,char * wp)1601 get_brace_var(XString *wsp, char *wp)
1602 {
1603 enum parse_state {
1604 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1605 PS_NUMBER, PS_VAR1
1606 } state;
1607 char c;
1608
1609 state = PS_INITIAL;
1610 while (1) {
1611 c = getsc();
1612 /* State machine to figure out where the variable part ends. */
1613 switch (state) {
1614 case PS_INITIAL:
1615 if (c == '#' || c == '!' || c == '%') {
1616 state = PS_SAW_HASH;
1617 break;
1618 }
1619 /* FALLTHROUGH */
1620 case PS_SAW_HASH:
1621 if (ksh_isalphx(c))
1622 state = PS_IDENT;
1623 else if (ksh_isdigit(c))
1624 state = PS_NUMBER;
1625 else if (ctype(c, C_VAR1))
1626 state = PS_VAR1;
1627 else
1628 goto out;
1629 break;
1630 case PS_IDENT:
1631 if (!ksh_isalnux(c)) {
1632 if (c == '[') {
1633 char *tmp, *p;
1634
1635 if (!arraysub(&tmp))
1636 yyerror("missing ]\n");
1637 *wp++ = c;
1638 for (p = tmp; *p; ) {
1639 Xcheck(*wsp, wp);
1640 *wp++ = *p++;
1641 }
1642 afree(tmp, ATEMP);
1643 c = getsc(); /* the ] */
1644 }
1645 goto out;
1646 }
1647 break;
1648 case PS_NUMBER:
1649 if (!ksh_isdigit(c))
1650 goto out;
1651 break;
1652 case PS_VAR1:
1653 goto out;
1654 }
1655 Xcheck(*wsp, wp);
1656 *wp++ = c;
1657 }
1658 out:
1659 *wp++ = '\0'; /* end of variable part */
1660 ungetsc(c);
1661 return (wp);
1662 }
1663
1664 /*
1665 * Save an array subscript - returns true if matching bracket found, false
1666 * if eof or newline was found.
1667 * (Returned string double null terminated)
1668 */
1669 static int
arraysub(char ** strp)1670 arraysub(char **strp)
1671 {
1672 XString ws;
1673 char *wp;
1674 char c;
1675 int depth = 1; /* we are just past the initial [ */
1676
1677 Xinit(ws, wp, 32, ATEMP);
1678
1679 do {
1680 c = getsc();
1681 Xcheck(ws, wp);
1682 *wp++ = c;
1683 if (c == '[')
1684 depth++;
1685 else if (c == ']')
1686 depth--;
1687 } while (depth > 0 && c && c != '\n');
1688
1689 *wp++ = '\0';
1690 *strp = Xclose(ws, wp);
1691
1692 return (depth == 0 ? 1 : 0);
1693 }
1694
1695 /* Unget a char: handles case when we are already at the start of the buffer */
1696 static const char *
ungetsc(int c)1697 ungetsc(int c)
1698 {
1699 if (backslash_skip)
1700 backslash_skip--;
1701 /* Don't unget eof... */
1702 if (source->str == null && c == '\0')
1703 return (source->str);
1704 if (source->str > source->start)
1705 source->str--;
1706 else {
1707 Source *s;
1708
1709 s = pushs(SREREAD, source->areap);
1710 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1711 s->start = s->str = s->ugbuf;
1712 s->next = source;
1713 source = s;
1714 }
1715 return (source->str);
1716 }
1717
1718
1719 /* Called to get a char that isn't a \newline sequence. */
1720 static int
getsc_bn(void)1721 getsc_bn(void)
1722 {
1723 int c, c2;
1724
1725 if (ignore_backslash_newline)
1726 return (getsc_());
1727
1728 if (backslash_skip == 1) {
1729 backslash_skip = 2;
1730 return (getsc_());
1731 }
1732
1733 backslash_skip = 0;
1734
1735 while (1) {
1736 c = getsc_();
1737 if (c == '\\') {
1738 if ((c2 = getsc_()) == '\n')
1739 /* ignore the \newline; get the next char... */
1740 continue;
1741 ungetsc(c2);
1742 backslash_skip = 1;
1743 }
1744 return (c);
1745 }
1746 }
1747
1748 static Lex_state *
push_state_(State_info * si,Lex_state * old_end)1749 push_state_(State_info *si, Lex_state *old_end)
1750 {
1751 Lex_state *news = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP);
1752
1753 news[0].ls_info.base = old_end;
1754 si->base = &news[0];
1755 si->end = &news[STATE_BSIZE];
1756 return (&news[1]);
1757 }
1758
1759 static Lex_state *
pop_state_(State_info * si,Lex_state * old_end)1760 pop_state_(State_info *si, Lex_state *old_end)
1761 {
1762 Lex_state *old_base = si->base;
1763
1764 si->base = old_end->ls_info.base - STATE_BSIZE;
1765 si->end = old_end->ls_info.base;
1766
1767 afree(old_base, ATEMP);
1768
1769 return (si->base + STATE_BSIZE - 1);
1770 }
1771
/* Function wrapper around getsc(): return the next input character. */
static int
s_get(void)
{
	int ch = getsc();

	return (ch);
}
1777
/* Function wrapper around ungetsc(): push c back, ignoring the result. */
static void
s_put(int c)
{
	(void)ungetsc(c);
}
1783