1 /* GNU SED, a batch stream editor.
2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008
3 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19 /* compile.c: translate sed source into internal form */
20
21 #include "sed.h"
22 #include <stdio.h>
23 #include <ctype.h>
24
25 #ifdef HAVE_STRINGS_H
26 # include <strings.h>
27 # ifdef HAVE_MEMORY_H
28 # include <memory.h>
29 # endif
30 #else
31 # include <string.h>
32 #endif /* HAVE_STRINGS_H */
33
34 #ifdef HAVE_STDLIB_H
35 # include <stdlib.h>
36 #endif
37 #ifndef EXIT_FAILURE
38 # define EXIT_FAILURE 1
39 #endif
40
41 #ifdef HAVE_SYS_TYPES_H
42 # include <sys/types.h>
43 #endif
44
45 #include <obstack.h>
46
47
/* NOTE(review): presumably the number of entries in a `y' command
   translation map, one per possible byte value -- confirm against
   the code that uses it.  */
#define YMAP_LENGTH             256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/

/* How many command slots a program vector grows by each time it
   fills up (see next_cmd_entry).  */
#define VECTOR_ALLOC_INCREMENT  40

/* let's not confuse text editors that have only dumb bracket-matching... */
#define OPEN_BRACKET    '['
#define CLOSE_BRACKET   ']'
#define OPEN_BRACE      '{'
#define CLOSE_BRACE     '}'
56
/* Where the script text currently being compiled comes from: either
   an in-memory string (BASE/CUR/END) or a stdio stream (FILE).  */
struct prog_info {
  /* When we're reading a script command from a string, `prog.base'
     points to the first character in the string, `prog.cur' points
     to the current character in the string, and `prog.end' points
     to the end of the string.  This allows us to compile script
     strings that contain nulls.  */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* This is the current script file.  If it is NULL, we are reading
     from a string stored at `prog.cur' instead.  If both `prog.file'
     and `prog.cur' are NULL, we're in trouble!  */
  FILE *file;
};
72
73 /* Information used to give out useful and informative error messages. */
74 struct error_info {
75 /* This is the name of the current script file. */
76 const char *name;
77
78 /* This is the number of the current script line that we're compiling. */
79 countT line;
80
81 /* This is the index of the "-e" expressions on the command line. */
82 countT string_expr_count;
83 };
84
85
86 /* Label structure used to resolve GOTO's, labels, and block beginnings. */
87 struct sed_label {
88 countT v_index; /* index of vector element being referenced */
89 char *name; /* NUL-terminated name of the label */
90 struct error_info err_info; /* track where `{}' blocks start */
91 struct sed_label *next; /* linked list (stack) */
92 };
93
94 struct special_files {
95 struct output outf;
96 FILE **pfp;
97 };
98
99 FILE *my_stdin, *my_stdout, *my_stderr;
100 struct special_files special_files[] = {
101 { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
102 { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
103 { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
104 { { NULL, false, NULL, NULL }, NULL }
105 };
106
107
108 /* Where we are in the processing of the input. */
109 static struct prog_info prog;
110 static struct error_info cur_input;
111
112 /* Information about labels and jumps-to-labels. This is used to do
113 the required backpatching after we have compiled all the scripts. */
114 static struct sed_label *jumps = NULL;
115 static struct sed_label *labels = NULL;
116
117 /* We wish to detect #n magic only in the first input argument;
118 this flag tracks when we have consumed the first file of input. */
119 static bool first_script = true;
120
121 /* Allow for scripts like "sed -e 'i\' -e foo": */
122 static struct buffer *pending_text = NULL;
123 static struct text_buf *old_text_buf = NULL;
124
125 /* Information about block start positions. This is used to backpatch
126 block end positions. */
127 static struct sed_label *blocks = NULL;
128
129 /* Use an obstack for compilation. */
130 static struct obstack obs;
131
/* Various error messages we may want to print.

   All messages live in one array, separated by NUL bytes.  Each macro
   below is a pointer to the start of one message, computed as the
   previous message's pointer plus the sizeof of that previous message
   (which counts its terminating NUL).  The string passed to N_() in a
   macro must therefore stay byte-for-byte identical to the
   corresponding piece of `errors', and the order of the macros must
   match the order of the pieces.  */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";

#define BAD_BANG (errors)
#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
186
/* Heads of the lists of files already opened through get_openfile():
   `file_read' for the `R' command, `file_write' for `w', `W' and the
   `w' option of `s'.  */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
189
190
191 /* Complain about an unknown command and exit. */
192 void
bad_command(ch)193 bad_command(ch)
194 char ch;
195 {
196 const char *msg = _(UNKNOWN_CMD);
197 char *unknown_cmd = xmalloc(strlen(msg));
198 sprintf(unknown_cmd, msg, ch);
199 bad_prog(unknown_cmd);
200 }
201
202 /* Complain about a programming error and exit. */
203 void
bad_prog(why)204 bad_prog(why)
205 const char *why;
206 {
207 if (cur_input.name)
208 fprintf(stderr, _("%s: file %s line %lu: %s\n"),
209 myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
210 else
211 fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
212 myname,
213 CAST(unsigned long)cur_input.string_expr_count,
214 CAST(unsigned long)(prog.cur-prog.base),
215 why);
216 exit(EXIT_FAILURE);
217 }
218
219
220 /* Read the next character from the program. Return EOF if there isn't
221 anything to read. Keep cur_input.line up to date, so error messages
222 can be meaningful. */
223 static int inchar P_((void));
224 static int
inchar()225 inchar()
226 {
227 int ch = EOF;
228
229 if (prog.cur)
230 {
231 if (prog.cur < prog.end)
232 ch = *prog.cur++;
233 }
234 else if (prog.file)
235 {
236 if (!feof(prog.file))
237 ch = getc(prog.file);
238 }
239 if (ch == '\n')
240 ++cur_input.line;
241 return ch;
242 }
243
244 /* unget `ch' so the next call to inchar will return it. */
245 static void savchar P_((int ch));
246 static void
savchar(ch)247 savchar(ch)
248 int ch;
249 {
250 if (ch == EOF)
251 return;
252 if (ch == '\n' && cur_input.line > 0)
253 --cur_input.line;
254 if (prog.cur)
255 {
256 if (prog.cur <= prog.base || *--prog.cur != ch)
257 panic("Called savchar() with unexpected pushback (%x)",
258 CAST(unsigned char)ch);
259 }
260 else
261 ungetc(ch, prog.file);
262 }
263
/* Read the next non-blank character from the program.
   Characters for which ISBLANK() is true are skipped; the first other
   value returned by inchar() (possibly EOF or a newline) is returned.  */
static int
in_nonblank (void)
{
  int ch;

  do
    ch = inchar ();
  while (ISBLANK (ch));
  return ch;
}
275
276 /* Read an integer value from the program. */
277 static countT in_integer P_((int ch));
278 static countT
in_integer(ch)279 in_integer(ch)
280 int ch;
281 {
282 countT num = 0;
283
284 while (ISDIGIT(ch))
285 {
286 num = num * 10 + ch - '0';
287 ch = inchar();
288 }
289 savchar(ch);
290 return num;
291 }
292
/* Append CH to buffer B, then read and return the next character of
   the program (EOF at end of input).  */
static int
add_then_next (struct buffer *b, int ch)
{
  add1_buffer (b, ch);
  return inchar ();
}
302
/* Convert a run of digits in the given BASE into a single character.

   RESULT        where the converted character is stored;
   BUF           first character to examine;
   BUFEND        one past the last character that may be examined;
   BASE          numeric base (digits 0-9 and a-f/A-F are recognized,
                 so bases up to 16 are meaningful);
   MAXDIGITS     maximum number of digits to consume;
   DEFAULT_CHAR  value stored in *RESULT when no digit could be read.

   Conversion stops at BUFEND, after MAXDIGITS digits, or at the first
   character that is not a digit valid in BASE.  The accumulated value
   is stored into *RESULT with a plain assignment, so it is narrowed to
   `char'.  Returns a pointer to the first character not consumed
   (equal to BUF when nothing was converted).  */
static char *
convert_number (char *result, char *buf, const char *bufend,
                int base, int maxdigits, int default_char)
{
  int n = 0;
  char *p;

  for (p = buf; p < bufend && maxdigits-- > 0; ++p)
    {
      int d = -1;       /* value of the current digit, -1 if not a digit */

      switch (*p)
        {
        case '0': d = 0x0; break;
        case '1': d = 0x1; break;
        case '2': d = 0x2; break;
        case '3': d = 0x3; break;
        case '4': d = 0x4; break;
        case '5': d = 0x5; break;
        case '6': d = 0x6; break;
        case '7': d = 0x7; break;
        case '8': d = 0x8; break;
        case '9': d = 0x9; break;
        case 'A': case 'a': d = 0xa; break;
        case 'B': case 'b': d = 0xb; break;
        case 'C': case 'c': d = 0xc; break;
        case 'D': case 'd': d = 0xd; break;
        case 'E': case 'e': d = 0xe; break;
        case 'F': case 'f': d = 0xf; break;
        }

      /* Not a digit at all, or a digit too large for BASE.  */
      if (d < 0 || base <= d)
        break;
      n = n * base + d;
    }

  if (p == buf)
    *result = default_char;
  else
    *result = n;
  return p;
}
348
349
/* Read in a filename for a `r', `w', or `s///w' command.
   Leading blanks are skipped; everything up to (not including) the
   next newline or the end of input is the name.  Returns a newly
   initialized buffer holding the NUL-terminated name; the caller
   releases it with free_buffer().  */
static struct buffer *
read_filename (void)
{
  struct buffer *b;
  int ch;

  b = init_buffer ();
  ch = in_nonblank ();
  while (ch != EOF && ch != '\n')
    {
#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
      if (posixicity == POSIXLY_EXTENDED)
        if (ch == ';' || ch == '#')
          {
            savchar (ch);
            break;
          }
#endif
      ch = add_then_next (b, ch);
    }
  add1_buffer (b, '\0');
  return b;
}
375
376 static struct output *get_openfile P_((struct output **file_ptrs, const char *mode, int fail));
377 static struct output *
get_openfile(file_ptrs,mode,fail)378 get_openfile(file_ptrs, mode, fail)
379 struct output **file_ptrs;
380 const char *mode;
381 int fail;
382 {
383 struct buffer *b;
384 char *file_name;
385 struct output *p;
386
387 b = read_filename();
388 file_name = get_buffer(b);
389 for (p=*file_ptrs; p; p=p->link)
390 if (strcmp(p->name, file_name) == 0)
391 break;
392
393 if (posixicity == POSIXLY_EXTENDED)
394 {
395 /* Check whether it is a special file (stdin, stdout or stderr) */
396 struct special_files *special = special_files;
397
398 /* std* sometimes are not constants, so they
399 cannot be used in the initializer for special_files */
400 my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
401 for (special = special_files; special->outf.name; special++)
402 if (strcmp(special->outf.name, file_name) == 0)
403 {
404 special->outf.fp = *special->pfp;
405 free_buffer (b);
406 return &special->outf;
407 }
408 }
409
410 if (!p)
411 {
412 p = OB_MALLOC(&obs, 1, struct output);
413 p->name = ck_strdup(file_name);
414 p->fp = ck_fopen(p->name, mode, fail);
415 p->missing_newline = false;
416 p->link = *file_ptrs;
417 *file_ptrs = p;
418 }
419 free_buffer(b);
420 return p;
421 }
422
423
424 static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
425 static struct sed_cmd *
next_cmd_entry(vectorp)426 next_cmd_entry(vectorp)
427 struct vector **vectorp;
428 {
429 struct sed_cmd *cmd;
430 struct vector *v;
431
432 v = *vectorp;
433 if (v->v_length == v->v_allocated)
434 {
435 v->v_allocated += VECTOR_ALLOC_INCREMENT;
436 v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
437 }
438
439 cmd = v->v + v->v_length;
440 cmd->a1 = NULL;
441 cmd->a2 = NULL;
442 cmd->range_state = RANGE_INACTIVE;
443 cmd->addr_bang = false;
444 cmd->cmd = '\0'; /* something invalid, to catch bugs early */
445
446 *vectorp = v;
447 return cmd;
448 }
449
450 static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
451 static int
snarf_char_class(b,cur_stat)452 snarf_char_class(b, cur_stat)
453 struct buffer *b;
454 mbstate_t *cur_stat;
455 {
456 int ch;
457 int state = 0;
458 int delim;
459 bool pending_mb = 0;
460
461 ch = inchar();
462 if (ch == '^')
463 ch = add_then_next(b, ch);
464 if (ch == CLOSE_BRACKET)
465 ch = add_then_next(b, ch);
466
467 /* States are:
468 0 outside a collation element, character class or collation class
469 1 after the bracket
470 2 after the opening ./:/=
471 3 after the closing ./:/= */
472
473 for (;; ch = add_then_next (b, ch))
474 {
475 pending_mb = BRLEN (ch, cur_stat) != 1;
476
477 switch (ch)
478 {
479 case EOF:
480 case '\n':
481 return ch;
482
483 case '.':
484 case ':':
485 case '=':
486 if (pending_mb)
487 continue;
488
489 if (state == 1)
490 {
491 delim = ch;
492 state = 2;
493 }
494 else if (state == 2 && ch == delim)
495 state = 3;
496 else
497 break;
498
499 continue;
500
501 case OPEN_BRACKET:
502 if (pending_mb)
503 continue;
504
505 if (state == 0)
506 state = 1;
507 continue;
508
509 case CLOSE_BRACKET:
510 if (pending_mb)
511 continue;
512
513 if (state == 0 || state == 1)
514 return ch;
515 else if (state == 3)
516 state = 0;
517
518 break;
519
520 default:
521 break;
522 }
523
524 /* Getting a character different from .=: whilst in state 1
525 goes back to state 0, getting a character different from ]
526 whilst in state 3 goes back to state 2. */
527 state &= ~1;
528 }
529 }
530
531 static struct buffer *match_slash P_((int slash, int regex));
532 static struct buffer *
match_slash(slash,regex)533 match_slash(slash, regex)
534 int slash;
535 int regex;
536 {
537 struct buffer *b;
538 int ch;
539 mbstate_t cur_stat;
540
541 memset (&cur_stat, 0, sizeof (mbstate_t));
542
543 /* We allow only 1 byte characters for a slash. */
544 if (BRLEN (slash, &cur_stat) == -2)
545 bad_prog (BAD_DELIM);
546
547 memset (&cur_stat, 0, sizeof (mbstate_t));
548
549 b = init_buffer();
550 while ((ch = inchar()) != EOF && ch != '\n')
551 {
552 bool pending_mb = !MBSINIT (&cur_stat);
553 if (BRLEN (ch, &cur_stat) == 1 && !pending_mb)
554 {
555 if (ch == slash)
556 return b;
557 else if (ch == '\\')
558 {
559 ch = inchar();
560 if (ch == EOF)
561 break;
562 #ifndef REG_PERL
563 else if (ch == 'n' && regex)
564 ch = '\n';
565 #endif
566 else if (ch != '\n' && (ch != slash || (!regex && ch == '&')))
567 add1_buffer(b, '\\');
568 }
569 else if (ch == OPEN_BRACKET && regex)
570 {
571 add1_buffer(b, ch);
572 ch = snarf_char_class(b, &cur_stat);
573 if (ch != CLOSE_BRACKET)
574 break;
575 }
576 }
577
578 add1_buffer(b, ch);
579 }
580
581 if (ch == '\n')
582 savchar(ch); /* for proper line number in error report */
583 free_buffer(b);
584 return NULL;
585 }
586
587 static int mark_subst_opts P_((struct subst *cmd));
588 static int
mark_subst_opts(cmd)589 mark_subst_opts(cmd)
590 struct subst *cmd;
591 {
592 int flags = 0;
593 int ch;
594
595 cmd->global = false;
596 cmd->print = false;
597 cmd->eval = false;
598 cmd->numb = 0;
599 cmd->outf = NULL;
600
601 for (;;)
602 switch ( (ch = in_nonblank()) )
603 {
604 case 'i': /* GNU extension */
605 case 'I': /* GNU extension */
606 if (posixicity == POSIXLY_BASIC)
607 bad_prog(_(UNKNOWN_S_OPT));
608 flags |= REG_ICASE;
609 break;
610
611 #ifdef REG_PERL
612 case 's': /* GNU extension */
613 case 'S': /* GNU extension */
614 if (posixicity == POSIXLY_BASIC)
615 bad_prog(_(UNKNOWN_S_OPT));
616 if (extended_regexp_flags & REG_PERL)
617 flags |= REG_DOTALL;
618 break;
619
620 case 'x': /* GNU extension */
621 case 'X': /* GNU extension */
622 if (posixicity == POSIXLY_BASIC)
623 bad_prog(_(UNKNOWN_S_OPT));
624 if (extended_regexp_flags & REG_PERL)
625 flags |= REG_EXTENDED;
626 break;
627 #endif
628
629 case 'm': /* GNU extension */
630 case 'M': /* GNU extension */
631 if (posixicity == POSIXLY_BASIC)
632 bad_prog(_(UNKNOWN_S_OPT));
633 flags |= REG_NEWLINE;
634 break;
635
636 case 'e':
637 cmd->eval = true;
638 break;
639
640 case 'p':
641 if (cmd->print)
642 bad_prog(_(EXCESS_P_OPT));
643 cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
644 break;
645
646 case 'g':
647 if (cmd->global)
648 bad_prog(_(EXCESS_G_OPT));
649 cmd->global = true;
650 break;
651
652 case 'w':
653 cmd->outf = get_openfile(&file_write, "w", true);
654 return flags;
655
656 case '0': case '1': case '2': case '3': case '4':
657 case '5': case '6': case '7': case '8': case '9':
658 if (cmd->numb)
659 bad_prog(_(EXCESS_N_OPT));
660 cmd->numb = in_integer(ch);
661 if (!cmd->numb)
662 bad_prog(_(ZERO_N_OPT));
663 break;
664
665 case CLOSE_BRACE:
666 case '#':
667 savchar(ch);
668 /* Fall Through */
669 case EOF:
670 case '\n':
671 case ';':
672 return flags;
673
674 case '\r':
675 if (inchar() == '\n')
676 return flags;
677 /* FALLTHROUGH */
678
679 default:
680 bad_prog(_(UNKNOWN_S_OPT));
681 /*NOTREACHED*/
682 }
683 }
684
685
686 /* read in a label for a `:', `b', or `t' command */
687 static char *read_label P_((void));
688 static char *
read_label()689 read_label()
690 {
691 struct buffer *b;
692 int ch;
693 char *ret;
694
695 b = init_buffer();
696 ch = in_nonblank();
697
698 while (ch != EOF && ch != '\n'
699 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
700 ch = add_then_next (b, ch);
701
702 savchar(ch);
703 add1_buffer(b, '\0');
704 ret = ck_strdup(get_buffer(b));
705 free_buffer(b);
706 return ret;
707 }
708
709 /* Store a label (or label reference) created by a `:', `b', or `t'
710 command so that the jump to/from the label can be backpatched after
711 compilation is complete, or a reference created by a `{' to be
712 backpatched when the corresponding `}' is found. */
713 static struct sed_label *setup_label
714 P_((struct sed_label *, countT, char *, const struct error_info *));
715 static struct sed_label *
setup_label(list,idx,name,err_info)716 setup_label(list, idx, name, err_info)
717 struct sed_label *list;
718 countT idx;
719 char *name;
720 const struct error_info *err_info;
721 {
722 struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
723 ret->v_index = idx;
724 ret->name = name;
725 if (err_info)
726 MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
727 ret->next = list;
728 return ret;
729 }
730
731 static struct sed_label *release_label P_((struct sed_label *list_head));
732 static struct sed_label *
release_label(list_head)733 release_label(list_head)
734 struct sed_label *list_head;
735 {
736 struct sed_label *ret;
737
738 if (!list_head)
739 return NULL;
740 ret = list_head->next;
741
742 FREE(list_head->name);
743
744 #if 0
745 /* We use obstacks */
746 FREE(list_head);
747 #endif
748 return ret;
749 }
750
751 static struct replacement *new_replacement P_((char *, size_t,
752 enum replacement_types));
753 static struct replacement *
new_replacement(text,length,type)754 new_replacement(text, length, type)
755 char *text;
756 size_t length;
757 enum replacement_types type;
758 {
759 struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
760
761 r->prefix = text;
762 r->prefix_length = length;
763 r->subst_id = -1;
764 r->repl_type = type;
765
766 /* r-> next = NULL; */
767 return r;
768 }
769
770 static void setup_replacement P_((struct subst *, const char *, size_t));
771 static void
setup_replacement(sub,text,length)772 setup_replacement(sub, text, length)
773 struct subst *sub;
774 const char *text;
775 size_t length;
776 {
777 char *base;
778 char *p;
779 char *text_end;
780 enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
781 struct replacement root;
782 struct replacement *tail;
783
784 sub->max_id = 0;
785 base = MEMDUP(text, length, char);
786 length = normalize_text(base, length, TEXT_REPLACEMENT);
787
788 text_end = base + length;
789 tail = &root;
790
791 for (p=base; p<text_end; ++p)
792 {
793 if (*p == '\\')
794 {
795 /* Preceding the backslash may be some literal text: */
796 tail = tail->next =
797 new_replacement(base, CAST(size_t)(p - base), repl_type);
798
799 repl_type = save_type;
800
801 /* Skip the backslash and look for a numeric back-reference,
802 or a case-munging escape if not in POSIX mode: */
803 ++p;
804 if (p == text_end)
805 ++tail->prefix_length;
806
807 else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p))
808 {
809 p[-1] = *p;
810 ++tail->prefix_length;
811 }
812
813 else
814 switch (*p)
815 {
816 case '0': case '1': case '2': case '3': case '4':
817 case '5': case '6': case '7': case '8': case '9':
818 tail->subst_id = *p - '0';
819 if (sub->max_id < tail->subst_id)
820 sub->max_id = tail->subst_id;
821 break;
822
823 case 'L':
824 repl_type = REPL_LOWERCASE;
825 save_type = REPL_LOWERCASE;
826 break;
827
828 case 'U':
829 repl_type = REPL_UPPERCASE;
830 save_type = REPL_UPPERCASE;
831 break;
832
833 case 'E':
834 repl_type = REPL_ASIS;
835 save_type = REPL_ASIS;
836 break;
837
838 case 'l':
839 save_type = repl_type;
840 repl_type |= REPL_LOWERCASE_FIRST;
841 break;
842
843 case 'u':
844 save_type = repl_type;
845 repl_type |= REPL_UPPERCASE_FIRST;
846 break;
847
848 default:
849 p[-1] = *p;
850 ++tail->prefix_length;
851 }
852
853 base = p + 1;
854 }
855 else if (*p == '&')
856 {
857 /* Preceding the ampersand may be some literal text: */
858 tail = tail->next =
859 new_replacement(base, CAST(size_t)(p - base), repl_type);
860
861 repl_type = save_type;
862 tail->subst_id = 0;
863 base = p + 1;
864 }
865 }
866 /* There may be some trailing literal text: */
867 if (base < text_end)
868 tail = tail->next =
869 new_replacement(base, CAST(size_t)(text_end - base), repl_type);
870
871 tail->next = NULL;
872 sub->replacement = root.next;
873 }
874
875 static void read_text P_((struct text_buf *buf, int leadin_ch));
876 static void
read_text(buf,leadin_ch)877 read_text(buf, leadin_ch)
878 struct text_buf *buf;
879 int leadin_ch;
880 {
881 int ch;
882
883 /* Should we start afresh (as opposed to continue a partial text)? */
884 if (buf)
885 {
886 if (pending_text)
887 free_buffer(pending_text);
888 pending_text = init_buffer();
889 buf->text = NULL;
890 buf->text_length = 0;
891 old_text_buf = buf;
892 }
893 /* assert(old_text_buf != NULL); */
894
895 if (leadin_ch == EOF)
896 return;
897
898 if (leadin_ch != '\n')
899 add1_buffer(pending_text, leadin_ch);
900
901 ch = inchar();
902 while (ch != EOF && ch != '\n')
903 {
904 if (ch == '\\')
905 {
906 ch = inchar();
907 if (ch != EOF)
908 add1_buffer (pending_text, '\\');
909 }
910
911 if (ch == EOF)
912 {
913 add1_buffer (pending_text, '\n');
914 return;
915 }
916
917 ch = add_then_next (pending_text, ch);
918 }
919
920 add1_buffer(pending_text, '\n');
921 if (!buf)
922 buf = old_text_buf;
923 buf->text_length = normalize_text (get_buffer (pending_text),
924 size_buffer (pending_text), TEXT_BUFFER);
925 buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
926 free_buffer(pending_text);
927 pending_text = NULL;
928 }
929
930
931 /* Try to read an address for a sed command. If it succeeds,
932 return non-zero and store the resulting address in `*addr'.
933 If the input doesn't look like an address read nothing
934 and return zero. */
935 static bool compile_address P_((struct addr *addr, int ch));
936 static bool
compile_address(addr,ch)937 compile_address(addr, ch)
938 struct addr *addr;
939 int ch;
940 {
941 addr->addr_type = ADDR_IS_NULL;
942 addr->addr_step = 0;
943 addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
944 addr->addr_regex = NULL;
945
946 if (ch == '/' || ch == '\\')
947 {
948 int flags = 0;
949 struct buffer *b;
950 addr->addr_type = ADDR_IS_REGEX;
951 if (ch == '\\')
952 ch = inchar();
953 if ( !(b = match_slash(ch, true)) )
954 bad_prog(_(UNTERM_ADDR_RE));
955
956 for(;;)
957 {
958 ch = in_nonblank();
959 if (posixicity == POSIXLY_BASIC)
960 goto posix_address_modifier;
961 switch(ch)
962 {
963 case 'I': /* GNU extension */
964 flags |= REG_ICASE;
965 break;
966
967 #ifdef REG_PERL
968 case 'S': /* GNU extension */
969 if (extended_regexp_flags & REG_PERL)
970 flags |= REG_DOTALL;
971 break;
972
973 case 'X': /* GNU extension */
974 if (extended_regexp_flags & REG_PERL)
975 flags |= REG_EXTENDED;
976 break;
977 #endif
978
979 case 'M': /* GNU extension */
980 flags |= REG_NEWLINE;
981 break;
982
983 default:
984 posix_address_modifier:
985 savchar (ch);
986 addr->addr_regex = compile_regex (b, flags, 0);
987 free_buffer(b);
988 return true;
989 }
990 }
991 }
992 else if (ISDIGIT(ch))
993 {
994 addr->addr_number = in_integer(ch);
995 addr->addr_type = ADDR_IS_NUM;
996 ch = in_nonblank();
997 if (ch != '~' || posixicity == POSIXLY_BASIC)
998 {
999 savchar(ch);
1000 }
1001 else
1002 {
1003 countT step = in_integer(in_nonblank());
1004 if (step > 0)
1005 {
1006 addr->addr_step = step;
1007 addr->addr_type = ADDR_IS_NUM_MOD;
1008 }
1009 }
1010 }
1011 else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC)
1012 {
1013 addr->addr_step = in_integer(in_nonblank());
1014 if (addr->addr_step==0)
1015 ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
1016 else if (ch == '+')
1017 addr->addr_type = ADDR_IS_STEP;
1018 else
1019 addr->addr_type = ADDR_IS_STEP_MOD;
1020 }
1021 else if (ch == '$')
1022 {
1023 addr->addr_type = ADDR_IS_LAST;
1024 }
1025 else
1026 return false;
1027
1028 return true;
1029 }
1030
1031 /* Read a program (or a subprogram within `{' `}' pairs) in and store
1032 the compiled form in `*vector'. Return a pointer to the new vector. */
1033 static struct vector *compile_program P_((struct vector *));
1034 static struct vector *
compile_program(vector)1035 compile_program(vector)
1036 struct vector *vector;
1037 {
1038 struct sed_cmd *cur_cmd;
1039 struct buffer *b;
1040 int ch;
1041
1042 if (!vector)
1043 {
1044 vector = MALLOC(1, struct vector);
1045 vector->v = NULL;
1046 vector->v_allocated = 0;
1047 vector->v_length = 0;
1048
1049 obstack_init (&obs);
1050 }
1051 if (pending_text)
1052 read_text(NULL, '\n');
1053
1054 for (;;)
1055 {
1056 struct addr a;
1057
1058 while ((ch=inchar()) == ';' || ISSPACE(ch))
1059 ;
1060 if (ch == EOF)
1061 break;
1062
1063 cur_cmd = next_cmd_entry(&vector);
1064 if (compile_address(&a, ch))
1065 {
1066 if (a.addr_type == ADDR_IS_STEP
1067 || a.addr_type == ADDR_IS_STEP_MOD)
1068 bad_prog(_(BAD_STEP));
1069
1070 cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
1071 ch = in_nonblank();
1072 if (ch == ',')
1073 {
1074 if (!compile_address(&a, in_nonblank()))
1075 bad_prog(_(BAD_COMMA));
1076
1077 cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
1078 ch = in_nonblank();
1079 }
1080
1081 if ((cur_cmd->a1->addr_type == ADDR_IS_NUM
1082 && cur_cmd->a1->addr_number == 0)
1083 && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX)
1084 || posixicity == POSIXLY_BASIC))
1085 bad_prog(_(INVALID_LINE_0));
1086 }
1087 if (ch == '!')
1088 {
1089 cur_cmd->addr_bang = true;
1090 ch = in_nonblank();
1091 if (ch == '!')
1092 bad_prog(_(BAD_BANG));
1093 }
1094
1095 /* Do not accept extended commands in --posix mode. Also,
1096 a few commands only accept one address in that mode. */
1097 if (posixicity == POSIXLY_BASIC)
1098 switch (ch)
1099 {
1100 case 'e': case 'v': case 'z': case 'L':
1101 case 'Q': case 'T': case 'R': case 'W':
1102 bad_command(ch);
1103
1104 case 'a': case 'i': case 'l':
1105 case '=': case 'r':
1106 if (cur_cmd->a2)
1107 bad_prog(_(ONE_ADDR));
1108 }
1109
1110 cur_cmd->cmd = ch;
1111 switch (ch)
1112 {
1113 case '#':
1114 if (cur_cmd->a1)
1115 bad_prog(_(NO_SHARP_ADDR));
1116 ch = inchar();
1117 if (ch=='n' && first_script && cur_input.line < 2)
1118 if ( (prog.base && prog.cur==2+prog.base)
1119 || (prog.file && !prog.base && 2==ftell(prog.file)))
1120 no_default_output = true;
1121 while (ch != EOF && ch != '\n')
1122 ch = inchar();
1123 continue; /* restart the for (;;) loop */
1124
1125 case 'v':
1126 /* This is an extension. Programs needing GNU sed might start
1127 * with a `v' command so that other seds will stop.
1128 * We compare the version and ignore POSIXLY_CORRECT.
1129 */
1130 {
1131 char *version = read_label ();
1132 char *compared_version;
1133 compared_version = (*version == '\0') ? "4.0" : version;
1134 if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
1135 bad_prog(_(ANCIENT_VERSION));
1136
1137 free (version);
1138 posixicity = POSIXLY_EXTENDED;
1139 }
1140 continue;
1141
1142 case '{':
1143 blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
1144 cur_cmd->addr_bang = !cur_cmd->addr_bang;
1145 break;
1146
1147 case '}':
1148 if (!blocks)
1149 bad_prog(_(EXCESS_CLOSE_BRACE));
1150 if (cur_cmd->a1)
1151 bad_prog(_(NO_CLOSE_BRACE_ADDR));
1152 ch = in_nonblank();
1153 if (ch == CLOSE_BRACE || ch == '#')
1154 savchar(ch);
1155 else if (ch != EOF && ch != '\n' && ch != ';')
1156 bad_prog(_(EXCESS_JUNK));
1157
1158 vector->v[blocks->v_index].x.jump_index = vector->v_length;
1159 blocks = release_label(blocks); /* done with this entry */
1160 break;
1161
1162 case 'e':
1163 ch = in_nonblank();
1164 if (ch == EOF || ch == '\n')
1165 {
1166 cur_cmd->x.cmd_txt.text_length = 0;
1167 break;
1168 }
1169 else
1170 goto read_text_to_slash;
1171
1172 case 'a':
1173 case 'i':
1174 case 'c':
1175 ch = in_nonblank();
1176
1177 read_text_to_slash:
1178 if (ch == EOF)
1179 bad_prog(_(EXPECTED_SLASH));
1180
1181 if (ch == '\\')
1182 ch = inchar();
1183 else
1184 {
1185 if (posixicity == POSIXLY_BASIC)
1186 bad_prog(_(EXPECTED_SLASH));
1187 savchar(ch);
1188 ch = '\n';
1189 }
1190
1191 read_text(&cur_cmd->x.cmd_txt, ch);
1192 break;
1193
1194 case ':':
1195 if (cur_cmd->a1)
1196 bad_prog(_(NO_COLON_ADDR));
1197 labels = setup_label(labels, vector->v_length, read_label(), NULL);
1198 break;
1199
1200 case 'T':
1201 case 'b':
1202 case 't':
1203 jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
1204 break;
1205
1206 case 'Q':
1207 case 'q':
1208 if (cur_cmd->a2)
1209 bad_prog(_(ONE_ADDR));
1210 /* Fall through */
1211
1212 case 'L':
1213 case 'l':
1214 ch = in_nonblank();
1215 if (ISDIGIT(ch) && posixicity != POSIXLY_BASIC)
1216 {
1217 cur_cmd->x.int_arg = in_integer(ch);
1218 ch = in_nonblank();
1219 }
1220 else
1221 cur_cmd->x.int_arg = -1;
1222
1223 if (ch == CLOSE_BRACE || ch == '#')
1224 savchar(ch);
1225 else if (ch != EOF && ch != '\n' && ch != ';')
1226 bad_prog(_(EXCESS_JUNK));
1227
1228 break;
1229
1230 case '=':
1231 case 'd':
1232 case 'D':
1233 case 'g':
1234 case 'G':
1235 case 'h':
1236 case 'H':
1237 case 'n':
1238 case 'N':
1239 case 'p':
1240 case 'P':
1241 case 'z':
1242 case 'x':
1243 ch = in_nonblank();
1244 if (ch == CLOSE_BRACE || ch == '#')
1245 savchar(ch);
1246 else if (ch != EOF && ch != '\n' && ch != ';')
1247 bad_prog(_(EXCESS_JUNK));
1248 break;
1249
1250 case 'r':
1251 b = read_filename();
1252 cur_cmd->x.fname = ck_strdup(get_buffer(b));
1253 free_buffer(b);
1254 break;
1255
1256 case 'R':
1257 cur_cmd->x.fp = get_openfile(&file_read, read_mode, false)->fp;
1258 break;
1259
1260 case 'W':
1261 case 'w':
1262 cur_cmd->x.outf = get_openfile(&file_write, "w", true);
1263 break;
1264
1265 case 's':
1266 {
1267 struct buffer *b2;
1268 int flags;
1269 int slash;
1270
1271 slash = inchar();
1272 if ( !(b = match_slash(slash, true)) )
1273 bad_prog(_(UNTERM_S_CMD));
1274 if ( !(b2 = match_slash(slash, false)) )
1275 bad_prog(_(UNTERM_S_CMD));
1276
1277 cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
1278 setup_replacement(cur_cmd->x.cmd_subst,
1279 get_buffer(b2), size_buffer(b2));
1280 free_buffer(b2);
1281
1282 flags = mark_subst_opts(cur_cmd->x.cmd_subst);
1283 cur_cmd->x.cmd_subst->regx =
1284 compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
1285 free_buffer(b);
1286 }
1287 break;
1288
1289 case 'y':
1290 {
1291 size_t len, dest_len;
1292 int slash;
1293 struct buffer *b2;
1294 char *src_buf, *dest_buf;
1295
1296 slash = inchar();
1297 if ( !(b = match_slash(slash, false)) )
1298 bad_prog(_(UNTERM_Y_CMD));
1299 src_buf = get_buffer(b);
1300 len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
1301
1302 if ( !(b2 = match_slash(slash, false)) )
1303 bad_prog(_(UNTERM_Y_CMD));
1304 dest_buf = get_buffer(b2);
1305 dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
1306
1307 if (mb_cur_max > 1)
1308 {
1309 int i, j, idx, src_char_num;
1310 size_t *src_lens = MALLOC(len, size_t);
1311 char **trans_pairs;
1312 size_t mbclen;
1313 mbstate_t cur_stat;
1314
1315 /* Enumerate how many character the source buffer has. */
1316 memset(&cur_stat, 0, sizeof(mbstate_t));
1317 for (i = 0, j = 0; i < len;)
1318 {
1319 mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
1320 /* An invalid sequence, or a truncated multibyte character.
1321 We treat it as a singlebyte character. */
1322 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1323 || mbclen == 0)
1324 mbclen = 1;
1325 src_lens[j++] = mbclen;
1326 i += mbclen;
1327 }
1328 src_char_num = j;
1329
1330 memset(&cur_stat, 0, sizeof(mbstate_t));
1331 idx = 0;
1332
1333 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
1334 src(i) : pointer to i-th source character.
1335 dest(i) : pointer to i-th destination character.
1336 NULL : terminator */
1337 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
1338 cur_cmd->x.translatemb = trans_pairs;
1339 for (i = 0; i < src_char_num; i++)
1340 {
1341 if (idx >= dest_len)
1342 bad_prog(_(Y_CMD_LEN));
1343
1344 /* Set the i-th source character. */
1345 trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
1346 strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
1347 trans_pairs[2 * i][src_lens[i]] = '\0';
1348 src_buf += src_lens[i]; /* Forward to next character. */
1349
1350 /* Fetch the i-th destination character. */
1351 mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
1352 /* An invalid sequence, or a truncated multibyte character.
1353 We treat it as a singlebyte character. */
1354 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1355 || mbclen == 0)
1356 mbclen = 1;
1357
1358 /* Set the i-th destination character. */
1359 trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
1360 strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
1361 trans_pairs[2 * i + 1][mbclen] = '\0';
1362 idx += mbclen; /* Forward to next character. */
1363 }
1364 trans_pairs[2 * i] = NULL;
1365 if (idx != dest_len)
1366 bad_prog(_(Y_CMD_LEN));
1367 }
1368 else
1369 {
1370 unsigned char *translate =
1371 OB_MALLOC(&obs, YMAP_LENGTH, unsigned char);
1372 unsigned char *ustring = CAST(unsigned char *)src_buf;
1373
1374 if (len != dest_len)
1375 bad_prog(_(Y_CMD_LEN));
1376
1377 for (len = 0; len < YMAP_LENGTH; len++)
1378 translate[len] = len;
1379
1380 while (dest_len--)
1381 translate[*ustring++] = (unsigned char)*dest_buf++;
1382
1383 cur_cmd->x.translate = translate;
1384 }
1385
1386 if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
1387 bad_prog(_(EXCESS_JUNK));
1388
1389 free_buffer(b);
1390 free_buffer(b2);
1391 }
1392 break;
1393
1394 case EOF:
1395 bad_prog(_(NO_COMMAND));
1396 /*NOTREACHED*/
1397
1398 default:
1399 bad_command (ch);
1400 /*NOTREACHED*/
1401 }
1402
1403 /* this is buried down here so that "continue" statements will miss it */
1404 ++vector->v_length;
1405 }
1406 return vector;
1407 }
1408
1409
1410 /* deal with \X escapes */
1411 size_t
normalize_text(buf,len,buftype)1412 normalize_text(buf, len, buftype)
1413 char *buf;
1414 size_t len;
1415 enum text_types buftype;
1416 {
1417 const char *bufend = buf + len;
1418 char *p = buf;
1419 char *q = buf;
1420
1421 /* This variable prevents normalizing text within bracket
1422 subexpressions when conforming to POSIX. If 0, we
1423 are not within a bracket expression. If -1, we are within a
1424 bracket expression but are not within [.FOO.], [=FOO=],
1425 or [:FOO:]. Otherwise, this is the '.', '=', or ':'
1426 respectively within these three types of subexpressions. */
1427 int bracket_state = 0;
1428
1429 int mbclen;
1430 mbstate_t cur_stat;
1431 memset(&cur_stat, 0, sizeof(mbstate_t));
1432
1433 while (p < bufend)
1434 {
1435 int c;
1436 mbclen = MBRLEN (p, bufend - p, &cur_stat);
1437 if (mbclen != 1)
1438 {
1439 /* An invalid sequence, or a truncated multibyte character.
1440 We treat it as a singlebyte character. */
1441 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1442 mbclen = 1;
1443
1444 memmove (q, p, mbclen);
1445 q += mbclen;
1446 p += mbclen;
1447 continue;
1448 }
1449
1450 if (*p == '\\' && p+1 < bufend && bracket_state == 0)
1451 switch ( (c = *++p) )
1452 {
1453 #if defined __STDC__ && __STDC__-0
1454 case 'a': *q++ = '\a'; p++; continue;
1455 #else /* Not STDC; we'll just assume ASCII */
1456 case 'a': *q++ = '\007'; p++; continue;
1457 #endif
1458 /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
1459 case 'f': *q++ = '\f'; p++; continue;
1460 case '\n': /*fall through */
1461 case 'n': *q++ = '\n'; p++; continue;
1462 case 'r': *q++ = '\r'; p++; continue;
1463 case 't': *q++ = '\t'; p++; continue;
1464 case 'v': *q++ = '\v'; p++; continue;
1465
1466 case 'd': /* decimal byte */
1467 p = convert_number(q, p+1, bufend, 10, 3, 'd');
1468 q++;
1469 continue;
1470
1471 case 'x': /* hexadecimal byte */
1472 p = convert_number(q, p+1, bufend, 16, 2, 'x');
1473 q++;
1474 continue;
1475
1476 #ifdef REG_PERL
1477 case '0': case '1': case '2': case '3':
1478 case '4': case '5': case '6': case '7':
1479 if ((extended_regexp_flags & REG_PERL)
1480 && p+1 < bufend
1481 && p[1] >= '0' && p[1] <= '9')
1482 {
1483 p = convert_number(q, p, bufend, 8, 3, *p);
1484 q++;
1485 }
1486 else
1487 {
1488 /* we just pass the \ up one level for interpretation */
1489 if (buftype != TEXT_BUFFER)
1490 *q++ = '\\';
1491 }
1492
1493 continue;
1494
1495 case 'o': /* octal byte */
1496 if (!(extended_regexp_flags & REG_PERL))
1497 {
1498 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1499 q++;
1500 }
1501 else
1502 {
1503 /* we just pass the \ up one level for interpretation */
1504 if (buftype != TEXT_BUFFER)
1505 *q++ = '\\';
1506 }
1507
1508 continue;
1509 #else
1510 case 'o': /* octal byte */
1511 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1512 q++;
1513 continue;
1514 #endif
1515
1516 case 'c':
1517 if (++p < bufend)
1518 {
1519 *q++ = toupper(*p) ^ 0x40;
1520 p++;
1521 continue;
1522 }
1523 else
1524 {
1525 /* we just pass the \ up one level for interpretation */
1526 if (buftype != TEXT_BUFFER)
1527 *q++ = '\\';
1528 continue;
1529 }
1530
1531 default:
1532 /* we just pass the \ up one level for interpretation */
1533 if (buftype != TEXT_BUFFER)
1534 *q++ = '\\';
1535 break;
1536 }
1537 else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
1538 switch (*p)
1539 {
1540 case '[':
1541 if (!bracket_state)
1542 bracket_state = -1;
1543 break;
1544
1545 case ':':
1546 case '.':
1547 case '=':
1548 if (bracket_state == -1 && p[-1] == '[')
1549 bracket_state = *p;
1550 break;
1551
1552 case ']':
1553 if (bracket_state == 0)
1554 ;
1555 else if (bracket_state == -1)
1556 bracket_state = 0;
1557 else if (p[-2] != bracket_state && p[-1] == bracket_state)
1558 bracket_state = -1;
1559 break;
1560 }
1561
1562 *q++ = *p++;
1563 }
1564 return (size_t)(q - buf);
1565 }
1566
1567
1568 /* `str' is a string (from the command line) that contains a sed command.
1569 Compile the command, and add it to the end of `cur_program'. */
1570 struct vector *
compile_string(cur_program,str,len)1571 compile_string(cur_program, str, len)
1572 struct vector *cur_program;
1573 char *str;
1574 size_t len;
1575 {
1576 static countT string_expr_count = 0;
1577 struct vector *ret;
1578
1579 prog.file = NULL;
1580 prog.base = CAST(unsigned char *)str;
1581 prog.cur = prog.base;
1582 prog.end = prog.cur + len;
1583
1584 cur_input.line = 0;
1585 cur_input.name = NULL;
1586 cur_input.string_expr_count = ++string_expr_count;
1587
1588 ret = compile_program(cur_program);
1589 prog.base = NULL;
1590 prog.cur = NULL;
1591 prog.end = NULL;
1592
1593 first_script = false;
1594 return ret;
1595 }
1596
1597 /* `cmdfile' is the name of a file containing sed commands.
1598 Read them in and add them to the end of `cur_program'.
1599 */
1600 struct vector *
compile_file(cur_program,cmdfile)1601 compile_file(cur_program, cmdfile)
1602 struct vector *cur_program;
1603 const char *cmdfile;
1604 {
1605 struct vector *ret;
1606
1607 prog.file = stdin;
1608 if (cmdfile[0] != '-' || cmdfile[1] != '\0')
1609 prog.file = ck_fopen(cmdfile, "rt", true);
1610
1611 cur_input.line = 1;
1612 cur_input.name = cmdfile;
1613 cur_input.string_expr_count = 0;
1614
1615 ret = compile_program(cur_program);
1616 if (prog.file != stdin)
1617 ck_fclose(prog.file);
1618 prog.file = NULL;
1619
1620 first_script = false;
1621 return ret;
1622 }
1623
1624 /* Make any checks which require the whole program to have been read.
1625 In particular: this backpatches the jump targets.
1626 Any cleanup which can be done after these checks is done here also. */
1627 void
check_final_program(program)1628 check_final_program(program)
1629 struct vector *program;
1630 {
1631 struct sed_label *go;
1632 struct sed_label *lbl;
1633
1634 /* do all "{"s have a corresponding "}"? */
1635 if (blocks)
1636 {
1637 /* update info for error reporting: */
1638 MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
1639 bad_prog(_(EXCESS_OPEN_BRACE));
1640 }
1641
1642 /* was the final command an unterminated a/c/i command? */
1643 if (pending_text)
1644 {
1645 old_text_buf->text_length = size_buffer(pending_text);
1646 if (old_text_buf->text_length)
1647 old_text_buf->text = MEMDUP(get_buffer(pending_text),
1648 old_text_buf->text_length, char);
1649 free_buffer(pending_text);
1650 pending_text = NULL;
1651 }
1652
1653 for (go = jumps; go; go = release_label(go))
1654 {
1655 for (lbl = labels; lbl; lbl = lbl->next)
1656 if (strcmp(lbl->name, go->name) == 0)
1657 break;
1658 if (lbl)
1659 {
1660 program->v[go->v_index].x.jump_index = lbl->v_index;
1661 }
1662 else
1663 {
1664 if (*go->name)
1665 panic(_("can't find label for jump to `%s'"), go->name);
1666 program->v[go->v_index].x.jump_index = program->v_length;
1667 }
1668 }
1669 jumps = NULL;
1670
1671 for (lbl = labels; lbl; lbl = release_label(lbl))
1672 ;
1673 labels = NULL;
1674
1675 /* There is no longer a need to track file names: */
1676 {
1677 struct output *p;
1678
1679 for (p=file_read; p; p=p->link)
1680 if (p->name)
1681 {
1682 FREE(p->name);
1683 p->name = NULL;
1684 }
1685
1686 for (p=file_write; p; p=p->link)
1687 if (p->name)
1688 {
1689 FREE(p->name);
1690 p->name = NULL;
1691 }
1692 }
1693 }
1694
1695 /* Rewind all resources which were allocated in this module. */
1696 void
rewind_read_files()1697 rewind_read_files()
1698 {
1699 struct output *p;
1700
1701 for (p=file_read; p; p=p->link)
1702 if (p->fp)
1703 rewind(p->fp);
1704 }
1705
1706 /* Release all resources which were allocated in this module. */
1707 void
finish_program(program)1708 finish_program(program)
1709 struct vector *program;
1710 {
1711 /* close all files... */
1712 {
1713 struct output *p, *q;
1714
1715 for (p=file_read; p; p=q)
1716 {
1717 if (p->fp)
1718 ck_fclose(p->fp);
1719 q = p->link;
1720 #if 0
1721 /* We use obstacks. */
1722 FREE(p);
1723 #endif
1724 }
1725
1726 for (p=file_write; p; p=q)
1727 {
1728 if (p->fp)
1729 ck_fclose(p->fp);
1730 q = p->link;
1731 #if 0
1732 /* We use obstacks. */
1733 FREE(p);
1734 #endif
1735 }
1736 file_read = file_write = NULL;
1737 }
1738
1739 #ifdef DEBUG_LEAKS
1740 obstack_free (&obs, NULL);
1741 #endif /*DEBUG_LEAKS*/
1742 }
1743