1 /* GNU SED, a batch stream editor.
2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2009
3 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
/* Deliberately left undefined: the -n optimization is flagged as very
   buggy by the note on the line itself. */
#undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/
/* Smallest allocation, in bytes, ever given to a line buffer. */
#define INITIAL_BUFFER_SIZE 50
/* Size, in bytes, of the chunk buffer used when copying a queued file
   to the output (see dump_append_queue). */
#define FREAD_BUFFER_SIZE 8192
22
23 #include "sed.h"
24
25 #include <stddef.h>
26 #include <stdio.h>
27 #include <ctype.h>
28
29 #include <errno.h>
30 #ifndef errno
31 extern int errno;
32 #endif
33
34 #ifndef BOOTSTRAP
35 #include <selinux/selinux.h>
36 #include <selinux/context.h>
37 #endif
38
39 #ifdef HAVE_UNISTD_H
40 # include <unistd.h>
41 #endif
42
43 #ifndef BOOTSTRAP
44 #include "acl.h"
45 #endif
46
/* UNUSED marks a parameter as intentionally unused.  It expands to the
   GCC `unused' attribute on GCC 2.7 or newer, and to nothing otherwise. */
#ifdef __GNUC__
# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
   /* silence warning about unused parameter even for "gcc -W -Wunused" */
#  define UNUSED __attribute__((unused))
# endif
#endif
#ifndef UNUSED
# define UNUSED
#endif
56
57 #ifdef HAVE_STRINGS_H
58 # include <strings.h>
59 #else
60 # include <string.h>
61 #endif /*HAVE_STRINGS_H*/
62 #ifdef HAVE_MEMORY_H
63 # include <memory.h>
64 #endif
65
/* When HAVE_STRCHR is not defined, map strchr()/strrchr() onto
   index()/rindex(). */
#ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
#endif
70
71 #ifdef HAVE_STDLIB_H
72 # include <stdlib.h>
73 #endif
74 #ifndef EXIT_SUCCESS
75 # define EXIT_SUCCESS 0
76 #endif
77
78 #ifdef HAVE_SYS_TYPES_H
79 # include <sys/types.h>
80 #endif
81
82 #include <sys/stat.h>
83 #include "stat-macros.h"
84
85
/* Sed operates a line at a time.  A `struct line' is a growable text
   buffer.  `active' may point past `text' once leading bytes have been
   consumed (do_subst advances it; resize_line and line_copy reclaim
   the gap). */
struct line {
  char *text;           /* Pointer to line allocated by malloc. */
  char *active;         /* Pointer to non-consumed part of text. */
  size_t length;        /* Length of text (or active, if used). */
  size_t alloc;         /* Allocated space for active. */
  bool chomped;         /* Was a trailing newline dropped? */
#ifdef HAVE_MBRTOWC
  /* Multibyte conversion state.  Keep this the last member:
     SIZEOF_LINE is defined as its offset within the structure. */
  mbstate_t mbstate;
#endif
};
97
/* Number of leading bytes of a `struct line' that exclude the multibyte
   state: the offset of `mbstate' when that member exists, the size of
   the whole structure otherwise.  line_exchange() copies this many
   bytes when it must leave the multibyte state untouched. */
#ifdef HAVE_MBRTOWC
#define SIZEOF_LINE     offsetof (struct line, mbstate)
#else
#define SIZEOF_LINE     (sizeof (struct line))
#endif
103
/* A queue of text to write out at the end of a cycle
   (filled by the "a", "r" and "R" commands.)  Entries form a singly
   linked list that dump_append_queue() writes and then releases. */
struct append_queue {
  const char *fname;            /* If non-null, file whose contents are copied to the output. */
  char *text;                   /* If non-null, text written to the output. */
  size_t textlen;               /* Number of bytes of `text' to write. */
  struct append_queue *next;    /* Next entry, or NULL at the tail. */
  bool free;                    /* If true, `text' is freed when the queue is released. */
};
113
/* State information for the input stream. */
struct input {
  /* The list of yet-to-be-opened files.  It is invalid for file_list
     to be NULL.  When *file_list is NULL we are currently processing
     the last file.  */

  char **file_list;

  /* Count of files we failed to open. */
  countT bad_count;

  /* Current input line number (over all files).  */
  countT line_number;

  /* True if we'll reset line numbers and addresses before
     starting to process the next (possibly the first) file.  */
  bool reset_at_next_file;

  /* Function to read one line.  If FP is NULL, read_fn better not
     be one which uses fp; in particular, read_always_fail() is
     recommended. */
  bool (*read_fn) P_((struct input *)); /* read one line */

  /* Name of the temporary output file created for in-place editing
     (filled in by ck_mkstemp in open_next_file, freed in closedown). */
  char *out_file_name;

  /* Name of the file being edited in place; set by open_next_file,
     after symlink resolution when follow_symlinks is set. */
  const char *in_file_name;

  /* Owner and mode to be set just before closing the file.  */
  struct stat st;

  /* if NULL, none of the following are valid */
  FILE *fp;

  /* NOTE(review): not referenced in this part of the file; presumably
     requests unbuffered reading -- confirm against its users. */
  bool no_buffering;
};
149
150
/* Have we done any replacements lately?  This is used by the `t' command. */
static bool replaced = false;

/* The current output file (stdout if -i is not being used). */
static struct output output_file;

/* The `current' input line. */
static struct line line;

/* An input line used to accumulate the result of the s and e commands. */
static struct line s_accum;

/* An input line that's been stored for later use by the program */
static struct line hold;

/* The buffered input look-ahead.  The only field that should be
   used outside of read_mem_line() or line_init() is buffer.length. */
static struct line buffer;

/* Head and tail of the pending append queue; both are NULL when the
   queue is empty. */
static struct append_queue *append_head = NULL;
static struct append_queue *append_tail = NULL;
172
173
#ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
   memchr(), and ../lib/memchr.c requires configuration knowledge
   about how many bits are in a `long'.  This implementation
   is far from ideal, but it should get us up-and-limping well
   enough to run the configure script, which is all that matters.

   Return a pointer to the first of the N bytes starting at S that is
   equal to C converted to unsigned char (the ISO C memchr contract),
   or a null pointer when no such byte exists.
*/
# ifdef memchr
#  undef memchr
# endif
# define memchr bootstrap_memchr

static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
static VOID *
bootstrap_memchr(s, c, n)
  const VOID *s;
  int c;
  size_t n;
{
  unsigned char *p = (unsigned char *)s;
  unsigned char wanted = (unsigned char)c;

  /* Compare as unsigned char: going through plain `char' would never
     match a byte above 0x7f on platforms where `char' is signed. */
  for (; n-- > 0; ++p)
    if (*p == wanted)
      return CAST(VOID *)p;
  return CAST(VOID *)0;
}
#endif /*BOOTSTRAP*/
201
202 /* increase a struct line's length, making some attempt at
203 keeping realloc() calls under control by padding for future growth. */
204 static void resize_line P_((struct line *, size_t));
205 static void
resize_line(lb,len)206 resize_line(lb, len)
207 struct line *lb;
208 size_t len;
209 {
210 int inactive;
211 inactive = lb->active - lb->text;
212
213 /* If the inactive part has got to more than two thirds of the buffer,
214 * remove it. */
215 if (inactive > lb->alloc * 2)
216 {
217 MEMMOVE(lb->text, lb->active, lb->length);
218 lb->alloc += lb->active - lb->text;
219 lb->active = lb->text;
220 inactive = 0;
221
222 if (lb->alloc > len)
223 return;
224 }
225
226 lb->alloc *= 2;
227 if (lb->alloc < len)
228 lb->alloc = len;
229 if (lb->alloc < INITIAL_BUFFER_SIZE)
230 lb->alloc = INITIAL_BUFFER_SIZE;
231
232 lb->text = REALLOC(lb->text, inactive + lb->alloc, char);
233 lb->active = lb->text + inactive;
234 }
235
236 /* Append `length' bytes from `string' to the line `to'. */
237 static void str_append P_((struct line *, const char *, size_t));
238 static void
str_append(to,string,length)239 str_append(to, string, length)
240 struct line *to;
241 const char *string;
242 size_t length;
243 {
244 size_t new_length = to->length + length;
245
246 if (to->alloc < new_length)
247 resize_line(to, new_length);
248 MEMCPY(to->active + to->length, string, length);
249 to->length = new_length;
250
251 #ifdef HAVE_MBRTOWC
252 if (mb_cur_max > 1 && !is_utf8)
253 while (length)
254 {
255 size_t n = MBRLEN (string, length, &to->mbstate);
256
257 /* An invalid sequence is treated like a singlebyte character. */
258 if (n == (size_t) -1)
259 {
260 memset (&to->mbstate, 0, sizeof (to->mbstate));
261 n = 1;
262 }
263
264 if (n > 0)
265 {
266 string += n;
267 length -= n;
268 }
269 else
270 break;
271 }
272 #endif
273 }
274
275 static void str_append_modified P_((struct line *, const char *, size_t,
276 enum replacement_types));
277 static void
str_append_modified(to,string,length,type)278 str_append_modified(to, string, length, type)
279 struct line *to;
280 const char *string;
281 size_t length;
282 enum replacement_types type;
283 {
284 #ifdef HAVE_MBRTOWC
285 mbstate_t from_stat;
286
287 if (type == REPL_ASIS)
288 {
289 str_append(to, string, length);
290 return;
291 }
292
293 if (to->alloc - to->length < length * mb_cur_max)
294 resize_line(to, to->length + length * mb_cur_max);
295
296 MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
297 while (length)
298 {
299 wchar_t wc;
300 int n = MBRTOWC (&wc, string, length, &from_stat);
301
302 /* An invalid sequence is treated like a singlebyte character. */
303 if (n == -1)
304 {
305 memset (&to->mbstate, 0, sizeof (from_stat));
306 n = 1;
307 }
308
309 if (n > 0)
310 string += n, length -= n;
311 else
312 {
313 /* Incomplete sequence, copy it manually. */
314 str_append(to, string, length);
315 return;
316 }
317
318 /* Convert the first character specially... */
319 if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
320 {
321 if (type & REPL_UPPERCASE_FIRST)
322 wc = towupper(wc);
323 else
324 wc = towlower(wc);
325
326 type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
327 if (type == REPL_ASIS)
328 {
329 n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
330 to->length += n;
331 str_append(to, string, length);
332 return;
333 }
334 }
335
336 else if (type & REPL_UPPERCASE)
337 wc = towupper(wc);
338 else
339 wc = towlower(wc);
340
341 /* Copy the new wide character to the end of the string. */
342 n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
343 to->length += n;
344 if (n == -1)
345 {
346 fprintf (stderr, "Case conversion produced an invalid character!");
347 abort ();
348 }
349 }
350 #else
351 size_t old_length = to->length;
352 char *start, *end;
353
354 str_append(to, string, length);
355 start = to->active + old_length;
356 end = start + length;
357
358 /* Now do the required modifications. First \[lu]... */
359 if (type & REPL_UPPERCASE_FIRST)
360 {
361 *start = toupper(*start);
362 start++;
363 type &= ~REPL_UPPERCASE_FIRST;
364 }
365 else if (type & REPL_LOWERCASE_FIRST)
366 {
367 *start = tolower(*start);
368 start++;
369 type &= ~REPL_LOWERCASE_FIRST;
370 }
371
372 if (type == REPL_ASIS)
373 return;
374
375 /* ...and then \[LU] */
376 if (type == REPL_UPPERCASE)
377 for (; start != end; start++)
378 *start = toupper(*start);
379 else
380 for (; start != end; start++)
381 *start = tolower(*start);
382 #endif
383 }
384
385 /* Initialize a "struct line" buffer. Copy multibyte state from `state'
386 if not null. */
387 static void line_init P_((struct line *, struct line *, size_t initial_size));
388 static void
line_init(buf,state,initial_size)389 line_init(buf, state, initial_size)
390 struct line *buf;
391 struct line *state;
392 size_t initial_size;
393 {
394 buf->text = MALLOC(initial_size, char);
395 buf->active = buf->text;
396 buf->alloc = initial_size;
397 buf->length = 0;
398 buf->chomped = true;
399
400 #ifdef HAVE_MBRTOWC
401 if (state)
402 memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
403 else
404 memset (&buf->mbstate, 0, sizeof (buf->mbstate));
405 #endif
406 }
407
408 /* Reset a "struct line" buffer to length zero. Copy multibyte state from
409 `state' if not null. */
410 static void line_reset P_((struct line *, struct line *));
411 static void
line_reset(buf,state)412 line_reset(buf, state)
413 struct line *buf, *state;
414 {
415 if (buf->alloc == 0)
416 line_init(buf, state, INITIAL_BUFFER_SIZE);
417 else
418 {
419 buf->length = 0;
420 #ifdef HAVE_MBRTOWC
421 if (state)
422 memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
423 else
424 memset (&buf->mbstate, 0, sizeof (buf->mbstate));
425 #endif
426 }
427 }
428
429 /* Copy the contents of the line `from' into the line `to'.
430 This destroys the old contents of `to'.
431 Copy the multibyte state if `state' is true. */
432 static void line_copy P_((struct line *from, struct line *to, int state));
433 static void
line_copy(from,to,state)434 line_copy(from, to, state)
435 struct line *from;
436 struct line *to;
437 int state;
438 {
439 /* Remove the inactive portion in the destination buffer. */
440 to->alloc += to->active - to->text;
441
442 if (to->alloc < from->length)
443 {
444 to->alloc *= 2;
445 if (to->alloc < from->length)
446 to->alloc = from->length;
447 if (to->alloc < INITIAL_BUFFER_SIZE)
448 to->alloc = INITIAL_BUFFER_SIZE;
449 /* Use FREE()+MALLOC() instead of REALLOC() to
450 avoid unnecessary copying of old text. */
451 FREE(to->text);
452 to->text = MALLOC(to->alloc, char);
453 }
454
455 to->active = to->text;
456 to->length = from->length;
457 to->chomped = from->chomped;
458 MEMCPY(to->active, from->active, from->length);
459
460 #ifdef HAVE_MBRTOWC
461 if (state)
462 MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
463 #endif
464 }
465
466 /* Append the contents of the line `from' to the line `to'.
467 Copy the multibyte state if `state' is true. */
468 static void line_append P_((struct line *from, struct line *to, int state));
469 static void
line_append(from,to,state)470 line_append(from, to, state)
471 struct line *from;
472 struct line *to;
473 int state;
474 {
475 str_append(to, "\n", 1);
476 str_append(to, from->active, from->length);
477 to->chomped = from->chomped;
478
479 #ifdef HAVE_MBRTOWC
480 if (state)
481 MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
482 #endif
483 }
484
485 /* Exchange two "struct line" buffers.
486 Copy the multibyte state if `state' is true. */
487 static void line_exchange P_((struct line *a, struct line *b, int state));
488 static void
line_exchange(a,b,state)489 line_exchange(a, b, state)
490 struct line *a;
491 struct line *b;
492 int state;
493 {
494 struct line t;
495
496 if (state)
497 {
498 MEMCPY(&t, a, sizeof (struct line));
499 MEMCPY( a, b, sizeof (struct line));
500 MEMCPY( b, &t, sizeof (struct line));
501 }
502 else
503 {
504 MEMCPY(&t, a, SIZEOF_LINE);
505 MEMCPY( a, b, SIZEOF_LINE);
506 MEMCPY( b, &t, SIZEOF_LINE);
507 }
508 }
509
510
511 /* dummy function to simplify read_pattern_space() */
512 static bool read_always_fail P_((struct input *));
513 static bool
read_always_fail(input)514 read_always_fail(input)
515 struct input *input UNUSED;
516 {
517 return false;
518 }
519
520 static bool read_file_line P_((struct input *));
521 static bool
read_file_line(input)522 read_file_line(input)
523 struct input *input;
524 {
525 static char *b;
526 static size_t blen;
527
528 long result = ck_getline (&b, &blen, input->fp);
529 if (result <= 0)
530 return false;
531
532 /* Remove the trailing new-line that is left by getline. */
533 if (b[result - 1] == '\n')
534 --result;
535 else
536 line.chomped = false;
537
538 str_append(&line, b, result);
539 return true;
540 }
541
542
543 static inline void output_missing_newline P_((struct output *));
544 static inline void
output_missing_newline(outf)545 output_missing_newline(outf)
546 struct output *outf;
547 {
548 if (outf->missing_newline)
549 {
550 ck_fwrite("\n", 1, 1, outf->fp);
551 outf->missing_newline = false;
552 }
553 }
554
555 static inline void flush_output P_((FILE *));
556 static inline void
flush_output(fp)557 flush_output(fp)
558 FILE *fp;
559 {
560 if (fp != stdout || unbuffered_output)
561 ck_fflush(fp);
562 }
563
564 static void output_line P_((const char *, size_t, int, struct output *));
565 static void
output_line(text,length,nl,outf)566 output_line(text, length, nl, outf)
567 const char *text;
568 size_t length;
569 int nl;
570 struct output *outf;
571 {
572 if (!text)
573 return;
574
575 output_missing_newline(outf);
576 if (length)
577 ck_fwrite(text, 1, length, outf->fp);
578 if (nl)
579 ck_fwrite("\n", 1, 1, outf->fp);
580 else
581 outf->missing_newline = true;
582
583 flush_output(outf->fp);
584 }
585
586 static struct append_queue *next_append_slot P_((void));
587 static struct append_queue *
next_append_slot()588 next_append_slot()
589 {
590 struct append_queue *n = MALLOC(1, struct append_queue);
591
592 n->fname = NULL;
593 n->text = NULL;
594 n->textlen = 0;
595 n->next = NULL;
596 n->free = false;
597
598 if (append_tail)
599 append_tail->next = n;
600 else
601 append_head = n;
602 return append_tail = n;
603 }
604
605 static void release_append_queue P_((void));
606 static void
release_append_queue()607 release_append_queue()
608 {
609 struct append_queue *p, *q;
610
611 for (p=append_head; p; p=q)
612 {
613 if (p->free)
614 FREE(p->text);
615
616 q = p->next;
617 FREE(p);
618 }
619 append_head = append_tail = NULL;
620 }
621
622 static void dump_append_queue P_((void));
623 static void
dump_append_queue()624 dump_append_queue()
625 {
626 struct append_queue *p;
627
628 output_missing_newline(&output_file);
629 for (p=append_head; p; p=p->next)
630 {
631 if (p->text)
632 ck_fwrite(p->text, 1, p->textlen, output_file.fp);
633
634 if (p->fname)
635 {
636 char buf[FREAD_BUFFER_SIZE];
637 size_t cnt;
638 FILE *fp;
639
640 /* "If _fname_ does not exist or cannot be read, it shall
641 be treated as if it were an empty file, causing no error
642 condition." IEEE Std 1003.2-1992
643 So, don't fail. */
644 fp = ck_fopen(p->fname, read_mode, false);
645 if (fp)
646 {
647 while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
648 ck_fwrite(buf, 1, cnt, output_file.fp);
649 ck_fclose(fp);
650 }
651 }
652 }
653
654 flush_output(output_file.fp);
655 release_append_queue();
656 }
657
658
659 /* Compute the name of the backup file for in-place editing */
660 static char *get_backup_file_name P_((const char *));
661 static char *
get_backup_file_name(name)662 get_backup_file_name(name)
663 const char *name;
664 {
665 char *old_asterisk, *asterisk, *backup, *p;
666 int name_length = strlen(name), backup_length = strlen(in_place_extension);
667
668 /* Compute the length of the backup file */
669 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
670 (asterisk = strchr(old_asterisk, '*'));
671 old_asterisk = asterisk + 1)
672 backup_length += name_length - 1;
673
674 p = backup = xmalloc(backup_length + 1);
675
676 /* Each iteration gobbles up to an asterisk */
677 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
678 (asterisk = strchr(old_asterisk, '*'));
679 old_asterisk = asterisk + 1)
680 {
681 MEMCPY (p, old_asterisk, asterisk - old_asterisk);
682 p += asterisk - old_asterisk;
683 strcpy (p, name);
684 p += name_length;
685 }
686
687 /* Tack on what's after the last asterisk */
688 strcpy (p, old_asterisk);
689 return backup;
690 }
691
/* Initialize a struct input for the named file.

   NAME is opened for reading; the name "-" stands for standard input
   unless in-place editing is active.  If the file cannot be opened a
   message is printed, input->bad_count is incremented and
   input->read_fn is set to read_always_fail.

   On success input->read_fn becomes read_file_line.  When editing in
   place, a temporary output file is additionally created in the
   directory of the input file and installed as output_file; otherwise
   output goes to stdout. */
static void open_next_file P_((const char *name, struct input *));
static void
open_next_file(name, input)
  const char *name;
  struct input *input;
{
  /* Drop any buffered look-ahead. */
  buffer.length = 0;

  if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
    {
      clearerr(stdin);  /* clear any stale EOF indication */
      input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false);
    }
  else if ( ! (input->fp = ck_fopen(name, read_mode, false)) )
    {
      /* Fetch the error text before any further call can change errno. */
      const char *ptr = strerror(errno);
      fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
      input->read_fn = read_always_fail;        /* a redundancy */
      ++input->bad_count;
      return;
    }

  input->read_fn = read_file_line;

  if (in_place_extension)
    {
      int input_fd;
      char *tmpdir, *p;
#ifndef BOOTSTRAP
      security_context_t old_fscreatecon;
      int reset_fscreatecon = 0;
      memset (&old_fscreatecon, 0, sizeof (old_fscreatecon));
#endif

      if (follow_symlinks)
        input->in_file_name = follow_symlink (name);
      else
        input->in_file_name = name;

      /* get the base name */
      /* (what is kept in tmpdir is the directory part of the name, or
         "." when the name contains no slash) */
      tmpdir = ck_strdup(input->in_file_name);
      if ((p = strrchr(tmpdir, '/')))
        *p = 0;
      else
        strcpy(tmpdir, ".");

      if (isatty (fileno (input->fp)))
        panic(_("couldn't edit %s: is a terminal"), input->in_file_name);

      /* Remember the attributes of the input file; closedown() reads
         the mode, owner and group from input->st. */
      input_fd = fileno (input->fp);
      fstat (input_fd, &input->st);
      if (!S_ISREG (input->st.st_mode))
        panic(_("couldn't edit %s: not a regular file"), input->in_file_name);

#ifndef BOOTSTRAP
      if (is_selinux_enabled ())
        {
          security_context_t con;
          if (getfilecon (input->in_file_name, &con) != -1)
            {
              /* Save and restore the old context for the sake of w and W
                 commands.  */
              reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0;
              if (setfscreatecon (con) < 0)
                fprintf (stderr, _("%s: warning: failed to set default file creation context to %s: %s"),
                         myname, con, strerror (errno));
              freecon (con);
            }
          else
            {
              if (errno != ENOSYS)
                fprintf (stderr, _("%s: warning: failed to get security context of %s: %s"),
                         myname, input->in_file_name, strerror (errno));
            }
        }
#endif

      /* Create the temporary output file next to the input file. */
      output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
      output_file.missing_newline = false;
      free (tmpdir);

#ifndef BOOTSTRAP
      /* Put back the file creation context saved above. */
      if (reset_fscreatecon)
        {
          setfscreatecon (old_fscreatecon);
          freecon (old_fscreatecon);
        }
#endif

      if (!output_file.fp)
        panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
    }
  else
    output_file.fp = stdout;
}
788
789
/* Clean up an input stream that we are done with.

   input->read_fn is always reset to read_always_fail.  If a stream is
   open it is closed and input->fp is set to NULL.  When editing in
   place with an open output file, the temporary output file first
   receives the ACL (and, with HAVE_FCHOWN, the owner and group) of the
   input file; then both files are closed, the original is renamed to
   its backup name unless the extension is exactly "*", and the
   temporary file is renamed over the original. */
static void closedown P_((struct input *));
static void
closedown(input)
  struct input *input;
{
  input->read_fn = read_always_fail;
  if (!input->fp)
    return;

  if (in_place_extension && output_file.fp != NULL)
    {
      const char *target_name;
      int input_fd, output_fd;

      target_name = input->in_file_name;
      input_fd = fileno (input->fp);
      output_fd = fileno (output_file.fp);
      copy_acl (input->in_file_name, input_fd,
                input->out_file_name, output_fd,
                input->st.st_mode);
#ifdef HAVE_FCHOWN
      /* If owner and group cannot both be set, retry with the group
         only; the result of that second attempt is not checked. */
      if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1)
        fchown (output_fd, -1, input->st.st_gid);
#endif

      ck_fclose (input->fp);
      ck_fclose (output_file.fp);
      /* An extension of exactly "*" means that no backup is kept. */
      if (strcmp(in_place_extension, "*") != 0)
        {
          char *backup_file_name = get_backup_file_name(target_name);
          /* NOTE(review): the third argument is presumably a file for
             ck_rename to remove on failure -- confirm in ck_rename. */
          ck_rename (target_name, backup_file_name, input->out_file_name);
          free (backup_file_name);
        }

      ck_rename (input->out_file_name, target_name, input->out_file_name);
      free (input->out_file_name);
    }
  else
    ck_fclose (input->fp);

  input->fp = NULL;
}
833
834 /* Reset range commands so that they are marked as non-matching */
835 static void reset_addresses P_((struct vector *));
836 static void
reset_addresses(vec)837 reset_addresses(vec)
838 struct vector *vec;
839 {
840 struct sed_cmd *cur_cmd;
841 int n;
842
843 for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++)
844 if (cur_cmd->a1
845 && cur_cmd->a1->addr_type == ADDR_IS_NUM
846 && cur_cmd->a1->addr_number == 0)
847 cur_cmd->range_state = RANGE_ACTIVE;
848 else
849 cur_cmd->range_state = RANGE_INACTIVE;
850 }
851
852 /* Read in the next line of input, and store it in the pattern space.
853 Return zero if there is nothing left to input. */
854 static bool read_pattern_space P_((struct input *, struct vector *, int));
855 static bool
read_pattern_space(input,the_program,append)856 read_pattern_space(input, the_program, append)
857 struct input *input;
858 struct vector *the_program;
859 int append;
860 {
861 if (append_head) /* redundant test to optimize for common case */
862 dump_append_queue();
863 replaced = false;
864 if (!append)
865 line.length = 0;
866 line.chomped = true; /* default, until proved otherwise */
867
868 while ( ! (*input->read_fn)(input) )
869 {
870 closedown(input);
871
872 if (!*input->file_list)
873 return false;
874
875 if (input->reset_at_next_file)
876 {
877 input->line_number = 0;
878 hold.length = 0;
879 reset_addresses (the_program);
880 rewind_read_files ();
881
882 /* If doing in-place editing, we will never append the
883 new-line to this file; but if the output goes to stdout,
884 we might still have to output the missing new-line. */
885 if (in_place_extension)
886 output_file.missing_newline = false;
887
888 input->reset_at_next_file = separate_files;
889 }
890
891 open_next_file (*input->file_list++, input);
892 }
893
894 ++input->line_number;
895 return true;
896 }
897
898
899 static bool last_file_with_data_p P_((struct input *));
900 static bool
last_file_with_data_p(input)901 last_file_with_data_p(input)
902 struct input *input;
903 {
904 for (;;)
905 {
906 int ch;
907
908 closedown(input);
909 if (!*input->file_list)
910 return true;
911 open_next_file(*input->file_list++, input);
912 if (input->fp)
913 {
914 if ((ch = getc(input->fp)) != EOF)
915 {
916 ungetc(ch, input->fp);
917 return false;
918 }
919 }
920 }
921 }
922
923 /* Determine if we match the `$' address. */
924 static bool test_eof P_((struct input *));
925 static bool
test_eof(input)926 test_eof(input)
927 struct input *input;
928 {
929 int ch;
930
931 if (buffer.length)
932 return false;
933 if (!input->fp)
934 return separate_files || last_file_with_data_p(input);
935 if (feof(input->fp))
936 return separate_files || last_file_with_data_p(input);
937 if ((ch = getc(input->fp)) == EOF)
938 return separate_files || last_file_with_data_p(input);
939 ungetc(ch, input->fp);
940 return false;
941 }
942
943 /* Return non-zero if the current line matches the address
944 pointed to by `addr'. */
945 static bool match_an_address_p P_((struct addr *, struct input *));
946 static bool
match_an_address_p(addr,input)947 match_an_address_p(addr, input)
948 struct addr *addr;
949 struct input *input;
950 {
951 switch (addr->addr_type)
952 {
953 case ADDR_IS_NULL:
954 return true;
955
956 case ADDR_IS_REGEX:
957 return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0);
958
959 case ADDR_IS_NUM_MOD:
960 return (input->line_number >= addr->addr_number
961 && ((input->line_number - addr->addr_number) % addr->addr_step) == 0);
962
963 case ADDR_IS_STEP:
964 case ADDR_IS_STEP_MOD:
965 /* reminder: these are only meaningful for a2 addresses */
966 /* a2->addr_number needs to be recomputed each time a1 address
967 matches for the step and step_mod types */
968 return (addr->addr_number <= input->line_number);
969
970 case ADDR_IS_LAST:
971 return test_eof(input);
972
973 /* ADDR_IS_NUM is handled in match_address_p. */
974 case ADDR_IS_NUM:
975 default:
976 panic("INTERNAL ERROR: bad address type");
977 }
978 /*NOTREACHED*/
979 return false;
980 }
981
/* return non-zero if current address is valid for cmd

   A command without a first address matches every line, and a command
   with only a first address matches when that address does.  For a
   two-address command, cmd->range_state records whether the range is
   currently open (RANGE_ACTIVE) or must not be opened again
   (RANGE_CLOSED); this function updates it as a side effect, and for
   step-type second addresses it also recomputes cmd->a2->addr_number
   when a range starts. */
static bool match_address_p P_((struct sed_cmd *, struct input *));
static bool
match_address_p(cmd, input)
  struct sed_cmd *cmd;
  struct input *input;
{
  if (!cmd->a1)
    return true;

  if (cmd->range_state != RANGE_ACTIVE)
    {
      /* Find if we are going to activate a range.  Handle ADDR_IS_NUM
         specially: it represent an "absolute" state, it should not
         be computed like regexes.  */
      if (cmd->a1->addr_type == ADDR_IS_NUM)
        {
          if (!cmd->a2)
            return (input->line_number == cmd->a1->addr_number);

          if (cmd->range_state == RANGE_CLOSED
              || input->line_number < cmd->a1->addr_number)
            return false;
        }
      else
        {
          if (!cmd->a2)
            return match_an_address_p(cmd->a1, input);

          if (!match_an_address_p(cmd->a1, input))
            return false;
        }

      /* Ok, start a new range.  */
      cmd->range_state = RANGE_ACTIVE;
      switch (cmd->a2->addr_type)
        {
        case ADDR_IS_REGEX:
          /* Always include at least two lines.  */
          /* (the end regex is not tried on the line that opens the range) */
          return true;
        case ADDR_IS_NUM:
          /* Same handling as below, but always include at least one line.  */
          if (input->line_number >= cmd->a2->addr_number)
            cmd->range_state = RANGE_CLOSED;
          return true;
        case ADDR_IS_STEP:
          /* The range ends addr_step lines after the current one. */
          cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
          return true;
        case ADDR_IS_STEP_MOD:
          /* The range ends at the next line number that is a multiple
             of addr_step. */
          cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
                                 - (input->line_number%cmd->a2->addr_step);
          return true;
        default:
          /* Other types fall through to the end-of-range test below. */
          break;
        }
    }

  /* cmd->range_state == RANGE_ACTIVE.  Check if the range is
     ending; also handle ADDR_IS_NUM specially in this case.  */

  if (cmd->a2->addr_type == ADDR_IS_NUM)
    {
      /* If the second address is a line number, and if we got past
         that line, fail to match (it can happen when you jump
         over such addresses with `b' and `t'.  Use RANGE_CLOSED
         so that the range is not re-enabled anymore.  */
      if (input->line_number >= cmd->a2->addr_number)
        cmd->range_state = RANGE_CLOSED;

      return (input->line_number <= cmd->a2->addr_number);
    }

  /* Other addresses are treated as usual.  */
  /* The line that matches the second address still belongs to the
     range, hence the unconditional `true' below. */
  if (match_an_address_p(cmd->a2, input))
    cmd->range_state = RANGE_CLOSED;

  return true;
}
1060
1061
/* Write the current line to the output file in a visually unambiguous
   form: printable characters are copied (a backslash is doubled), the
   usual control characters are written as C escapes, and any other
   byte as a backslash followed by three octal digits.  When LINE_LEN
   is positive the output is folded with a backslash-newline so that
   no piece reaches LINE_LEN characters.  The output ends with "$" and
   a newline. */
static void do_list P_((int line_len));
static void
do_list(line_len)
     int line_len;
{
  unsigned char *p = CAST(unsigned char *)line.active;
  countT len = line.length;
  countT width = 0;     /* characters written on the current output line */
  char obuf[180];               /* just in case we encounter a 512-bit char (;-) */
  char *o;
  size_t olen;
  FILE *fp = output_file.fp;

  output_missing_newline(&output_file);
  for (; len--; ++p) {
      /* Build the representation of one input byte in obuf. */
      o = obuf;

      /* Some locales define 8-bit characters as printable.  This makes the
         testsuite fail at 8to7.sed because the `l' command in fact will not
         convert the 8-bit characters. */
#if defined isascii || defined HAVE_ISASCII
      if (isascii(*p) && ISPRINT(*p)) {
#else
      if (ISPRINT(*p)) {
#endif
          *o++ = *p;
          if (*p == '\\')
            *o++ = '\\';
      } else {
          *o++ = '\\';
          switch (*p) {
#if defined __STDC__ && __STDC__-0
            case '\a': *o++ = 'a'; break;
#else /* Not STDC; we'll just assume ASCII */
            case 007: *o++ = 'a'; break;
#endif
            case '\b': *o++ = 'b'; break;
            case '\f': *o++ = 'f'; break;
            case '\n': *o++ = 'n'; break;
            case '\r': *o++ = 'r'; break;
            case '\t': *o++ = 't'; break;
            case '\v': *o++ = 'v'; break;
            default:
              sprintf(o, "%03o", *p);
              o += strlen(o);
              break;
            }
      }
      olen = o - obuf;
      /* Fold before writing a representation that would reach the limit. */
      if (width+olen >= line_len && line_len > 0) {
          ck_fwrite("\\\n", 1, 2, fp);
          width = 0;
      }
      ck_fwrite(obuf, 1, olen, fp);
      width += olen;
  }
  ck_fwrite("$\n", 1, 2, fp);
  flush_output (fp);
}
1121
1122
1123 static enum replacement_types append_replacement P_((struct line *, struct replacement *,
1124 struct re_registers *,
1125 enum replacement_types));
1126 static enum replacement_types
append_replacement(buf,p,regs,repl_mod)1127 append_replacement (buf, p, regs, repl_mod)
1128 struct line *buf;
1129 struct replacement *p;
1130 struct re_registers *regs;
1131 enum replacement_types repl_mod;
1132 {
1133 for (; p; p=p->next)
1134 {
1135 int i = p->subst_id;
1136 enum replacement_types curr_type;
1137
1138 /* Apply a \[lu] modifier that was given earlier, but which we
1139 have not had yet the occasion to apply. But don't do it
1140 if this replacement has a modifier of its own. */
1141 curr_type = (p->repl_type & REPL_MODIFIERS)
1142 ? p->repl_type
1143 : p->repl_type | repl_mod;
1144
1145 repl_mod = 0;
1146 if (p->prefix_length)
1147 {
1148 str_append_modified(buf, p->prefix, p->prefix_length,
1149 curr_type);
1150 curr_type &= ~REPL_MODIFIERS;
1151 }
1152
1153 if (0 <= i)
1154 {
1155 if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS)
1156 /* Save this modifier, we shall apply it later.
1157 e.g. in s/()([a-z])/\u\1\2/
1158 the \u modifier is applied to \2, not \1 */
1159 repl_mod = curr_type & REPL_MODIFIERS;
1160
1161 else if (regs->end[i] != regs->start[i])
1162 str_append_modified(buf, line.active + regs->start[i],
1163 CAST(size_t)(regs->end[i] - regs->start[i]),
1164 curr_type);
1165 }
1166 }
1167
1168 return repl_mod;
1169 }
1170
1171 static void do_subst P_((struct subst *));
1172 static void
do_subst(sub)1173 do_subst(sub)
1174 struct subst *sub;
1175 {
1176 size_t start = 0; /* where to start scan for (next) match in LINE */
1177 size_t last_end = 0; /* where did the last successful match end in LINE */
1178 countT count = 0; /* number of matches found */
1179 bool again = true;
1180
1181 static struct re_registers regs;
1182
1183 line_reset(&s_accum, &line);
1184
1185 /* The first part of the loop optimizes s/xxx// when xxx is at the
1186 start, and s/xxx$// */
1187 if (!match_regex(sub->regx, line.active, line.length, start,
1188 ®s, sub->max_id + 1))
1189 return;
1190
1191 if (!sub->replacement && sub->numb <= 1)
1192 {
1193 if (regs.start[0] == 0 && !sub->global)
1194 {
1195 /* We found a match, set the `replaced' flag. */
1196 replaced = true;
1197
1198 line.active += regs.end[0];
1199 line.length -= regs.end[0];
1200 line.alloc -= regs.end[0];
1201 goto post_subst;
1202 }
1203 else if (regs.end[0] == line.length)
1204 {
1205 /* We found a match, set the `replaced' flag. */
1206 replaced = true;
1207
1208 line.length = regs.start[0];
1209 goto post_subst;
1210 }
1211 }
1212
1213 do
1214 {
1215 enum replacement_types repl_mod = 0;
1216
1217 size_t offset = regs.start[0];
1218 size_t matched = regs.end[0] - regs.start[0];
1219
1220 /* Copy stuff to the left of this match into the output string. */
1221 if (start < offset)
1222 str_append(&s_accum, line.active + start, offset - start);
1223
1224 /* If we're counting up to the Nth match, are we there yet?
1225 And even if we are there, there is another case we have to
1226 skip: are we matching an empty string immediately following
1227 another match?
1228
1229 This latter case avoids that baaaac, when passed through
1230 s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is
1231 unacceptable because it is not consistently applied (for
1232 example, `baaaa' gives `xbx', not `xbxx'). */
1233 if ((matched > 0 || count == 0 || offset > last_end)
1234 && ++count >= sub->numb)
1235 {
1236 /* We found a match, set the `replaced' flag. */
1237 replaced = true;
1238
1239 /* Now expand the replacement string into the output string. */
1240 repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod);
1241 again = sub->global;
1242 }
1243 else
1244 {
1245 /* The match was not replaced. Copy the text until its
1246 end; if it was vacuous, skip over one character and
1247 add that character to the output. */
1248 if (matched == 0)
1249 {
1250 if (start < line.length)
1251 matched = 1;
1252 else
1253 break;
1254 }
1255
1256 str_append(&s_accum, line.active + offset, matched);
1257 }
1258
1259 /* Start after the match. last_end is the real end of the matched
1260 substring, excluding characters that were skipped in case the RE
1261 matched the empty string. */
1262 start = offset + matched;
1263 last_end = regs.end[0];
1264 }
1265 while (again
1266 && start <= line.length
1267 && match_regex(sub->regx, line.active, line.length, start,
1268 ®s, sub->max_id + 1));
1269
1270 /* Copy stuff to the right of the last match into the output string. */
1271 if (start < line.length)
1272 str_append(&s_accum, line.active + start, line.length-start);
1273 s_accum.chomped = line.chomped;
1274
1275 /* Exchange line and s_accum. This can be much cheaper
1276 than copying s_accum.active into line.text (for huge lines). */
1277 line_exchange(&line, &s_accum, false);
1278
1279 /* Finish up. */
1280 if (count < sub->numb)
1281 return;
1282
1283 post_subst:
1284 if (sub->print & 1)
1285 output_line(line.active, line.length, line.chomped, &output_file);
1286
1287 if (sub->eval)
1288 {
1289 #ifdef HAVE_POPEN
1290 FILE *pipe_fp;
1291 line_reset(&s_accum, NULL);
1292
1293 str_append (&line, "", 1);
1294 pipe_fp = popen(line.active, "r");
1295
1296 if (pipe_fp != NULL)
1297 {
1298 while (!feof (pipe_fp))
1299 {
1300 char buf[4096];
1301 int n = fread (buf, sizeof(char), 4096, pipe_fp);
1302 if (n > 0)
1303 str_append(&s_accum, buf, n);
1304 }
1305
1306 pclose (pipe_fp);
1307
1308 /* Exchange line and s_accum. This can be much cheaper than copying
1309 s_accum.active into line.text (for huge lines). See comment above
1310 for 'g' as to while the third argument is incorrect anyway. */
1311 line_exchange(&line, &s_accum, true);
1312 if (line.length &&
1313 line.active[line.length - 1] == '\n')
1314 line.length--;
1315 }
1316 else
1317 panic(_("error in subprocess"));
1318 #else
1319 panic(_("option `e' not supported"));
1320 #endif
1321 }
1322
1323 if (sub->print & 2)
1324 output_line(line.active, line.length, line.chomped, &output_file);
1325 if (sub->outf)
1326 output_line(line.active, line.length, line.chomped, sub->outf);
1327 }
1328
1329 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1330 /* Used to attempt a simple-minded optimization. */
1331
1332 static countT branches;
1333
1334 static countT count_branches P_((struct vector *));
1335 static countT
count_branches(program)1336 count_branches(program)
1337 struct vector *program;
1338 {
1339 struct sed_cmd *cur_cmd = program->v;
1340 countT isn_cnt = program->v_length;
1341 countT cnt = 0;
1342
1343 while (isn_cnt-- > 0)
1344 {
1345 switch (cur_cmd->cmd)
1346 {
1347 case 'b':
1348 case 't':
1349 case 'T':
1350 case '{':
1351 ++cnt;
1352 }
1353 }
1354 return cnt;
1355 }
1356
1357 static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
1358 static struct sed_cmd *
shrink_program(vec,cur_cmd)1359 shrink_program(vec, cur_cmd)
1360 struct vector *vec;
1361 struct sed_cmd *cur_cmd;
1362 {
1363 struct sed_cmd *v = vec->v;
1364 struct sed_cmd *last_cmd = v + vec->v_length;
1365 struct sed_cmd *p;
1366 countT cmd_cnt;
1367
1368 for (p=v; p < cur_cmd; ++p)
1369 if (p->cmd != '#')
1370 MEMCPY(v++, p, sizeof *v);
1371 cmd_cnt = v - vec->v;
1372
1373 for (; p < last_cmd; ++p)
1374 if (p->cmd != '#')
1375 MEMCPY(v++, p, sizeof *v);
1376 vec->v_length = v - vec->v;
1377
1378 return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0;
1379 }
1380 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1381
1382 /* Execute the program `vec' on the current input line.
1383 Return exit status if caller should quit, -1 otherwise. */
1384 static int execute_program P_((struct vector *, struct input *));
1385 static int
execute_program(vec,input)1386 execute_program(vec, input)
1387 struct vector *vec;
1388 struct input *input;
1389 {
1390 struct sed_cmd *cur_cmd;
1391 struct sed_cmd *end_cmd;
1392
1393 cur_cmd = vec->v;
1394 end_cmd = vec->v + vec->v_length;
1395 while (cur_cmd < end_cmd)
1396 {
1397 if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
1398 {
1399 switch (cur_cmd->cmd)
1400 {
1401 case 'a':
1402 {
1403 struct append_queue *aq = next_append_slot();
1404 aq->text = cur_cmd->x.cmd_txt.text;
1405 aq->textlen = cur_cmd->x.cmd_txt.text_length;
1406 }
1407 break;
1408
1409 case '{':
1410 case 'b':
1411 cur_cmd = vec->v + cur_cmd->x.jump_index;
1412 continue;
1413
1414 case '}':
1415 case '#':
1416 case ':':
1417 /* Executing labels and block-ends are easy. */
1418 break;
1419
1420 case 'c':
1421 if (cur_cmd->range_state != RANGE_ACTIVE)
1422 output_line(cur_cmd->x.cmd_txt.text,
1423 cur_cmd->x.cmd_txt.text_length - 1, true,
1424 &output_file);
1425 /* POSIX.2 is silent about c starting a new cycle,
1426 but it seems to be expected (and make sense). */
1427 /* Fall Through */
1428 case 'd':
1429 return -1;
1430
1431 case 'D':
1432 {
1433 char *p = memchr(line.active, '\n', line.length);
1434 if (!p)
1435 return -1;
1436
1437 ++p;
1438 line.alloc -= p - line.active;
1439 line.length -= p - line.active;
1440 line.active += p - line.active;
1441
1442 /* reset to start next cycle without reading a new line: */
1443 cur_cmd = vec->v;
1444 continue;
1445 }
1446
1447 case 'e': {
1448 #ifdef HAVE_POPEN
1449 FILE *pipe_fp;
1450 int cmd_length = cur_cmd->x.cmd_txt.text_length;
1451 line_reset(&s_accum, NULL);
1452
1453 if (!cmd_length)
1454 {
1455 str_append (&line, "", 1);
1456 pipe_fp = popen(line.active, "r");
1457 }
1458 else
1459 {
1460 cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
1461 pipe_fp = popen(cur_cmd->x.cmd_txt.text, "r");
1462 output_missing_newline(&output_file);
1463 }
1464
1465 if (pipe_fp != NULL)
1466 {
1467 char buf[4096];
1468 int n;
1469 while (!feof (pipe_fp))
1470 if ((n = fread (buf, sizeof(char), 4096, pipe_fp)) > 0)
1471 {
1472 if (!cmd_length)
1473 str_append(&s_accum, buf, n);
1474 else
1475 ck_fwrite(buf, 1, n, output_file.fp);
1476 }
1477
1478 pclose (pipe_fp);
1479 if (!cmd_length)
1480 {
1481 /* Store into pattern space for plain `e' commands */
1482 if (s_accum.length &&
1483 s_accum.active[s_accum.length - 1] == '\n')
1484 s_accum.length--;
1485
1486 /* Exchange line and s_accum. This can be much
1487 cheaper than copying s_accum.active into line.text
1488 (for huge lines). See comment above for 'g' as
1489 to while the third argument is incorrect anyway. */
1490 line_exchange(&line, &s_accum, true);
1491 }
1492 else
1493 flush_output(output_file.fp);
1494
1495 }
1496 else
1497 panic(_("error in subprocess"));
1498 #else
1499 panic(_("`e' command not supported"));
1500 #endif
1501 break;
1502 }
1503
1504 case 'g':
1505 /* We do not have a really good choice for the third parameter.
1506 The problem is that hold space and the input file might as
1507 well have different states; copying it from hold space means
1508 that subsequent input might be read incorrectly, while
1509 keeping it as in pattern space means that commands operating
1510 on the moved buffer might consider a wrong character set.
1511 We keep it true because it's what sed <= 4.1.5 did. */
1512 line_copy(&hold, &line, true);
1513 break;
1514
1515 case 'G':
1516 /* We do not have a really good choice for the third parameter.
1517 The problem is that hold space and pattern space might as
1518 well have different states. So, true is as wrong as false.
1519 We keep it true because it's what sed <= 4.1.5 did, but
1520 we could consider having line_ap. */
1521 line_append(&hold, &line, true);
1522 break;
1523
1524 case 'h':
1525 /* Here, it is ok to have true. */
1526 line_copy(&line, &hold, true);
1527 break;
1528
1529 case 'H':
1530 /* See comment above for 'G' regarding the third parameter. */
1531 line_append(&line, &hold, true);
1532 break;
1533
1534 case 'i':
1535 output_line(cur_cmd->x.cmd_txt.text,
1536 cur_cmd->x.cmd_txt.text_length - 1,
1537 true, &output_file);
1538 break;
1539
1540 case 'l':
1541 do_list(cur_cmd->x.int_arg == -1
1542 ? lcmd_out_line_len
1543 : cur_cmd->x.int_arg);
1544 break;
1545
1546 case 'L':
1547 output_missing_newline(&output_file);
1548 fmt(line.active, line.active + line.length,
1549 cur_cmd->x.int_arg == -1
1550 ? lcmd_out_line_len
1551 : cur_cmd->x.int_arg,
1552 output_file.fp);
1553 flush_output(output_file.fp);
1554 break;
1555
1556 case 'n':
1557 if (!no_default_output)
1558 output_line(line.active, line.length, line.chomped, &output_file);
1559 if (test_eof(input) || !read_pattern_space(input, vec, false))
1560 return -1;
1561 break;
1562
1563 case 'N':
1564 str_append(&line, "\n", 1);
1565
1566 if (test_eof(input) || !read_pattern_space(input, vec, true))
1567 {
1568 line.length--;
1569 if (posixicity == POSIXLY_EXTENDED && !no_default_output)
1570 output_line(line.active, line.length, line.chomped,
1571 &output_file);
1572 return -1;
1573 }
1574 break;
1575
1576 case 'p':
1577 output_line(line.active, line.length, line.chomped, &output_file);
1578 break;
1579
1580 case 'P':
1581 {
1582 char *p = memchr(line.active, '\n', line.length);
1583 output_line(line.active, p ? p - line.active : line.length,
1584 p ? true : line.chomped, &output_file);
1585 }
1586 break;
1587
1588 case 'q':
1589 if (!no_default_output)
1590 output_line(line.active, line.length, line.chomped, &output_file);
1591 dump_append_queue();
1592
1593 case 'Q':
1594 return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
1595
1596 case 'r':
1597 if (cur_cmd->x.fname)
1598 {
1599 struct append_queue *aq = next_append_slot();
1600 aq->fname = cur_cmd->x.fname;
1601 }
1602 break;
1603
1604 case 'R':
1605 if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
1606 {
1607 struct append_queue *aq;
1608 size_t buflen;
1609 char *text = NULL;
1610 int result;
1611
1612 result = ck_getline (&text, &buflen, cur_cmd->x.fp);
1613 if (result != EOF)
1614 {
1615 aq = next_append_slot();
1616 aq->free = true;
1617 aq->text = text;
1618 aq->textlen = result;
1619 }
1620 }
1621 break;
1622
1623 case 's':
1624 do_subst(cur_cmd->x.cmd_subst);
1625 break;
1626
1627 case 't':
1628 if (replaced)
1629 {
1630 replaced = false;
1631 cur_cmd = vec->v + cur_cmd->x.jump_index;
1632 continue;
1633 }
1634 break;
1635
1636 case 'T':
1637 if (!replaced)
1638 {
1639 cur_cmd = vec->v + cur_cmd->x.jump_index;
1640 continue;
1641 }
1642 else
1643 replaced = false;
1644 break;
1645
1646 case 'w':
1647 if (cur_cmd->x.fp)
1648 output_line(line.active, line.length,
1649 line.chomped, cur_cmd->x.outf);
1650 break;
1651
1652 case 'W':
1653 if (cur_cmd->x.fp)
1654 {
1655 char *p = memchr(line.active, '\n', line.length);
1656 output_line(line.active, p ? p - line.active : line.length,
1657 p ? true : line.chomped, cur_cmd->x.outf);
1658 }
1659 break;
1660
1661 case 'x':
1662 /* See comment above for 'g' regarding the third parameter. */
1663 line_exchange(&line, &hold, false);
1664 break;
1665
1666 case 'y':
1667 {
1668 #ifdef HAVE_MBRTOWC
1669 if (mb_cur_max > 1)
1670 {
1671 int idx, prev_idx; /* index in the input line. */
1672 char **trans;
1673 mbstate_t mbstate;
1674 memset(&mbstate, 0, sizeof(mbstate_t));
1675 for (idx = 0; idx < line.length;)
1676 {
1677 int mbclen, i;
1678 mbclen = MBRLEN (line.active + idx, line.length - idx,
1679 &mbstate);
1680 /* An invalid sequence, or a truncated multibyte
1681 character. We treat it as a singlebyte character.
1682 */
1683 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1684 || mbclen == 0)
1685 mbclen = 1;
1686
1687 trans = cur_cmd->x.translatemb;
1688 /* `i' indicate i-th translate pair. */
1689 for (i = 0; trans[2*i] != NULL; i++)
1690 {
1691 if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
1692 {
1693 bool move_remain_buffer = false;
1694 int trans_len = strlen(trans[2*i+1]);
1695
1696 if (mbclen < trans_len)
1697 {
1698 int new_len;
1699 new_len = line.length + 1 + trans_len - mbclen;
1700 /* We must extend the line buffer. */
1701 if (line.alloc < new_len)
1702 {
1703 /* And we must resize the buffer. */
1704 resize_line(&line, new_len);
1705 }
1706 move_remain_buffer = true;
1707 }
1708 else if (mbclen > trans_len)
1709 {
1710 /* We must truncate the line buffer. */
1711 move_remain_buffer = true;
1712 }
1713 prev_idx = idx;
1714 if (move_remain_buffer)
1715 {
1716 int move_len, move_offset;
1717 char *move_from, *move_to;
1718 /* Move the remaining with \0. */
1719 move_from = line.active + idx + mbclen;
1720 move_to = line.active + idx + trans_len;
1721 move_len = line.length + 1 - idx - mbclen;
1722 move_offset = trans_len - mbclen;
1723 memmove(move_to, move_from, move_len);
1724 line.length += move_offset;
1725 idx += move_offset;
1726 }
1727 strncpy(line.active + prev_idx, trans[2*i+1],
1728 trans_len);
1729 break;
1730 }
1731 }
1732 idx += mbclen;
1733 }
1734 }
1735 else
1736 #endif /* HAVE_MBRTOWC */
1737 {
1738 unsigned char *p, *e;
1739 p = CAST(unsigned char *)line.active;
1740 for (e=p+line.length; p<e; ++p)
1741 *p = cur_cmd->x.translate[*p];
1742 }
1743 }
1744 break;
1745
1746 case 'z':
1747 line.length = 0;
1748 break;
1749
1750 case '=':
1751 output_missing_newline(&output_file);
1752 fprintf(output_file.fp, "%lu\n",
1753 CAST(unsigned long)input->line_number);
1754 flush_output(output_file.fp);
1755 break;
1756
1757 default:
1758 panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
1759 }
1760 }
1761
1762 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1763 /* If our top-level program consists solely of commands with
1764 ADDR_IS_NUM addresses then once we past the last mentioned
1765 line we should be able to quit if no_default_output is true,
1766 or otherwise quickly copy input to output. Now whether this
1767 optimization is a win or not depends on how cheaply we can
1768 implement this for the cases where it doesn't help, as
1769 compared against how much time is saved. One semantic
1770 difference (which I think is an improvement) is that *this*
1771 version will terminate after printing line two in the script
1772 "yes | sed -n 2p".
1773
1774 Don't use this when in-place editing is active, because line
1775 numbers restart each time then. */
1776 else if (!separate_files)
1777 {
1778 if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1779 && (cur_cmd->a2
1780 ? cur_cmd->range_state == RANGE_CLOSED
1781 : cur_cmd->a1->addr_number < input->line_number))
1782 {
1783 /* Skip this address next time */
1784 cur_cmd->addr_bang = !cur_cmd->addr_bang;
1785 cur_cmd->a1->addr_type = ADDR_IS_NULL;
1786 if (cur_cmd->a2)
1787 cur_cmd->a2->addr_type = ADDR_IS_NULL;
1788
1789 /* can we make an optimization? */
1790 if (cur_cmd->addr_bang)
1791 {
1792 if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
1793 || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
1794 branches--;
1795
1796 cur_cmd->cmd = '#'; /* replace with no-op */
1797 if (branches == 0)
1798 cur_cmd = shrink_program(vec, cur_cmd);
1799 if (!cur_cmd && no_default_output)
1800 return 0;
1801 end_cmd = vec->v + vec->v_length;
1802 if (!cur_cmd)
1803 cur_cmd = end_cmd;
1804 continue;
1805 }
1806 }
1807 }
1808 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1809
1810 /* this is buried down here so that a "continue" statement can skip it */
1811 ++cur_cmd;
1812 }
1813
1814 if (!no_default_output)
1815 output_line(line.active, line.length, line.chomped, &output_file);
1816 return -1;
1817 }
1818
1819
1820
1821 /* Apply the compiled script to all the named files. */
1822 int
process_files(the_program,argv)1823 process_files(the_program, argv)
1824 struct vector *the_program;
1825 char **argv;
1826 {
1827 static char dash[] = "-";
1828 static char *stdin_argv[2] = { dash, NULL };
1829 struct input input;
1830 int status;
1831
1832 line_init(&line, NULL, INITIAL_BUFFER_SIZE);
1833 line_init(&hold, NULL, 0);
1834 line_init(&buffer, NULL, 0);
1835
1836 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1837 branches = count_branches(the_program);
1838 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1839 input.reset_at_next_file = true;
1840 if (argv && *argv)
1841 input.file_list = argv;
1842 else if (in_place_extension)
1843 panic(_("no input files"));
1844 else
1845 input.file_list = stdin_argv;
1846
1847 input.bad_count = 0;
1848 input.line_number = 0;
1849 input.read_fn = read_always_fail;
1850 input.fp = NULL;
1851
1852 status = EXIT_SUCCESS;
1853 while (read_pattern_space(&input, the_program, false))
1854 {
1855 status = execute_program(the_program, &input);
1856 if (status == -1)
1857 status = EXIT_SUCCESS;
1858 else
1859 break;
1860 }
1861 closedown(&input);
1862
1863 #ifdef DEBUG_LEAKS
1864 /* We're about to exit, so these free()s are redundant.
1865 But if we're running under a memory-leak detecting
1866 implementation of malloc(), we want to explicitly
1867 deallocate in order to avoid extraneous noise from
1868 the allocator. */
1869 release_append_queue();
1870 FREE(buffer.text);
1871 FREE(hold.text);
1872 FREE(line.text);
1873 FREE(s_accum.text);
1874 #endif /*DEBUG_LEAKS*/
1875
1876 if (input.bad_count)
1877 status = 2;
1878
1879 return status;
1880 }
1881