• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*  GNU SED, a batch stream editor.
2     Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2009
3     Free Software Foundation, Inc.
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 3, or (at your option)
8     any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 
19 #undef EXPERIMENTAL_DASH_N_OPTIMIZATION	/*don't use -- is very buggy*/
20 #define INITIAL_BUFFER_SIZE	50
21 #define FREAD_BUFFER_SIZE	8192
22 
23 #include "sed.h"
24 
25 #include <stddef.h>
26 #include <stdio.h>
27 #include <ctype.h>
28 
29 #include <errno.h>
30 #ifndef errno
31 extern int errno;
32 #endif
33 
34 #ifndef BOOTSTRAP
35 #include <selinux/selinux.h>
36 #include <selinux/context.h>
37 #endif
38 
39 #ifdef HAVE_UNISTD_H
40 # include <unistd.h>
41 #endif
42 
43 #ifndef BOOTSTRAP
44 #include "acl.h"
45 #endif
46 
47 #ifdef __GNUC__
48 # if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
49    /* silence warning about unused parameter even for "gcc -W -Wunused" */
50 #  define UNUSED	__attribute__((unused))
51 # endif
52 #endif
53 #ifndef UNUSED
54 # define UNUSED
55 #endif
56 
57 #ifdef HAVE_STRINGS_H
58 # include <strings.h>
59 #else
60 # include <string.h>
61 #endif /*HAVE_STRINGS_H*/
62 #ifdef HAVE_MEMORY_H
63 # include <memory.h>
64 #endif
65 
66 #ifndef HAVE_STRCHR
67 # define strchr index
68 # define strrchr rindex
69 #endif
70 
71 #ifdef HAVE_STDLIB_H
72 # include <stdlib.h>
73 #endif
74 #ifndef EXIT_SUCCESS
75 # define EXIT_SUCCESS 0
76 #endif
77 
78 #ifdef HAVE_SYS_TYPES_H
79 # include <sys/types.h>
80 #endif
81 
82 #include <sys/stat.h>
83 #include "stat-macros.h"
84 
85 
/* The unit of work: sed processes its input one line at a time.
   NOTE: `mbstate' must stay the last member.  SIZEOF_LINE below is its
   offset, which lets line_exchange() swap everything except the
   multibyte state with a single copy.  */
struct line {
  char *text;		/* Start of the malloc'ed storage. */
  char *active;		/* First byte of text that is not yet consumed. */
  size_t length;	/* Number of bytes in text (or in active, if used). */
  size_t alloc;		/* Bytes of storage available for active. */
  bool chomped;		/* True if a trailing newline was dropped. */
#ifdef HAVE_MBRTOWC
  mbstate_t mbstate;
#endif
};

/* Size of a struct line, leaving out the trailing multibyte state
   when there is one. */
#ifndef HAVE_MBRTOWC
#define SIZEOF_LINE	(sizeof (struct line))
#else
#define SIZEOF_LINE	offsetof (struct line, mbstate)
#endif
103 
/* One entry of the list of output that is deferred until the end of
   the current cycle.  Entries are created on behalf of the "a", "r"
   and "R" commands. */
struct append_queue {
  const char *fname;		/* File whose contents are copied out, or NULL. */
  char *text;			/* Literal text to write, or NULL. */
  size_t textlen;		/* Number of bytes in text. */
  struct append_queue *next;	/* Following entry; NULL at the tail. */
  bool free;			/* True if text must be freed with the entry. */
};
113 
114 /* State information for the input stream. */
115 struct input {
116   /* The list of yet-to-be-opened files.  It is invalid for file_list
117      to be NULL.  When *file_list is NULL we are currently processing
118      the last file.  */
119 
120   char **file_list;
121 
122   /* Count of files we failed to open. */
123   countT bad_count;
124 
125   /* Current input line number (over all files).  */
126   countT line_number;
127 
128   /* True if we'll reset line numbers and addresses before
129      starting to process the next (possibly the first) file.  */
130   bool reset_at_next_file;
131 
132   /* Function to read one line.  If FP is NULL, read_fn better not
133      be one which uses fp; in particular, read_always_fail() is
134      recommended. */
135   bool (*read_fn) P_((struct input *));	/* read one line */
136 
137   char *out_file_name;
138 
139   const char *in_file_name;
140 
141   /* Owner and mode to be set just before closing the file.  */
142   struct stat st;
143 
144   /* if NULL, none of the following are valid */
145   FILE *fp;
146 
147   bool no_buffering;
148 };
149 
150 
151 /* Have we done any replacements lately?  This is used by the `t' command. */
152 static bool replaced = false;
153 
154 /* The current output file (stdout if -i is not being used. */
155 static struct output output_file;
156 
157 /* The `current' input line. */
158 static struct line line;
159 
160 /* An input line used to accumulate the result of the s and e commands. */
161 static struct line s_accum;
162 
163 /* An input line that's been stored by later use by the program */
164 static struct line hold;
165 
166 /* The buffered input look-ahead.  The only field that should be
167    used outside of read_mem_line() or line_init() is buffer.length. */
168 static struct line buffer;
169 
170 static struct append_queue *append_head = NULL;
171 static struct append_queue *append_tail = NULL;
172 
173 
#ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
   memchr(), and ../lib/memchr.c requires configuration knowledge
   about how many bits are in a `long'.  This implementation
   is far from ideal, but it should get us up-and-limping well
   enough to run the configure script, which is all that matters.
*/
# ifdef memchr
#  undef memchr
# endif
# define memchr bootstrap_memchr

/* Return a pointer to the first of the `n' bytes starting at `s' that
   equals `c' converted to unsigned char, or a null pointer if no such
   byte exists. */
static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
static VOID *
bootstrap_memchr(s, c, n)
  const VOID *s;
  int c;
  size_t n;
{
  /* Compare as unsigned char, the way memchr() is specified to.  With a
     plain (possibly signed) char a byte >= 0x80 reads back negative and
     never equals a `c' that was passed as an unsigned char value.  */
  const unsigned char *p = CAST(const unsigned char *)s;
  unsigned char wanted = CAST(unsigned char)c;

  for (; n-- > 0; ++p)
    if (*p == wanted)
      return CAST(VOID *)p;
  return CAST(VOID *)0;
}
#endif /*BOOTSTRAP*/
201 
202 /* increase a struct line's length, making some attempt at
203    keeping realloc() calls under control by padding for future growth.  */
204 static void resize_line P_((struct line *, size_t));
205 static void
resize_line(lb,len)206 resize_line(lb, len)
207   struct line *lb;
208   size_t len;
209 {
210   int inactive;
211   inactive = lb->active - lb->text;
212 
213   /* If the inactive part has got to more than two thirds of the buffer,
214    * remove it. */
215   if (inactive > lb->alloc * 2)
216     {
217       MEMMOVE(lb->text, lb->active, lb->length);
218       lb->alloc += lb->active - lb->text;
219       lb->active = lb->text;
220       inactive = 0;
221 
222       if (lb->alloc > len)
223 	return;
224     }
225 
226   lb->alloc *= 2;
227   if (lb->alloc < len)
228     lb->alloc = len;
229   if (lb->alloc < INITIAL_BUFFER_SIZE)
230     lb->alloc = INITIAL_BUFFER_SIZE;
231 
232   lb->text = REALLOC(lb->text, inactive + lb->alloc, char);
233   lb->active = lb->text + inactive;
234 }
235 
236 /* Append `length' bytes from `string' to the line `to'. */
237 static void str_append P_((struct line *, const char *, size_t));
238 static void
str_append(to,string,length)239 str_append(to, string, length)
240   struct line *to;
241   const char *string;
242   size_t length;
243 {
244   size_t new_length = to->length + length;
245 
246   if (to->alloc < new_length)
247     resize_line(to, new_length);
248   MEMCPY(to->active + to->length, string, length);
249   to->length = new_length;
250 
251 #ifdef HAVE_MBRTOWC
252   if (mb_cur_max > 1 && !is_utf8)
253     while (length)
254       {
255         size_t n = MBRLEN (string, length, &to->mbstate);
256 
257         /* An invalid sequence is treated like a singlebyte character. */
258         if (n == (size_t) -1)
259 	  {
260 	    memset (&to->mbstate, 0, sizeof (to->mbstate));
261 	    n = 1;
262 	  }
263 
264         if (n > 0)
265 	  {
266 	    string += n;
267 	    length -= n;
268 	  }
269         else
270 	  break;
271       }
272 #endif
273 }
274 
275 static void str_append_modified P_((struct line *, const char *, size_t,
276 				    enum replacement_types));
277 static void
str_append_modified(to,string,length,type)278 str_append_modified(to, string, length, type)
279   struct line *to;
280   const char *string;
281   size_t length;
282   enum replacement_types type;
283 {
284 #ifdef HAVE_MBRTOWC
285   mbstate_t from_stat;
286 
287   if (type == REPL_ASIS)
288     {
289       str_append(to, string, length);
290       return;
291     }
292 
293   if (to->alloc - to->length < length * mb_cur_max)
294     resize_line(to, to->length + length * mb_cur_max);
295 
296   MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
297   while (length)
298     {
299       wchar_t wc;
300       int n = MBRTOWC (&wc, string, length, &from_stat);
301 
302       /* An invalid sequence is treated like a singlebyte character. */
303       if (n == -1)
304         {
305           memset (&to->mbstate, 0, sizeof (from_stat));
306           n = 1;
307         }
308 
309       if (n > 0)
310         string += n, length -= n;
311       else
312 	{
313 	  /* Incomplete sequence, copy it manually.  */
314 	  str_append(to, string, length);
315 	  return;
316 	}
317 
318       /* Convert the first character specially... */
319       if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
320 	{
321           if (type & REPL_UPPERCASE_FIRST)
322             wc = towupper(wc);
323           else
324             wc = towlower(wc);
325 
326           type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
327 	  if (type == REPL_ASIS)
328 	    {
329 	      n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
330 	      to->length += n;
331 	      str_append(to, string, length);
332 	      return;
333 	    }
334         }
335 
336       else if (type & REPL_UPPERCASE)
337         wc = towupper(wc);
338       else
339         wc = towlower(wc);
340 
341       /* Copy the new wide character to the end of the string. */
342       n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
343       to->length += n;
344       if (n == -1)
345 	{
346 	  fprintf (stderr, "Case conversion produced an invalid character!");
347 	  abort ();
348 	}
349     }
350 #else
351   size_t old_length = to->length;
352   char *start, *end;
353 
354   str_append(to, string, length);
355   start = to->active + old_length;
356   end = start + length;
357 
358   /* Now do the required modifications.  First \[lu]... */
359   if (type & REPL_UPPERCASE_FIRST)
360     {
361       *start = toupper(*start);
362       start++;
363       type &= ~REPL_UPPERCASE_FIRST;
364     }
365   else if (type & REPL_LOWERCASE_FIRST)
366     {
367       *start = tolower(*start);
368       start++;
369       type &= ~REPL_LOWERCASE_FIRST;
370     }
371 
372   if (type == REPL_ASIS)
373     return;
374 
375   /* ...and then \[LU] */
376   if (type == REPL_UPPERCASE)
377     for (; start != end; start++)
378       *start = toupper(*start);
379   else
380     for (; start != end; start++)
381       *start = tolower(*start);
382 #endif
383 }
384 
385 /* Initialize a "struct line" buffer.  Copy multibyte state from `state'
386    if not null.  */
387 static void line_init P_((struct line *, struct line *, size_t initial_size));
388 static void
line_init(buf,state,initial_size)389 line_init(buf, state, initial_size)
390   struct line *buf;
391   struct line *state;
392   size_t initial_size;
393 {
394   buf->text = MALLOC(initial_size, char);
395   buf->active = buf->text;
396   buf->alloc = initial_size;
397   buf->length = 0;
398   buf->chomped = true;
399 
400 #ifdef HAVE_MBRTOWC
401   if (state)
402     memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
403   else
404     memset (&buf->mbstate, 0, sizeof (buf->mbstate));
405 #endif
406 }
407 
408 /* Reset a "struct line" buffer to length zero.  Copy multibyte state from
409    `state' if not null.  */
410 static void line_reset P_((struct line *, struct line *));
411 static void
line_reset(buf,state)412 line_reset(buf, state)
413   struct line *buf, *state;
414 {
415   if (buf->alloc == 0)
416     line_init(buf, state, INITIAL_BUFFER_SIZE);
417   else
418     {
419       buf->length = 0;
420 #ifdef HAVE_MBRTOWC
421       if (state)
422         memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
423       else
424         memset (&buf->mbstate, 0, sizeof (buf->mbstate));
425 #endif
426     }
427 }
428 
429 /* Copy the contents of the line `from' into the line `to'.
430    This destroys the old contents of `to'.
431    Copy the multibyte state if `state' is true. */
432 static void line_copy P_((struct line *from, struct line *to, int state));
433 static void
line_copy(from,to,state)434 line_copy(from, to, state)
435   struct line *from;
436   struct line *to;
437   int state;
438 {
439   /* Remove the inactive portion in the destination buffer. */
440   to->alloc += to->active - to->text;
441 
442   if (to->alloc < from->length)
443     {
444       to->alloc *= 2;
445       if (to->alloc < from->length)
446 	to->alloc = from->length;
447       if (to->alloc < INITIAL_BUFFER_SIZE)
448 	to->alloc = INITIAL_BUFFER_SIZE;
449       /* Use FREE()+MALLOC() instead of REALLOC() to
450 	 avoid unnecessary copying of old text. */
451       FREE(to->text);
452       to->text = MALLOC(to->alloc, char);
453     }
454 
455   to->active = to->text;
456   to->length = from->length;
457   to->chomped = from->chomped;
458   MEMCPY(to->active, from->active, from->length);
459 
460 #ifdef HAVE_MBRTOWC
461   if (state)
462     MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
463 #endif
464 }
465 
466 /* Append the contents of the line `from' to the line `to'.
467    Copy the multibyte state if `state' is true. */
468 static void line_append P_((struct line *from, struct line *to, int state));
469 static void
line_append(from,to,state)470 line_append(from, to, state)
471   struct line *from;
472   struct line *to;
473   int state;
474 {
475   str_append(to, "\n", 1);
476   str_append(to, from->active, from->length);
477   to->chomped = from->chomped;
478 
479 #ifdef HAVE_MBRTOWC
480   if (state)
481     MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
482 #endif
483 }
484 
485 /* Exchange two "struct line" buffers.
486    Copy the multibyte state if `state' is true. */
487 static void line_exchange P_((struct line *a, struct line *b, int state));
488 static void
line_exchange(a,b,state)489 line_exchange(a, b, state)
490   struct line *a;
491   struct line *b;
492   int state;
493 {
494   struct line t;
495 
496   if (state)
497     {
498       MEMCPY(&t,  a, sizeof (struct line));
499       MEMCPY( a,  b, sizeof (struct line));
500       MEMCPY( b, &t, sizeof (struct line));
501     }
502   else
503     {
504       MEMCPY(&t,  a, SIZEOF_LINE);
505       MEMCPY( a,  b, SIZEOF_LINE);
506       MEMCPY( b, &t, SIZEOF_LINE);
507     }
508 }
509 
510 
511 /* dummy function to simplify read_pattern_space() */
512 static bool read_always_fail P_((struct input *));
513 static bool
read_always_fail(input)514 read_always_fail(input)
515   struct input *input UNUSED;
516 {
517   return false;
518 }
519 
520 static bool read_file_line P_((struct input *));
521 static bool
read_file_line(input)522 read_file_line(input)
523   struct input *input;
524 {
525   static char *b;
526   static size_t blen;
527 
528   long result = ck_getline (&b, &blen, input->fp);
529   if (result <= 0)
530     return false;
531 
532   /* Remove the trailing new-line that is left by getline. */
533   if (b[result - 1] == '\n')
534     --result;
535   else
536     line.chomped = false;
537 
538   str_append(&line, b, result);
539   return true;
540 }
541 
542 
543 static inline void output_missing_newline P_((struct output *));
544 static inline void
output_missing_newline(outf)545 output_missing_newline(outf)
546   struct output *outf;
547 {
548   if (outf->missing_newline)
549     {
550       ck_fwrite("\n", 1, 1, outf->fp);
551       outf->missing_newline = false;
552     }
553 }
554 
555 static inline void flush_output P_((FILE *));
556 static inline void
flush_output(fp)557 flush_output(fp)
558   FILE *fp;
559 {
560   if (fp != stdout || unbuffered_output)
561     ck_fflush(fp);
562 }
563 
564 static void output_line P_((const char *, size_t, int, struct output *));
565 static void
output_line(text,length,nl,outf)566 output_line(text, length, nl, outf)
567   const char *text;
568   size_t length;
569   int nl;
570   struct output *outf;
571 {
572   if (!text)
573     return;
574 
575   output_missing_newline(outf);
576   if (length)
577     ck_fwrite(text, 1, length, outf->fp);
578   if (nl)
579     ck_fwrite("\n", 1, 1, outf->fp);
580   else
581     outf->missing_newline = true;
582 
583   flush_output(outf->fp);
584 }
585 
586 static struct append_queue *next_append_slot P_((void));
587 static struct append_queue *
next_append_slot()588 next_append_slot()
589 {
590   struct append_queue *n = MALLOC(1, struct append_queue);
591 
592   n->fname = NULL;
593   n->text = NULL;
594   n->textlen = 0;
595   n->next = NULL;
596   n->free = false;
597 
598   if (append_tail)
599       append_tail->next = n;
600   else
601       append_head = n;
602   return append_tail = n;
603 }
604 
605 static void release_append_queue P_((void));
606 static void
release_append_queue()607 release_append_queue()
608 {
609   struct append_queue *p, *q;
610 
611   for (p=append_head; p; p=q)
612     {
613       if (p->free)
614         FREE(p->text);
615 
616       q = p->next;
617       FREE(p);
618     }
619   append_head = append_tail = NULL;
620 }
621 
622 static void dump_append_queue P_((void));
623 static void
dump_append_queue()624 dump_append_queue()
625 {
626   struct append_queue *p;
627 
628   output_missing_newline(&output_file);
629   for (p=append_head; p; p=p->next)
630     {
631       if (p->text)
632         ck_fwrite(p->text, 1, p->textlen, output_file.fp);
633 
634       if (p->fname)
635 	{
636 	  char buf[FREAD_BUFFER_SIZE];
637 	  size_t cnt;
638 	  FILE *fp;
639 
640 	  /* "If _fname_ does not exist or cannot be read, it shall
641 	     be treated as if it were an empty file, causing no error
642 	     condition."  IEEE Std 1003.2-1992
643 	     So, don't fail. */
644 	  fp = ck_fopen(p->fname, read_mode, false);
645 	  if (fp)
646 	    {
647 	      while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
648 		ck_fwrite(buf, 1, cnt, output_file.fp);
649 	      ck_fclose(fp);
650 	    }
651 	}
652     }
653 
654   flush_output(output_file.fp);
655   release_append_queue();
656 }
657 
658 
659 /* Compute the name of the backup file for in-place editing */
660 static char *get_backup_file_name P_((const char *));
661 static char *
get_backup_file_name(name)662 get_backup_file_name(name)
663   const char *name;
664 {
665   char *old_asterisk, *asterisk, *backup, *p;
666   int name_length = strlen(name), backup_length = strlen(in_place_extension);
667 
668   /* Compute the length of the backup file */
669   for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
670        (asterisk = strchr(old_asterisk, '*'));
671        old_asterisk = asterisk + 1)
672     backup_length += name_length - 1;
673 
674   p = backup = xmalloc(backup_length + 1);
675 
676   /* Each iteration gobbles up to an asterisk */
677   for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
678        (asterisk = strchr(old_asterisk, '*'));
679        old_asterisk = asterisk + 1)
680     {
681       MEMCPY (p, old_asterisk, asterisk - old_asterisk);
682       p += asterisk - old_asterisk;
683       strcpy (p, name);
684       p += name_length;
685     }
686 
687   /* Tack on what's after the last asterisk */
688   strcpy (p, old_asterisk);
689   return backup;
690 }
691 
692 /* Initialize a struct input for the named file. */
693 static void open_next_file P_((const char *name, struct input *));
694 static void
open_next_file(name,input)695 open_next_file(name, input)
696   const char *name;
697   struct input *input;
698 {
699   buffer.length = 0;
700 
701   if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
702     {
703       clearerr(stdin);	/* clear any stale EOF indication */
704       input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false);
705     }
706   else if ( ! (input->fp = ck_fopen(name, read_mode, false)) )
707     {
708       const char *ptr = strerror(errno);
709       fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
710       input->read_fn = read_always_fail; /* a redundancy */
711       ++input->bad_count;
712       return;
713     }
714 
715   input->read_fn = read_file_line;
716 
717   if (in_place_extension)
718     {
719       int input_fd;
720       char *tmpdir, *p;
721 #ifndef BOOTSTRAP
722       security_context_t old_fscreatecon;
723       int reset_fscreatecon = 0;
724       memset (&old_fscreatecon, 0, sizeof (old_fscreatecon));
725 #endif
726 
727       if (follow_symlinks)
728 	input->in_file_name = follow_symlink (name);
729       else
730         input->in_file_name = name;
731 
732       /* get the base name */
733       tmpdir = ck_strdup(input->in_file_name);
734       if ((p = strrchr(tmpdir, '/')))
735 	*p = 0;
736       else
737 	strcpy(tmpdir, ".");
738 
739       if (isatty (fileno (input->fp)))
740         panic(_("couldn't edit %s: is a terminal"), input->in_file_name);
741 
742       input_fd = fileno (input->fp);
743       fstat (input_fd, &input->st);
744       if (!S_ISREG (input->st.st_mode))
745         panic(_("couldn't edit %s: not a regular file"), input->in_file_name);
746 
747 #ifndef BOOTSTRAP
748       if (is_selinux_enabled ())
749 	{
750           security_context_t con;
751 	  if (getfilecon (input->in_file_name, &con) != -1)
752 	    {
753 	      /* Save and restore the old context for the sake of w and W
754 		 commands.  */
755 	      reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0;
756 	      if (setfscreatecon (con) < 0)
757 		fprintf (stderr, _("%s: warning: failed to set default file creation context to %s: %s"),
758 			 myname, con, strerror (errno));
759 	      freecon (con);
760 	    }
761 	  else
762 	    {
763 	      if (errno != ENOSYS)
764 		fprintf (stderr, _("%s: warning: failed to get security context of %s: %s"),
765 			 myname, input->in_file_name, strerror (errno));
766 	    }
767 	}
768 #endif
769 
770       output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
771       output_file.missing_newline = false;
772       free (tmpdir);
773 
774 #ifndef BOOTSTRAP
775       if (reset_fscreatecon)
776 	{
777 	  setfscreatecon (old_fscreatecon);
778 	  freecon (old_fscreatecon);
779 	}
780 #endif
781 
782       if (!output_file.fp)
783         panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
784     }
785   else
786     output_file.fp = stdout;
787 }
788 
789 
790 /* Clean up an input stream that we are done with. */
791 static void closedown P_((struct input *));
792 static void
closedown(input)793 closedown(input)
794   struct input *input;
795 {
796   input->read_fn = read_always_fail;
797   if (!input->fp)
798     return;
799 
800   if (in_place_extension && output_file.fp != NULL)
801     {
802       const char *target_name;
803       int input_fd, output_fd;
804 
805       target_name = input->in_file_name;
806       input_fd = fileno (input->fp);
807       output_fd = fileno (output_file.fp);
808       copy_acl (input->in_file_name, input_fd,
809 		input->out_file_name, output_fd,
810 		input->st.st_mode);
811 #ifdef HAVE_FCHOWN
812       if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1)
813         fchown (output_fd, -1, input->st.st_gid);
814 #endif
815 
816       ck_fclose (input->fp);
817       ck_fclose (output_file.fp);
818       if (strcmp(in_place_extension, "*") != 0)
819         {
820           char *backup_file_name = get_backup_file_name(target_name);
821 	  ck_rename (target_name, backup_file_name, input->out_file_name);
822           free (backup_file_name);
823 	}
824 
825       ck_rename (input->out_file_name, target_name, input->out_file_name);
826       free (input->out_file_name);
827     }
828   else
829     ck_fclose (input->fp);
830 
831   input->fp = NULL;
832 }
833 
834 /* Reset range commands so that they are marked as non-matching */
835 static void reset_addresses P_((struct vector *));
836 static void
reset_addresses(vec)837 reset_addresses(vec)
838      struct vector *vec;
839 {
840   struct sed_cmd *cur_cmd;
841   int n;
842 
843   for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++)
844     if (cur_cmd->a1
845 	&& cur_cmd->a1->addr_type == ADDR_IS_NUM
846 	&& cur_cmd->a1->addr_number == 0)
847       cur_cmd->range_state = RANGE_ACTIVE;
848     else
849       cur_cmd->range_state = RANGE_INACTIVE;
850 }
851 
852 /* Read in the next line of input, and store it in the pattern space.
853    Return zero if there is nothing left to input. */
854 static bool read_pattern_space P_((struct input *, struct vector *, int));
855 static bool
read_pattern_space(input,the_program,append)856 read_pattern_space(input, the_program, append)
857   struct input *input;
858   struct vector *the_program;
859   int append;
860 {
861   if (append_head) /* redundant test to optimize for common case */
862     dump_append_queue();
863   replaced = false;
864   if (!append)
865     line.length = 0;
866   line.chomped = true;  /* default, until proved otherwise */
867 
868   while ( ! (*input->read_fn)(input) )
869     {
870       closedown(input);
871 
872       if (!*input->file_list)
873 	return false;
874 
875       if (input->reset_at_next_file)
876 	{
877 	  input->line_number = 0;
878 	  hold.length = 0;
879 	  reset_addresses (the_program);
880 	  rewind_read_files ();
881 
882 	  /* If doing in-place editing, we will never append the
883 	     new-line to this file; but if the output goes to stdout,
884 	     we might still have to output the missing new-line.  */
885 	  if (in_place_extension)
886 	    output_file.missing_newline = false;
887 
888 	  input->reset_at_next_file = separate_files;
889 	}
890 
891       open_next_file (*input->file_list++, input);
892     }
893 
894   ++input->line_number;
895   return true;
896 }
897 
898 
899 static bool last_file_with_data_p P_((struct input *));
900 static bool
last_file_with_data_p(input)901 last_file_with_data_p(input)
902   struct input *input;
903 {
904   for (;;)
905     {
906       int ch;
907 
908       closedown(input);
909       if (!*input->file_list)
910 	return true;
911       open_next_file(*input->file_list++, input);
912       if (input->fp)
913 	{
914 	  if ((ch = getc(input->fp)) != EOF)
915 	    {
916 	      ungetc(ch, input->fp);
917 	      return false;
918 	    }
919 	}
920     }
921 }
922 
923 /* Determine if we match the `$' address. */
924 static bool test_eof P_((struct input *));
925 static bool
test_eof(input)926 test_eof(input)
927   struct input *input;
928 {
929   int ch;
930 
931   if (buffer.length)
932     return false;
933   if (!input->fp)
934     return separate_files || last_file_with_data_p(input);
935   if (feof(input->fp))
936     return separate_files || last_file_with_data_p(input);
937   if ((ch = getc(input->fp)) == EOF)
938     return separate_files || last_file_with_data_p(input);
939   ungetc(ch, input->fp);
940   return false;
941 }
942 
943 /* Return non-zero if the current line matches the address
944    pointed to by `addr'. */
945 static bool match_an_address_p P_((struct addr *, struct input *));
946 static bool
match_an_address_p(addr,input)947 match_an_address_p(addr, input)
948   struct addr *addr;
949   struct input *input;
950 {
951   switch (addr->addr_type)
952     {
953     case ADDR_IS_NULL:
954       return true;
955 
956     case ADDR_IS_REGEX:
957       return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0);
958 
959     case ADDR_IS_NUM_MOD:
960       return (input->line_number >= addr->addr_number
961 	      && ((input->line_number - addr->addr_number) % addr->addr_step) == 0);
962 
963     case ADDR_IS_STEP:
964     case ADDR_IS_STEP_MOD:
965       /* reminder: these are only meaningful for a2 addresses */
966       /* a2->addr_number needs to be recomputed each time a1 address
967          matches for the step and step_mod types */
968       return (addr->addr_number <= input->line_number);
969 
970     case ADDR_IS_LAST:
971       return test_eof(input);
972 
973       /* ADDR_IS_NUM is handled in match_address_p.  */
974     case ADDR_IS_NUM:
975     default:
976       panic("INTERNAL ERROR: bad address type");
977     }
978   /*NOTREACHED*/
979   return false;
980 }
981 
982 /* return non-zero if current address is valid for cmd */
983 static bool match_address_p P_((struct sed_cmd *, struct input *));
984 static bool
match_address_p(cmd,input)985 match_address_p(cmd, input)
986   struct sed_cmd *cmd;
987   struct input *input;
988 {
989   if (!cmd->a1)
990     return true;
991 
992   if (cmd->range_state != RANGE_ACTIVE)
993     {
994       /* Find if we are going to activate a range.  Handle ADDR_IS_NUM
995 	 specially: it represent an "absolute" state, it should not
996 	 be computed like regexes.  */
997       if (cmd->a1->addr_type == ADDR_IS_NUM)
998 	{
999 	  if (!cmd->a2)
1000 	    return (input->line_number == cmd->a1->addr_number);
1001 
1002 	  if (cmd->range_state == RANGE_CLOSED
1003 	      || input->line_number < cmd->a1->addr_number)
1004 	    return false;
1005 	}
1006       else
1007 	{
1008           if (!cmd->a2)
1009 	    return match_an_address_p(cmd->a1, input);
1010 
1011 	  if (!match_an_address_p(cmd->a1, input))
1012             return false;
1013 	}
1014 
1015       /* Ok, start a new range.  */
1016       cmd->range_state = RANGE_ACTIVE;
1017       switch (cmd->a2->addr_type)
1018 	{
1019 	case ADDR_IS_REGEX:
1020 	  /* Always include at least two lines.  */
1021 	  return true;
1022 	case ADDR_IS_NUM:
1023 	  /* Same handling as below, but always include at least one line.  */
1024           if (input->line_number >= cmd->a2->addr_number)
1025 	    cmd->range_state = RANGE_CLOSED;
1026           return true;
1027 	case ADDR_IS_STEP:
1028 	  cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
1029 	  return true;
1030 	case ADDR_IS_STEP_MOD:
1031 	  cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
1032 				 - (input->line_number%cmd->a2->addr_step);
1033 	  return true;
1034 	default:
1035 	  break;
1036         }
1037     }
1038 
1039   /* cmd->range_state == RANGE_ACTIVE.  Check if the range is
1040      ending; also handle ADDR_IS_NUM specially in this case.  */
1041 
1042   if (cmd->a2->addr_type == ADDR_IS_NUM)
1043     {
1044       /* If the second address is a line number, and if we got past
1045          that line, fail to match (it can happen when you jump
1046 	 over such addresses with `b' and `t'.  Use RANGE_CLOSED
1047          so that the range is not re-enabled anymore.  */
1048       if (input->line_number >= cmd->a2->addr_number)
1049 	cmd->range_state = RANGE_CLOSED;
1050 
1051       return (input->line_number <= cmd->a2->addr_number);
1052    }
1053 
1054   /* Other addresses are treated as usual.  */
1055   if (match_an_address_p(cmd->a2, input))
1056     cmd->range_state = RANGE_CLOSED;
1057 
1058   return true;
1059 }
1060 
1061 
1062 static void do_list P_((int line_len));
1063 static void
do_list(line_len)1064 do_list(line_len)
1065      int line_len;
1066 {
1067   unsigned char *p = CAST(unsigned char *)line.active;
1068   countT len = line.length;
1069   countT width = 0;
1070   char obuf[180];	/* just in case we encounter a 512-bit char (;-) */
1071   char *o;
1072   size_t olen;
1073   FILE *fp = output_file.fp;
1074 
1075   output_missing_newline(&output_file);
1076   for (; len--; ++p) {
1077       o = obuf;
1078 
1079       /* Some locales define 8-bit characters as printable.  This makes the
1080 	 testsuite fail at 8to7.sed because the `l' command in fact will not
1081 	 convert the 8-bit characters. */
1082 #if defined isascii || defined HAVE_ISASCII
1083       if (isascii(*p) && ISPRINT(*p)) {
1084 #else
1085       if (ISPRINT(*p)) {
1086 #endif
1087 	  *o++ = *p;
1088 	  if (*p == '\\')
1089 	    *o++ = '\\';
1090       } else {
1091 	  *o++ = '\\';
1092 	  switch (*p) {
1093 #if defined __STDC__ && __STDC__-0
1094 	    case '\a': *o++ = 'a'; break;
1095 #else /* Not STDC; we'll just assume ASCII */
1096 	    case 007:  *o++ = 'a'; break;
1097 #endif
1098 	    case '\b': *o++ = 'b'; break;
1099 	    case '\f': *o++ = 'f'; break;
1100 	    case '\n': *o++ = 'n'; break;
1101 	    case '\r': *o++ = 'r'; break;
1102 	    case '\t': *o++ = 't'; break;
1103 	    case '\v': *o++ = 'v'; break;
1104 	    default:
1105 	      sprintf(o, "%03o", *p);
1106 	      o += strlen(o);
1107 	      break;
1108 	    }
1109       }
1110       olen = o - obuf;
1111       if (width+olen >= line_len && line_len > 0) {
1112 	  ck_fwrite("\\\n", 1, 2, fp);
1113 	  width = 0;
1114       }
1115       ck_fwrite(obuf, 1, olen, fp);
1116       width += olen;
1117   }
1118   ck_fwrite("$\n", 1, 2, fp);
1119   flush_output (fp);
1120 }
1121 
1122 
1123 static enum replacement_types append_replacement P_((struct line *, struct replacement *,
1124 						     struct re_registers *,
1125 						     enum replacement_types));
1126 static enum replacement_types
append_replacement(buf,p,regs,repl_mod)1127 append_replacement (buf, p, regs, repl_mod)
1128   struct line *buf;
1129   struct replacement *p;
1130   struct re_registers *regs;
1131   enum replacement_types repl_mod;
1132 {
1133   for (; p; p=p->next)
1134     {
1135       int i = p->subst_id;
1136       enum replacement_types curr_type;
1137 
1138       /* Apply a \[lu] modifier that was given earlier, but which we
1139          have not had yet the occasion to apply.  But don't do it
1140          if this replacement has a modifier of its own. */
1141       curr_type = (p->repl_type & REPL_MODIFIERS)
1142         ? p->repl_type
1143         : p->repl_type | repl_mod;
1144 
1145       repl_mod = 0;
1146       if (p->prefix_length)
1147         {
1148           str_append_modified(buf, p->prefix, p->prefix_length,
1149     			      curr_type);
1150           curr_type &= ~REPL_MODIFIERS;
1151         }
1152 
1153       if (0 <= i)
1154 	{
1155           if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS)
1156             /* Save this modifier, we shall apply it later.
1157 	       e.g. in s/()([a-z])/\u\1\2/
1158 	       the \u modifier is applied to \2, not \1 */
1159 	    repl_mod = curr_type & REPL_MODIFIERS;
1160 
1161 	  else if (regs->end[i] != regs->start[i])
1162 	    str_append_modified(buf, line.active + regs->start[i],
1163 			        CAST(size_t)(regs->end[i] - regs->start[i]),
1164 			        curr_type);
1165 	}
1166     }
1167 
1168   return repl_mod;
1169 }
1170 
1171 static void do_subst P_((struct subst *));
1172 static void
do_subst(sub)1173 do_subst(sub)
1174   struct subst *sub;
1175 {
1176   size_t start = 0;	/* where to start scan for (next) match in LINE */
1177   size_t last_end = 0;  /* where did the last successful match end in LINE */
1178   countT count = 0;	/* number of matches found */
1179   bool again = true;
1180 
1181   static struct re_registers regs;
1182 
1183   line_reset(&s_accum, &line);
1184 
1185   /* The first part of the loop optimizes s/xxx// when xxx is at the
1186      start, and s/xxx$// */
1187   if (!match_regex(sub->regx, line.active, line.length, start,
1188 		   &regs, sub->max_id + 1))
1189     return;
1190 
1191   if (!sub->replacement && sub->numb <= 1)
1192     {
1193       if (regs.start[0] == 0 && !sub->global)
1194         {
1195 	  /* We found a match, set the `replaced' flag. */
1196 	  replaced = true;
1197 
1198 	  line.active += regs.end[0];
1199 	  line.length -= regs.end[0];
1200 	  line.alloc -= regs.end[0];
1201 	  goto post_subst;
1202         }
1203       else if (regs.end[0] == line.length)
1204         {
1205 	  /* We found a match, set the `replaced' flag. */
1206 	  replaced = true;
1207 
1208 	  line.length = regs.start[0];
1209 	  goto post_subst;
1210         }
1211     }
1212 
1213   do
1214     {
1215       enum replacement_types repl_mod = 0;
1216 
1217       size_t offset = regs.start[0];
1218       size_t matched = regs.end[0] - regs.start[0];
1219 
1220       /* Copy stuff to the left of this match into the output string. */
1221       if (start < offset)
1222 	str_append(&s_accum, line.active + start, offset - start);
1223 
1224       /* If we're counting up to the Nth match, are we there yet?
1225          And even if we are there, there is another case we have to
1226 	 skip: are we matching an empty string immediately following
1227          another match?
1228 
1229          This latter case avoids that baaaac, when passed through
1230          s,a*,x,g, gives `xbxxcx' instead of xbxcx.  This behavior is
1231          unacceptable because it is not consistently applied (for
1232          example, `baaaa' gives `xbx', not `xbxx'). */
1233       if ((matched > 0 || count == 0 || offset > last_end)
1234 	  && ++count >= sub->numb)
1235         {
1236           /* We found a match, set the `replaced' flag. */
1237           replaced = true;
1238 
1239           /* Now expand the replacement string into the output string. */
1240           repl_mod = append_replacement (&s_accum, sub->replacement, &regs, repl_mod);
1241 	  again = sub->global;
1242         }
1243       else
1244 	{
1245           /* The match was not replaced.  Copy the text until its
1246              end; if it was vacuous, skip over one character and
1247 	     add that character to the output.  */
1248 	  if (matched == 0)
1249 	    {
1250 	      if (start < line.length)
1251 	        matched = 1;
1252 	      else
1253 	        break;
1254 	    }
1255 
1256 	  str_append(&s_accum, line.active + offset, matched);
1257         }
1258 
1259       /* Start after the match.  last_end is the real end of the matched
1260 	 substring, excluding characters that were skipped in case the RE
1261 	 matched the empty string.  */
1262       start = offset + matched;
1263       last_end = regs.end[0];
1264     }
1265   while (again
1266 	 && start <= line.length
1267 	 && match_regex(sub->regx, line.active, line.length, start,
1268 			&regs, sub->max_id + 1));
1269 
1270   /* Copy stuff to the right of the last match into the output string. */
1271   if (start < line.length)
1272     str_append(&s_accum, line.active + start, line.length-start);
1273   s_accum.chomped = line.chomped;
1274 
1275   /* Exchange line and s_accum.  This can be much cheaper
1276      than copying s_accum.active into line.text (for huge lines). */
1277   line_exchange(&line, &s_accum, false);
1278 
1279   /* Finish up. */
1280   if (count < sub->numb)
1281     return;
1282 
1283  post_subst:
1284   if (sub->print & 1)
1285     output_line(line.active, line.length, line.chomped, &output_file);
1286 
1287   if (sub->eval)
1288     {
1289 #ifdef HAVE_POPEN
1290       FILE *pipe_fp;
1291       line_reset(&s_accum, NULL);
1292 
1293       str_append (&line, "", 1);
1294       pipe_fp = popen(line.active, "r");
1295 
1296       if (pipe_fp != NULL)
1297 	{
1298 	  while (!feof (pipe_fp))
1299 	    {
1300 	      char buf[4096];
1301 	      int n = fread (buf, sizeof(char), 4096, pipe_fp);
1302 	      if (n > 0)
1303 		str_append(&s_accum, buf, n);
1304 	    }
1305 
1306 	  pclose (pipe_fp);
1307 
1308 	  /* Exchange line and s_accum.  This can be much cheaper than copying
1309 	     s_accum.active into line.text (for huge lines).  See comment above
1310 	     for 'g' as to while the third argument is incorrect anyway.  */
1311 	  line_exchange(&line, &s_accum, true);
1312 	  if (line.length &&
1313 	      line.active[line.length - 1] == '\n')
1314 	    line.length--;
1315 	}
1316       else
1317 	panic(_("error in subprocess"));
1318 #else
1319       panic(_("option `e' not supported"));
1320 #endif
1321     }
1322 
1323   if (sub->print & 2)
1324     output_line(line.active, line.length, line.chomped, &output_file);
1325   if (sub->outf)
1326     output_line(line.active, line.length, line.chomped, sub->outf);
1327 }
1328 
1329 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1330 /* Used to attempt a simple-minded optimization. */
1331 
1332 static countT branches;
1333 
1334 static countT count_branches P_((struct vector *));
1335 static countT
count_branches(program)1336 count_branches(program)
1337   struct vector *program;
1338 {
1339   struct sed_cmd *cur_cmd = program->v;
1340   countT isn_cnt = program->v_length;
1341   countT cnt = 0;
1342 
1343   while (isn_cnt-- > 0)
1344     {
1345       switch (cur_cmd->cmd)
1346 	{
1347 	case 'b':
1348 	case 't':
1349 	case 'T':
1350 	case '{':
1351 	  ++cnt;
1352 	}
1353     }
1354   return cnt;
1355 }
1356 
1357 static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
1358 static struct sed_cmd *
shrink_program(vec,cur_cmd)1359 shrink_program(vec, cur_cmd)
1360   struct vector *vec;
1361   struct sed_cmd *cur_cmd;
1362 {
1363   struct sed_cmd *v = vec->v;
1364   struct sed_cmd *last_cmd = v + vec->v_length;
1365   struct sed_cmd *p;
1366   countT cmd_cnt;
1367 
1368   for (p=v; p < cur_cmd; ++p)
1369     if (p->cmd != '#')
1370       MEMCPY(v++, p, sizeof *v);
1371   cmd_cnt = v - vec->v;
1372 
1373   for (; p < last_cmd; ++p)
1374     if (p->cmd != '#')
1375       MEMCPY(v++, p, sizeof *v);
1376   vec->v_length = v - vec->v;
1377 
1378   return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0;
1379 }
1380 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1381 
1382 /* Execute the program `vec' on the current input line.
1383    Return exit status if caller should quit, -1 otherwise. */
1384 static int execute_program P_((struct vector *, struct input *));
1385 static int
execute_program(vec,input)1386 execute_program(vec, input)
1387   struct vector *vec;
1388   struct input *input;
1389 {
1390   struct sed_cmd *cur_cmd;
1391   struct sed_cmd *end_cmd;
1392 
1393   cur_cmd = vec->v;
1394   end_cmd = vec->v + vec->v_length;
1395   while (cur_cmd < end_cmd)
1396     {
1397       if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
1398 	{
1399 	  switch (cur_cmd->cmd)
1400 	    {
1401 	    case 'a':
1402 	      {
1403 		struct append_queue *aq = next_append_slot();
1404 		aq->text = cur_cmd->x.cmd_txt.text;
1405 		aq->textlen = cur_cmd->x.cmd_txt.text_length;
1406 	      }
1407 	      break;
1408 
1409 	    case '{':
1410 	    case 'b':
1411 	      cur_cmd = vec->v + cur_cmd->x.jump_index;
1412 	      continue;
1413 
1414 	    case '}':
1415 	    case '#':
1416 	    case ':':
1417 	      /* Executing labels and block-ends are easy. */
1418 	      break;
1419 
1420 	    case 'c':
1421 	      if (cur_cmd->range_state != RANGE_ACTIVE)
1422 		output_line(cur_cmd->x.cmd_txt.text,
1423 			    cur_cmd->x.cmd_txt.text_length - 1, true,
1424 			    &output_file);
1425 	      /* POSIX.2 is silent about c starting a new cycle,
1426 		 but it seems to be expected (and make sense). */
1427 	      /* Fall Through */
1428 	    case 'd':
1429 	      return -1;
1430 
1431 	    case 'D':
1432 	      {
1433 		char *p = memchr(line.active, '\n', line.length);
1434 		if (!p)
1435 		  return -1;
1436 
1437 		++p;
1438 		line.alloc -= p - line.active;
1439 		line.length -= p - line.active;
1440 		line.active += p - line.active;
1441 
1442 		/* reset to start next cycle without reading a new line: */
1443 		cur_cmd = vec->v;
1444 		continue;
1445 	      }
1446 
1447 	    case 'e': {
1448 #ifdef HAVE_POPEN
1449 	      FILE *pipe_fp;
1450 	      int cmd_length = cur_cmd->x.cmd_txt.text_length;
1451 	      line_reset(&s_accum, NULL);
1452 
1453 	      if (!cmd_length)
1454 		{
1455 		  str_append (&line, "", 1);
1456 		  pipe_fp = popen(line.active, "r");
1457 		}
1458 	      else
1459 		{
1460 		  cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
1461 		  pipe_fp = popen(cur_cmd->x.cmd_txt.text, "r");
1462                   output_missing_newline(&output_file);
1463 		}
1464 
1465 	      if (pipe_fp != NULL)
1466 		{
1467 		  char buf[4096];
1468 		  int n;
1469 		  while (!feof (pipe_fp))
1470 		    if ((n = fread (buf, sizeof(char), 4096, pipe_fp)) > 0)
1471 		      {
1472 			if (!cmd_length)
1473 			  str_append(&s_accum, buf, n);
1474 			else
1475 			  ck_fwrite(buf, 1, n, output_file.fp);
1476 		      }
1477 
1478 		  pclose (pipe_fp);
1479 		  if (!cmd_length)
1480 		    {
1481 		      /* Store into pattern space for plain `e' commands */
1482 		      if (s_accum.length &&
1483 			  s_accum.active[s_accum.length - 1] == '\n')
1484 			s_accum.length--;
1485 
1486 		      /* Exchange line and s_accum.  This can be much
1487 			 cheaper than copying s_accum.active into line.text
1488 			 (for huge lines).  See comment above for 'g' as
1489 			 to while the third argument is incorrect anyway.  */
1490 		      line_exchange(&line, &s_accum, true);
1491 		    }
1492                   else
1493                     flush_output(output_file.fp);
1494 
1495 		}
1496 	      else
1497 		panic(_("error in subprocess"));
1498 #else
1499 	      panic(_("`e' command not supported"));
1500 #endif
1501 	      break;
1502 	    }
1503 
1504 	    case 'g':
1505 	      /* We do not have a really good choice for the third parameter.
1506 		 The problem is that hold space and the input file might as
1507 		 well have different states; copying it from hold space means
1508 		 that subsequent input might be read incorrectly, while
1509 		 keeping it as in pattern space means that commands operating
1510 		 on the moved buffer might consider a wrong character set.
1511 		 We keep it true because it's what sed <= 4.1.5 did.  */
1512 	      line_copy(&hold, &line, true);
1513 	      break;
1514 
1515 	    case 'G':
1516 	      /* We do not have a really good choice for the third parameter.
1517 		 The problem is that hold space and pattern space might as
1518 		 well have different states.  So, true is as wrong as false.
1519 		 We keep it true because it's what sed <= 4.1.5 did, but
1520 		 we could consider having line_ap.  */
1521 	      line_append(&hold, &line, true);
1522 	      break;
1523 
1524 	    case 'h':
1525 	      /* Here, it is ok to have true.  */
1526 	      line_copy(&line, &hold, true);
1527 	      break;
1528 
1529 	    case 'H':
1530 	      /* See comment above for 'G' regarding the third parameter.  */
1531 	      line_append(&line, &hold, true);
1532 	      break;
1533 
1534 	    case 'i':
1535 	      output_line(cur_cmd->x.cmd_txt.text,
1536 			  cur_cmd->x.cmd_txt.text_length - 1,
1537 			  true, &output_file);
1538 	      break;
1539 
1540 	    case 'l':
1541 	      do_list(cur_cmd->x.int_arg == -1
1542 		      ? lcmd_out_line_len
1543 		      : cur_cmd->x.int_arg);
1544 	      break;
1545 
1546 	    case 'L':
1547               output_missing_newline(&output_file);
1548 	      fmt(line.active, line.active + line.length,
1549 		  cur_cmd->x.int_arg == -1
1550 		  ? lcmd_out_line_len
1551 		  : cur_cmd->x.int_arg,
1552 		  output_file.fp);
1553               flush_output(output_file.fp);
1554 	      break;
1555 
1556 	    case 'n':
1557 	      if (!no_default_output)
1558 		output_line(line.active, line.length, line.chomped, &output_file);
1559 	      if (test_eof(input) || !read_pattern_space(input, vec, false))
1560 		return -1;
1561 	      break;
1562 
1563 	    case 'N':
1564 	      str_append(&line, "\n", 1);
1565 
1566               if (test_eof(input) || !read_pattern_space(input, vec, true))
1567                 {
1568                   line.length--;
1569                   if (posixicity == POSIXLY_EXTENDED && !no_default_output)
1570                      output_line(line.active, line.length, line.chomped,
1571                                  &output_file);
1572                   return -1;
1573                 }
1574 	      break;
1575 
1576 	    case 'p':
1577 	      output_line(line.active, line.length, line.chomped, &output_file);
1578 	      break;
1579 
1580 	    case 'P':
1581 	      {
1582 		char *p = memchr(line.active, '\n', line.length);
1583 		output_line(line.active, p ? p - line.active : line.length,
1584 			    p ? true : line.chomped, &output_file);
1585 	      }
1586 	      break;
1587 
1588             case 'q':
1589               if (!no_default_output)
1590                 output_line(line.active, line.length, line.chomped, &output_file);
1591 	      dump_append_queue();
1592 
1593 	    case 'Q':
1594 	      return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
1595 
1596 	    case 'r':
1597 	      if (cur_cmd->x.fname)
1598 		{
1599 		  struct append_queue *aq = next_append_slot();
1600 		  aq->fname = cur_cmd->x.fname;
1601 		}
1602 	      break;
1603 
1604 	    case 'R':
1605 	      if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
1606 		{
1607 		  struct append_queue *aq;
1608 		  size_t buflen;
1609 		  char *text = NULL;
1610 		  int result;
1611 
1612 		  result = ck_getline (&text, &buflen, cur_cmd->x.fp);
1613 		  if (result != EOF)
1614 		    {
1615 		      aq = next_append_slot();
1616 		      aq->free = true;
1617 		      aq->text = text;
1618 		      aq->textlen = result;
1619 		    }
1620 		}
1621 	      break;
1622 
1623 	    case 's':
1624 	      do_subst(cur_cmd->x.cmd_subst);
1625 	      break;
1626 
1627 	    case 't':
1628 	      if (replaced)
1629 		{
1630 		  replaced = false;
1631 		  cur_cmd = vec->v + cur_cmd->x.jump_index;
1632 		  continue;
1633 		}
1634 	      break;
1635 
1636 	    case 'T':
1637 	      if (!replaced)
1638 		{
1639 		  cur_cmd = vec->v + cur_cmd->x.jump_index;
1640 		  continue;
1641 		}
1642 	      else
1643 		replaced = false;
1644 	      break;
1645 
1646 	    case 'w':
1647 	      if (cur_cmd->x.fp)
1648 		output_line(line.active, line.length,
1649 			    line.chomped, cur_cmd->x.outf);
1650 	      break;
1651 
1652 	    case 'W':
1653 	      if (cur_cmd->x.fp)
1654 	        {
1655 		  char *p = memchr(line.active, '\n', line.length);
1656 		  output_line(line.active, p ? p - line.active : line.length,
1657 			      p ? true : line.chomped, cur_cmd->x.outf);
1658 	        }
1659 	      break;
1660 
1661 	    case 'x':
1662 	      /* See comment above for 'g' regarding the third parameter.  */
1663 	      line_exchange(&line, &hold, false);
1664 	      break;
1665 
1666 	    case 'y':
1667 	      {
1668 #ifdef HAVE_MBRTOWC
1669                if (mb_cur_max > 1)
1670                  {
1671                    int idx, prev_idx; /* index in the input line.  */
1672                    char **trans;
1673                    mbstate_t mbstate;
1674                    memset(&mbstate, 0, sizeof(mbstate_t));
1675                    for (idx = 0; idx < line.length;)
1676                      {
1677                        int mbclen, i;
1678                        mbclen = MBRLEN (line.active + idx, line.length - idx,
1679                                           &mbstate);
1680                        /* An invalid sequence, or a truncated multibyte
1681                           character.  We treat it as a singlebyte character.
1682                        */
1683                        if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1684                            || mbclen == 0)
1685                          mbclen = 1;
1686 
1687                        trans = cur_cmd->x.translatemb;
1688                        /* `i' indicate i-th translate pair.  */
1689                        for (i = 0; trans[2*i] != NULL; i++)
1690                          {
1691                            if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
1692                              {
1693                                bool move_remain_buffer = false;
1694                                int trans_len = strlen(trans[2*i+1]);
1695 
1696                                if (mbclen < trans_len)
1697                                  {
1698                                    int new_len;
1699                                    new_len = line.length + 1 + trans_len - mbclen;
1700                                    /* We must extend the line buffer.  */
1701                                    if (line.alloc < new_len)
1702                                      {
1703                                        /* And we must resize the buffer.  */
1704                                        resize_line(&line, new_len);
1705                                      }
1706                                    move_remain_buffer = true;
1707                                  }
1708                                else if (mbclen > trans_len)
1709                                  {
1710                                    /* We must truncate the line buffer.  */
1711                                    move_remain_buffer = true;
1712                                  }
1713                                prev_idx = idx;
1714                                if (move_remain_buffer)
1715                                  {
1716                                    int move_len, move_offset;
1717                                    char *move_from, *move_to;
1718                                    /* Move the remaining with \0.  */
1719                                    move_from = line.active + idx + mbclen;
1720                                    move_to = line.active + idx + trans_len;
1721                                    move_len = line.length + 1 - idx - mbclen;
1722                                    move_offset = trans_len - mbclen;
1723                                    memmove(move_to, move_from, move_len);
1724                                    line.length += move_offset;
1725                                    idx += move_offset;
1726                                  }
1727                                strncpy(line.active + prev_idx, trans[2*i+1],
1728                                        trans_len);
1729                                break;
1730                              }
1731                          }
1732                        idx += mbclen;
1733                      }
1734                  }
1735                else
1736 #endif /* HAVE_MBRTOWC */
1737                  {
1738                    unsigned char *p, *e;
1739                    p = CAST(unsigned char *)line.active;
1740                    for (e=p+line.length; p<e; ++p)
1741                      *p = cur_cmd->x.translate[*p];
1742                  }
1743 	      }
1744 	      break;
1745 
1746 	    case 'z':
1747 	      line.length = 0;
1748 	      break;
1749 
1750 	    case '=':
1751               output_missing_newline(&output_file);
1752               fprintf(output_file.fp, "%lu\n",
1753                       CAST(unsigned long)input->line_number);
1754               flush_output(output_file.fp);
1755 	      break;
1756 
1757 	    default:
1758 	      panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
1759 	    }
1760 	}
1761 
1762 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1763       /* If our top-level program consists solely of commands with
1764          ADDR_IS_NUM addresses then once we past the last mentioned
1765          line we should be able to quit if no_default_output is true,
1766          or otherwise quickly copy input to output.  Now whether this
1767          optimization is a win or not depends on how cheaply we can
1768          implement this for the cases where it doesn't help, as
1769          compared against how much time is saved.  One semantic
1770          difference (which I think is an improvement) is that *this*
1771          version will terminate after printing line two in the script
1772          "yes | sed -n 2p".
1773 
1774          Don't use this when in-place editing is active, because line
1775          numbers restart each time then. */
1776       else if (!separate_files)
1777 	{
1778 	  if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1779 	      && (cur_cmd->a2
1780 		  ? cur_cmd->range_state == RANGE_CLOSED
1781 		  : cur_cmd->a1->addr_number < input->line_number))
1782 	    {
1783 	      /* Skip this address next time */
1784 	      cur_cmd->addr_bang = !cur_cmd->addr_bang;
1785 	      cur_cmd->a1->addr_type = ADDR_IS_NULL;
1786 	      if (cur_cmd->a2)
1787 		cur_cmd->a2->addr_type = ADDR_IS_NULL;
1788 
1789 	      /* can we make an optimization? */
1790 	      if (cur_cmd->addr_bang)
1791 		{
1792 		  if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
1793 		      || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
1794 		    branches--;
1795 
1796 		  cur_cmd->cmd = '#';	/* replace with no-op */
1797 	          if (branches == 0)
1798 		    cur_cmd = shrink_program(vec, cur_cmd);
1799 		  if (!cur_cmd && no_default_output)
1800 		    return 0;
1801 		  end_cmd = vec->v + vec->v_length;
1802 		  if (!cur_cmd)
1803 		    cur_cmd = end_cmd;
1804 		  continue;
1805 		}
1806 	    }
1807 	}
1808 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1809 
1810       /* this is buried down here so that a "continue" statement can skip it */
1811       ++cur_cmd;
1812     }
1813 
1814     if (!no_default_output)
1815       output_line(line.active, line.length, line.chomped, &output_file);
1816     return -1;
1817 }
1818 
1819 
1820 
1821 /* Apply the compiled script to all the named files. */
1822 int
process_files(the_program,argv)1823 process_files(the_program, argv)
1824   struct vector *the_program;
1825   char **argv;
1826 {
1827   static char dash[] = "-";
1828   static char *stdin_argv[2] = { dash, NULL };
1829   struct input input;
1830   int status;
1831 
1832   line_init(&line, NULL, INITIAL_BUFFER_SIZE);
1833   line_init(&hold, NULL, 0);
1834   line_init(&buffer, NULL, 0);
1835 
1836 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
1837   branches = count_branches(the_program);
1838 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
1839   input.reset_at_next_file = true;
1840   if (argv && *argv)
1841     input.file_list = argv;
1842   else if (in_place_extension)
1843     panic(_("no input files"));
1844   else
1845     input.file_list = stdin_argv;
1846 
1847   input.bad_count = 0;
1848   input.line_number = 0;
1849   input.read_fn = read_always_fail;
1850   input.fp = NULL;
1851 
1852   status = EXIT_SUCCESS;
1853   while (read_pattern_space(&input, the_program, false))
1854     {
1855       status = execute_program(the_program, &input);
1856       if (status == -1)
1857 	status = EXIT_SUCCESS;
1858       else
1859 	break;
1860     }
1861   closedown(&input);
1862 
1863 #ifdef DEBUG_LEAKS
1864   /* We're about to exit, so these free()s are redundant.
1865      But if we're running under a memory-leak detecting
1866      implementation of malloc(), we want to explicitly
1867      deallocate in order to avoid extraneous noise from
1868      the allocator. */
1869   release_append_queue();
1870   FREE(buffer.text);
1871   FREE(hold.text);
1872   FREE(line.text);
1873   FREE(s_accum.text);
1874 #endif /*DEBUG_LEAKS*/
1875 
1876   if (input.bad_count)
1877     status = 2;
1878 
1879   return status;
1880 }
1881