Lines Matching +full:to +full:- +full:regex +full:- +full:range
1 /* sed.c - stream editor. Thing that does s/// and other stuff.
11 * What's the right thing to do for -i when write fails? Skip to next?
12 * test '//q' with no previous regex, also repeat previous regex?
14 USE_SED(NEWTOY(sed, "(help)(version)e*f*i:;nErz(null-data)[+Er]", TOYFLAG_BIN|TOYFLAG_LOCALE|TOYFLA…
20 usage: sed [-inrzE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
22 Stream editor. Apply one or more editing SCRIPTs to each line of input
23 (from FILE or stdin) producing output (by default to stdout).
25 -e Add SCRIPT to list
26 -f Add contents of SCRIPT_FILE to list
27 -i Edit each file in place (-iEXT keeps backup file with extension EXT)
28 -n No default output (use the p command to output matched lines)
29 -r Use extended regular expression syntax
30 -E POSIX alias for -r
31 -s Treat input files separately (implied by -i)
32 -z Use \0 rather than \n as the input line separator
35 semicolons. All -e SCRIPTs are concatenated together as if separated
36 by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
37 If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
39 Each COMMAND may be preceded by an address which limits the command to
40 apply only to the specified line(s). Commands without an address apply to
47 matches the last line of input. (In -s or -i mode this matches the last
50 everything from the first address to the second address (inclusive). If
51 both addresses are regular expressions, more than one range of lines in
52 each file can match. The second address can be +N to end N lines later.
56 Backslashes may be used to escape the delimiter if it occurs in the
57 regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
58 and unicode). An empty regex repeats the previous one. ADDRESS regexes
59 (above) require the first delimiter to be escaped with a backslash when
60 it isn't a forward slash (to distinguish it from the COMMANDs below).
63 processes it, and either writes it to the output or discards it before
76 the block are only run for lines within the block's address range.
80 d Delete this line and move on to the next one
88 G Get remembered line (appending to current line)
92 H Remember this line (appending to remembered line, if any)
95 nonprintable characters, wrapping lines to terminal width with a
96 backslash, and appending $ to actual end of line.
101 N Append next line of input to this line, separated by a newline
107 P Print this line up to first newline (from "N")
115 The following commands (may) take an argument. The "text" arguments (to
116 the "a", "b", and "c" commands) may end with an unescaped "\" to append
120 a [text] Append text to output before attempting to read next line
122 b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
124 c [text] Delete line, output text at end of matching address range
129 r [file] Append contents of file to output before attempting to read
132 s/S/R/F Search for regex S, replace matched text with R using flags F.
134 backslash) is the delimiter, escape with \ to use normally.
136 The replacement text may contain "&" to substitute the matched
138 \9 to substitute a parenthetical subexpression in the regex.
144 [0-9] A number, substitute only that occurrence of pattern
148 w [file] Write (append) line to file if match replaced
150 t [label] Test, jump to :label only if an "s" command found a match in
155 w [file] Write (append) line to file
157 y/old/new/ Change each character in 'old' to corresponding character
165 Deviations from POSIX: allow extended regular expressions with -r,
166 editing in place with -i, separate with -s, NUL-separated input with -z,
168 2-address anywhere an address is allowed, "T" command, multiline
169 continuations for [abc], \; to end [abc] argument before end of line.
191 // regex or string starts, ala offset+(char *)struct, because we remalloc()
192 // these to expand them for multiline inputs, and pointers would have to be
200 int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p)
217 if (eol) line[len-1] = old; in emit()
227 // Extend allocation to include new string, with newline between if newlen<0
234 if (newline) newlen = -newlen; in extend_string()
243 // An empty regex repeats the previous one
247 if (!TT.lastregex) error_exit("no previous regex"); in get_regex()
254 // Apply pattern to line from input file
267 // Ignore EOF for all files before last unless -i in sed_line()
271 // pline at EOF to flush last line). Note that only end of _last_ input in sed_line()
272 // file matches $ (unless we're doing -i). in sed_line()
282 if (line[len-1] == '\n') line[--len] = eol++; in sed_line()
285 // The restart-1 is because we added one to make sure it wasn't NULL, in sed_line()
287 command = TT.restart ? ((struct sedcmd *)TT.restart)-1 : (void *)TT.pattern; in sed_line()
291 char *str, c = command->c; in sed_line()
293 // Have we got a line or regex matching range for this rule? in sed_line()
294 if (*command->lmatch || *command->rmatch) { in sed_line()
299 if (command->hit) { in sed_line()
300 if (!(lm = command->lmatch[1])) { in sed_line()
301 if (!command->rmatch[1]) command->hit = 0; in sed_line()
303 void *rm = get_regex(command, command->rmatch[1]); in sed_line()
305 // regex match end includes matching line, so defer deactivation in sed_line()
308 } else if (lm > 0 && lm < TT.count) command->hit = 0; in sed_line()
309 else if (lm < -1 && TT.count == command->hit+(-lm-1)) command->hit = 0; in sed_line()
313 if (!(lm = *command->lmatch)) { in sed_line()
314 void *rm = get_regex(command, *command->rmatch); in sed_line()
317 command->hit = TT.count; in sed_line()
318 } else if (lm == TT.count || (lm == -1 && !pline)) in sed_line()
319 command->hit = TT.count; in sed_line()
321 if (!command->lmatch[1] && !command->rmatch[1]) miss = 1; in sed_line()
325 lm = !(command->not^!!command->hit); in sed_line()
327 // Deferred disable from regex end match in sed_line()
328 if (miss || command->lmatch[1] == TT.count) command->hit = 0; in sed_line()
336 command = command->next; in sed_line()
337 if (command->c == '{') curly++; in sed_line()
338 if (command->c == '}') curly--; in sed_line()
341 command = command->next; in sed_line()
348 command = command->next; in sed_line()
356 if (command->arg1) a->str = command->arg1+(char *)command; in sed_line()
357 a->file = c=='r'; in sed_line()
364 if (!command->arg1) break; in sed_line()
365 str = command->arg1+(char *)command; in sed_line()
366 for (command = (void *)TT.pattern; command; command = command->next) in sed_line()
367 if (command->c == ':' && !strcmp(command->arg1+(char *)command, str)) in sed_line()
372 str = command->arg1+(char *)command; in sed_line()
373 if (!command->hit) emit(str, strlen(str), 1); in sed_line()
382 // Delete up to \n or end of buffer in sed_line()
384 while ((str-line)<len) if (*(str++) == '\n') break; in sed_line()
385 len -= str - line; in sed_line()
417 str = command->arg1+(char *)command; in sed_line()
425 if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10; in sed_line()
426 if (TT.xx > 4) TT.xx -= 4; in sed_line()
436 if (x != -1) { in sed_line()
445 TT.restart = command->next+1; in sed_line()
450 // we need to actually read ahead to get N;$p EOF detection right. in sed_line()
452 TT.restart = command->next+1; in sed_line()
453 extend_string(&line, TT.nextline, len, -TT.nextlen); in sed_line()
465 if (emit(line, l ? l-line : len, eol)) break; in sed_line()
469 if (!toys.exitval && command->arg1) in sed_line()
470 toys.exitval = atoi(command->arg1+(char *)command); in sed_line()
477 char *rline = line, *new = command->arg2 + (char *)command, *l2 = 0; in sed_line()
479 regex_t *reg = get_regex(command, command->arg1); in sed_line()
483 // Loop finding match in remaining line (up to remaining len) in sed_line()
484 while (!regexec0(reg, rline, len-(rline-line), 10, match, mflags)) { in sed_line()
488 mlen = match[0].rm_eo-match[0].rm_so; in sed_line()
490 if (rline-line == len) break; in sed_line()
497 off = command->sflags>>3; in sed_line()
511 int cc = -1; in sed_line()
514 else if (new[off] == '\\') cc = new[++off] - '0'; in sed_line()
519 newlen += match[cc].rm_eo-match[cc].rm_so; in sed_line()
522 // Copy changed data to new string in sed_line()
525 l2l += newlen-mlen; in sed_line()
537 cc = new[++off] - '0'; in sed_line()
540 l2[l2used+mlen-1] = new[off]; in sed_line()
543 } else if (cc > reg->re_nsub) error_exit("no s//\\%d/", cc); in sed_line()
550 if (match[cc].rm_so != -1) { in sed_line()
551 ll = match[cc].rm_eo-match[cc].rm_so; in sed_line()
560 if (!(command->sflags & 2)) break; in sed_line()
566 mlen = len-(rline-line); in sed_line()
575 if (command->sflags & 4) emit(line, len, eol); in sed_line()
578 if (command->w) goto writenow; in sed_line()
590 name = command->w + (char *)command; in sed_line()
597 perror_exit("w '%s'", command->arg1+(char *)command); in sed_line()
598 *(--name) = TT.noeol; in sed_line()
610 char *from, *to = (char *)command; in sed_line() local
613 from = to+command->arg1; in sed_line()
614 to += command->arg2; in sed_line()
618 if (j != -1) line[i] = to[j]; in sed_line()
625 command = command->next; in sed_line()
632 struct append *a = append->next; in sed_line()
634 if (append->file) { in sed_line()
635 int fd = open(append->str, O_RDONLY); in sed_line()
638 if (fd != -1) { in sed_line()
644 } else if (append->str) emit(append->str, strlen(append->str), 1); in sed_line()
660 if (!fd) return error_msg("-i on stdin"); in do_sed_file()
663 for (command = (void *)TT.pattern; command; command = command->next) in do_sed_file()
664 command->hit = 0; in do_sed_file()
674 replace_tempfile(-1, TT.fdout, &tmp); in do_sed_file()
682 // returns processed copy of string (0 if error), *pstr advances to next
687 char *to, *from, mode = 0, d; in unescape_delimited_string() local
697 to = delim = xmalloc(strlen(*pstr)+1); in unescape_delimited_string()
702 // delimiter in regex character range doesn't count in unescape_delimited_string()
706 if (from[1]=='-' || from[1]==']') *(to++) = *(from++); in unescape_delimited_string()
708 *(to++) = *(from++); in unescape_delimited_string()
714 *(to++) = *(from++); in unescape_delimited_string()
717 // Length 1 range (X-X with same X) is "undefined" and makes regcomp err, in unescape_delimited_string()
718 // but the perl build does it, so we need to filter it out. in unescape_delimited_string()
719 } else if (mode && *from == '-' && from[-1] == from[1]) { in unescape_delimited_string()
727 else if (from[1]=='\\') *(to++) = *(from++); in unescape_delimited_string()
732 *(to++) = c; in unescape_delimited_string()
735 } else if (!mode) *(to++) = *(from++); in unescape_delimited_string()
738 *(to++) = *(from++); in unescape_delimited_string()
740 *to = 0; in unescape_delimited_string()
755 if (len && line[len-1]=='\n') line[--len] = 0; in parse_pattern()
757 // Append this line to previous multiline command? (hit indicates type.) in parse_pattern()
759 // sed_line() it means the match range attached to this command in parse_pattern()
761 if (command && command->prev->hit) { in parse_pattern()
762 // Remove half-finished entry from list so remalloc() doesn't confuse it in parse_pattern()
763 TT.pattern = TT.pattern->prev; in parse_pattern()
765 c = command->c; in parse_pattern()
767 reg += command->arg1 + strlen(reg + command->arg1); in parse_pattern()
770 // TODO: using 256 to indicate 'a' means our s/// delimiter can't be in parse_pattern()
772 if (command->hit < 256) goto resume_s; in parse_pattern()
797 // Parse address range (if any) in parse_pattern()
804 command->lmatch[i] = -2-strtol(line, &line, 0); in parse_pattern()
805 } else if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0); in parse_pattern()
807 command->lmatch[i] = -1; in parse_pattern()
813 if (!*s) command->rmatch[i] = 0; in parse_pattern()
816 command->rmatch[i] = reg-toybuf; in parse_pattern()
827 command->not = 1; in parse_pattern()
833 c = command->c = *(line++); in parse_pattern()
838 command = xmemdup(toybuf, reg-toybuf); in parse_pattern()
839 reg = (reg-toybuf) + (char *)command; in parse_pattern()
844 if (!TT.nextlen--) break; in parse_pattern()
851 // so let's fill out arg2 first (since the regex part can't be multiple in parse_pattern()
855 command->arg2 = reg - (char *)command; in parse_pattern()
860 command->arg1 = reg-(char *)command; in parse_pattern()
861 command->hit = delim; in parse_pattern()
863 // get replacement - don't replace escapes yet because \1 and \& need in parse_pattern()
866 while (*end != command->hit) { in parse_pattern()
870 end[-1] = '\n'; in parse_pattern()
877 reg = extend_string((void *)&command, line, reg-(char *)command,end-line); in parse_pattern()
880 if (*line == command->hit) command->hit = 0; in parse_pattern()
883 reg--; in parse_pattern()
889 i = command->arg1; in parse_pattern()
890 command->arg1 = command->arg2; in parse_pattern()
891 command->arg2 = i; in parse_pattern()
899 if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l; in parse_pattern()
900 else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) { in parse_pattern()
901 command->sflags |= l << 3; in parse_pattern()
902 line--; in parse_pattern()
906 // We deferred actually parsing the regex until we had the s///i flag in parse_pattern()
908 if (!*TT.remember) command->arg1 = 0; in parse_pattern()
909 else xregcomp((void *)(command->arg1 + (char *)command), TT.remember, in parse_pattern()
910 (REG_EXTENDED*!!FLAG(r))|((command->sflags&1)*REG_ICASE)); in parse_pattern()
922 // eol status, and to retain the filename for error messages, we'd need in parse_pattern()
923 // to go up to arg5 just for this. Compromise: dynamically allocate the in parse_pattern()
935 command->w = reg - (char *)command; in parse_pattern()
936 command = xrealloc(command, command->w+(cc-line)+6); in parse_pattern()
937 reg = command->w + (char *)command; in parse_pattern()
953 command->arg1 = reg-(char *)command; in parse_pattern()
955 reg = extend_string((void *)&command, s, reg-(char *)command, len); in parse_pattern()
957 command->arg2 = reg-(char *)command; in parse_pattern()
960 reg = extend_string((void *)&command, s, reg-(char*)command, len); in parse_pattern()
971 command->hit = 0; in parse_pattern()
973 // btTqQ: end with space or semicolon, aicrw continue to newline. in parse_pattern()
977 else if (!command->arg1) break; in parse_pattern()
988 // Extend allocation to include new string. We use offsets instead of in parse_pattern()
989 // pointers so realloc() moving stuff doesn't break things. Ok to write in parse_pattern()
990 // \n over NUL terminator because call to extend_string() adds it back. in parse_pattern()
991 if (!command->arg1) command->arg1 = reg - (char*)command; in parse_pattern()
992 else if (*(command->arg1+(char *)command)) *(reg++) = '\n'; in parse_pattern()
994 command->arg1 = 0; in parse_pattern()
997 reg = extend_string((void *)&command, line, reg - (char *)command, end); in parse_pattern()
999 // Recopy data to remove escape sequences and handle line continuation. in parse_pattern()
1001 reg -= end+1; in parse_pattern()
1002 for (i = end; i; i--) { in parse_pattern()
1005 // escape at end of line: resume if -e escaped literal newline, in parse_pattern()
1007 if (!--i) { in parse_pattern()
1008 *--reg = 0; in parse_pattern()
1013 command->hit = 256; in parse_pattern()
1016 if (!(reg[-1] = unescape(*line))) reg[-1] = *line; in parse_pattern()
1028 error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line); in parse_pattern()
1038 // Lie to autoconf when it asks stupid questions, so configure regexes in sed_main()
1046 // Handling our own --version means we handle our own --help too. in sed_main()
1051 // If no -e or -f, first argument is the pattern. in sed_main()
1054 (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++); in sed_main()
1057 // Option parsing infrastructure can't interlace "-e blah -f blah -e blah" in sed_main()
1058 // so handle all -e, then all -f. (At least the behavior's consistent.) in sed_main()
1060 for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg)); in sed_main()
1062 for (al = TT.f; al; al = al->next) in sed_main()
1063 do_lines(xopenro(al->arg), TT.delim, parse_pattern); in sed_main()
1073 // Provide EOF flush at end of cumulative input for non-i mode. in sed_main()
1079 // todo: need to close fd when done for TOYBOX_FREE? in sed_main()