/* Pattern Matchers for Regular Expressions. Copyright (C) 1992, 1998, 2000, 2005-2006, 2010, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif /* Specification. */ #include "libgrep.h" #include #include #include #include #include #include "error.h" #include "exitfail.h" #include "xalloc.h" #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) # define IN_CTYPE_DOMAIN(c) 1 #else # define IN_CTYPE_DOMAIN(c) isascii(c) #endif #define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C)) #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_') struct patterns { /* Regex compiled regexp. */ struct re_pattern_buffer regexbuf; struct re_registers regs; /* This is here on account of a BRAIN-DEAD Q@#%!# library interface in regex.c. */ }; struct compiled_regex { bool match_words; bool match_lines; char eolbyte; /* The Regex compiled patterns. */ struct patterns *patterns; size_t pcount; }; static void * compile (const char *pattern, size_t pattern_size, bool match_icase, bool match_words, bool match_lines, char eolbyte, reg_syntax_t syntax) { struct compiled_regex *cregex; cregex = XMALLOC (struct compiled_regex); memset (cregex, '\0', sizeof (struct compiled_regex)); cregex->match_words = match_words; cregex->match_lines = match_lines; cregex->eolbyte = eolbyte; cregex->patterns = NULL; cregex->pcount = 0; re_set_syntax (syntax); /* For GNU regex compiler we have to pass the patterns separately to detect errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" GNU regex should have raised a syntax error. The same for backref, where the backref should have been local to each pattern. */ { const char *sep; size_t total = pattern_size; const char *motif = pattern; do { size_t len; const char *err; sep = (const char *) memchr (motif, '\n', total); if (sep) { len = sep - motif; sep++; total -= (len + 1); } else { len = total; total = 0; } cregex->patterns = xrealloc (cregex->patterns, (cregex->pcount + 1) * sizeof (struct patterns)); memset (&cregex->patterns[cregex->pcount], '\0', sizeof (struct patterns)); if ((err = re_compile_pattern (motif, len, &cregex->patterns[cregex->pcount].regexbuf)) != NULL) error (exit_failure, 0, "%s", err); cregex->pcount++; motif = sep; } while (sep && total != 0); } return cregex; } static void * Gcompile (const char *pattern, size_t pattern_size, bool match_icase, bool match_words, bool match_lines, char eolbyte) { return compile (pattern, pattern_size, match_icase, match_words, match_lines, eolbyte, RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); } static void * Ecompile (const char *pattern, size_t pattern_size, bool match_icase, bool match_words, bool match_lines, char eolbyte) { return compile (pattern, pattern_size, match_icase, match_words, match_lines, eolbyte, RE_SYNTAX_POSIX_EGREP); } static void * AWKcompile (const char *pattern, size_t pattern_size, bool match_icase, bool match_words, bool match_lines, char eolbyte) { return compile (pattern, pattern_size, match_icase, match_words, match_lines, eolbyte, RE_SYNTAX_AWK); } static size_t EGexecute (const void *compiled_pattern, const char *buf, size_t buf_size, size_t *match_size, bool exact) { struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern; char eol = cregex->eolbyte; register const char *buflim = buf + buf_size; register const char *beg; register const char *end; for (beg = buf; beg < buflim; beg = end) { size_t i; end = (const char *) memchr (beg, eol, buflim - beg); if (end == NULL) end = buflim; /* Here, either end < buflim && *end == eol, or end == buflim. */ for (i = 0; i < cregex->pcount; i++) { int start, len; cregex->patterns[i].regexbuf.not_eol = 0; if (0 <= (start = re_search (&cregex->patterns[i].regexbuf, beg, end - beg, 0, end - beg, &cregex->patterns[i].regs))) { len = cregex->patterns[i].regs.end[0] - start; if (exact) { *match_size = len; return start; } if (cregex->match_lines) { if (len == end - beg) /* implies start == 0 */ goto success; } else if (cregex->match_words) { /* If -w, check if the match aligns with word boundaries. We do this iteratively because: (a) the line may contain more than one occurence of the pattern, and (b) Several alternatives in the pattern might be valid at a given point, and we may need to consider a shorter one to find a word boundary. */ while (start >= 0) { if ((start == 0 || !IS_WORD_CONSTITUENT ((unsigned char) beg[start - 1])) && (start + len == end - beg || !IS_WORD_CONSTITUENT ((unsigned char) beg[start + len]))) goto success; if (len > 0) { /* Try a shorter length anchored at the same place. */ --len; cregex->patterns[i].regexbuf.not_eol = 1; len = re_match (&cregex->patterns[i].regexbuf, beg, start + len, start, &cregex->patterns[i].regs); } if (len <= 0) { /* Try looking further on. */ if (start == end - beg) break; ++start; cregex->patterns[i].regexbuf.not_eol = 0; start = re_search (&cregex->patterns[i].regexbuf, beg, end - beg, start, end - beg - start, &cregex->patterns[i].regs); len = cregex->patterns[i].regs.end[0] - start; } } } else goto success; } } if (end < buflim) end++; } return (size_t) -1; success: *match_size = end - beg; return beg - buf; } static void EGfree (void *compiled_pattern) { struct compiled_regex *cregex = (struct compiled_regex *) compiled_pattern; free (cregex->patterns); free (cregex); } /* POSIX Basic Regular Expressions */ matcher_t matcher_grep = { Gcompile, EGexecute, EGfree }; /* POSIX Extended Regular Expressions */ matcher_t matcher_egrep = { Ecompile, EGexecute, EGfree }; /* AWK Regular Expressions */ matcher_t matcher_awk = { AWKcompile, EGexecute, EGfree };