1 /* Pattern Matcher for Fixed String search.
2 Copyright (C) 1992, 1998, 2000, 2005-2006, 2010, 2013 Free Software
3 Foundation, Inc.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 /* Specification. */
23 #include "libgrep.h"
24
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
/* We can handle multibyte strings.  */
33 # define MBS_SUPPORT
34 # include <wchar.h>
35 # include <wctype.h>
36 #endif
37
38 #include "error.h"
39 #include "exitfail.h"
40 #include "xalloc.h"
41 #include "kwset.h"
42 #include "gettext.h"
43 #define _(str) gettext (str)
44
/* IN_CTYPE_DOMAIN (C) is nonzero when C may be handed to the <ctype.h>
   classification functions.  It is the constant 1 when STDC_HEADERS is
   defined, or when isascii is neither a macro nor announced by
   HAVE_ISASCII; otherwise it defers to isascii ().  */
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif
/* Guarded <ctype.h> wrappers.  They can expand their argument more than
   once, so pass only expressions without side effects.  */
#define ISUPPER(C) (IN_CTYPE_DOMAIN (C) && isupper (C))
/* C converted to lower case if it is an upper-case letter, else C itself.  */
#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
#define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C))
/* True for the characters that make up a "word": alphanumerics and '_'.  */
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')

/* Number of distinct unsigned char values; the size of byte-indexed
   tables such as the case-folding table built in kwsinit.  */
#define NCHAR (UCHAR_MAX + 1)
56
/* A compiled fixed-string pattern together with the matching options
   that Fexecute consults.  Filled in by kwsinit.  */
struct compiled_kwset {
  /* Keyword set obtained from kwsalloc.  */
  kwset_t kwset;
  /* Table of NCHAR entries mapping each byte to TOLOWER of it, handed to
     kwsalloc for case-insensitive matching; NULL when case matters.  */
  char *trans;
  /* If true (and match_lines is false), Fexecute accepts a match only
     when it is not adjacent to a word constituent on either side.  */
  bool match_words;
  /* If true, Fexecute accepts a match only when it is delimited by
     eolbyte or a buffer edge on both sides.  */
  bool match_lines;
  /* The byte that Fexecute treats as the end-of-line marker.  */
  char eolbyte;
};
64
65 static void
kwsinit(struct compiled_kwset * ckwset,bool match_icase,bool match_words,bool match_lines,char eolbyte)66 kwsinit (struct compiled_kwset *ckwset,
67 bool match_icase, bool match_words, bool match_lines, char eolbyte)
68 {
69 if (match_icase)
70 {
71 int i;
72
73 ckwset->trans = XNMALLOC (NCHAR, char);
74 for (i = 0; i < NCHAR; i++)
75 ckwset->trans[i] = TOLOWER (i);
76 ckwset->kwset = kwsalloc (ckwset->trans);
77 }
78 else
79 {
80 ckwset->trans = NULL;
81 ckwset->kwset = kwsalloc (NULL);
82 }
83 if (ckwset->kwset == NULL)
84 error (exit_failure, 0, _("memory exhausted"));
85 ckwset->match_words = match_words;
86 ckwset->match_lines = match_lines;
87 ckwset->eolbyte = eolbyte;
88 }
89
90 static void *
Fcompile(const char * pattern,size_t pattern_size,bool match_icase,bool match_words,bool match_lines,char eolbyte)91 Fcompile (const char *pattern, size_t pattern_size,
92 bool match_icase, bool match_words, bool match_lines,
93 char eolbyte)
94 {
95 struct compiled_kwset *ckwset;
96 const char *beg;
97 const char *err;
98
99 ckwset = XMALLOC (struct compiled_kwset);
100 kwsinit (ckwset, match_icase, match_words, match_lines, eolbyte);
101
102 beg = pattern;
103 do
104 {
105 const char *lim;
106
107 for (lim = beg; lim < pattern + pattern_size && *lim != '\n'; ++lim)
108 ;
109 if ((err = kwsincr (ckwset->kwset, beg, lim - beg)) != NULL)
110 error (exit_failure, 0, "%s", err);
111 if (lim < pattern + pattern_size)
112 ++lim;
113 beg = lim;
114 }
115 while (beg < pattern + pattern_size);
116
117 if ((err = kwsprep (ckwset->kwset)) != NULL)
118 error (exit_failure, 0, "%s", err);
119 return ckwset;
120 }
121
122 #ifdef MBS_SUPPORT
/* Allocate and return a map of the character boundaries in the BUF_SIZE
   bytes at BUF.

   Entry I of the map is nonzero when byte I starts a character, i.e. is
   a single-byte character or the first byte of a multibyte character;
   it then holds the length in bytes that mbrlen () reported for that
   character.  Entry I is zero when byte I lies inside a multibyte
   character.  Invalid sequences, truncated sequences and NUL bytes are
   treated as one-byte characters.

   The map has BUF_SIZE + 1 entries.  The final entry, for the
   end-of-buffer position, is always nonzero: Fexecute starts searches
   at every position up to and including the end of the buffer and looks
   the match position up in this map, so that index must be readable.

   The result is never NULL; the process is aborted if the map cannot be
   allocated.  Caller must free the array.  */
static char *
check_multibyte_string (const char *buf, size_t buf_size)
{
  char *mb_properties;
  mbstate_t cur_state;
  size_t i;

  /* BUF_SIZE + 1 must not wrap around to zero.  */
  if (buf_size == (size_t) -1)
    abort ();

  /* calloc gives the all-zero ("inside a character") default.  Fail
     loudly instead of handing a null pointer to the loop below.  */
  mb_properties = calloc (buf_size + 1, 1);
  if (mb_properties == NULL)
    abort ();

  memset (&cur_state, 0, sizeof cur_state);
  for (i = 0; i < buf_size; )
    {
      size_t mbclen = mbrlen (buf + i, buf_size - i, &cur_state);

      if (mbclen == (size_t) -1 || mbclen == (size_t) -2)
        {
          /* An invalid sequence, or a truncated multibyte character.
             We treat it as a singlebyte character.  The conversion
             state is unusable after such a result, so start afresh
             for the next byte.  */
          mbclen = 1;
          memset (&cur_state, 0, sizeof cur_state);
        }
      else if (mbclen == 0)
        {
          /* A NUL character occupies one byte.  */
          mbclen = 1;
        }

      mb_properties[i] = (char) mbclen;
      i += mbclen;
    }

  /* The end of the buffer is always a character boundary.  */
  mb_properties[buf_size] = 1;

  return mb_properties;
}
153 #endif
154
155 static size_t
Fexecute(const void * compiled_pattern,const char * buf,size_t buf_size,size_t * match_size,bool exact)156 Fexecute (const void *compiled_pattern, const char *buf, size_t buf_size,
157 size_t *match_size, bool exact)
158 {
159 struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
160 char eol = ckwset->eolbyte;
161 register const char *buflim = buf + buf_size;
162 register const char *beg;
163 register size_t len;
164 #ifdef MBS_SUPPORT
165 char *mb_properties;
166 if (MB_CUR_MAX > 1)
167 mb_properties = check_multibyte_string (buf, buf_size);
168 #endif /* MBS_SUPPORT */
169
170 for (beg = buf; beg <= buflim; ++beg)
171 {
172 struct kwsmatch kwsmatch;
173 size_t offset = kwsexec (ckwset->kwset, beg, buflim - beg, &kwsmatch);
174 if (offset == (size_t) -1)
175 {
176 #ifdef MBS_SUPPORT
177 if (MB_CUR_MAX > 1)
178 free (mb_properties);
179 #endif /* MBS_SUPPORT */
180 return offset;
181 }
182 #ifdef MBS_SUPPORT
183 if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0)
184 continue; /* It is a part of multibyte character. */
185 #endif /* MBS_SUPPORT */
186 beg += offset;
187 len = kwsmatch.size[0];
188 if (exact)
189 {
190 *match_size = len;
191 #ifdef MBS_SUPPORT
192 if (MB_CUR_MAX > 1)
193 free (mb_properties);
194 #endif /* MBS_SUPPORT */
195 return beg - buf;
196 }
197 if (ckwset->match_lines)
198 {
199 if (beg > buf && beg[-1] != eol)
200 continue;
201 if (beg + len < buflim && beg[len] != eol)
202 continue;
203 goto success;
204 }
205 else if (ckwset->match_words)
206 {
207 register const char *curr;
208 for (curr = beg; len; )
209 {
210 if (curr > buf && IS_WORD_CONSTITUENT ((unsigned char) curr[-1]))
211 break;
212 if (curr + len < buflim
213 && IS_WORD_CONSTITUENT ((unsigned char) curr[len]))
214 {
215 offset = kwsexec (ckwset->kwset, beg, --len, &kwsmatch);
216 if (offset == (size_t) -1)
217 {
218 #ifdef MBS_SUPPORT
219 if (MB_CUR_MAX > 1)
220 free (mb_properties);
221 #endif /* MBS_SUPPORT */
222 return offset;
223 }
224 curr = beg + offset;
225 len = kwsmatch.size[0];
226 }
227 else
228 goto success;
229 }
230 }
231 else
232 goto success;
233 }
234
235 #ifdef MBS_SUPPORT
236 if (MB_CUR_MAX > 1)
237 free (mb_properties);
238 #endif /* MBS_SUPPORT */
239 return -1;
240
241 success:
242 {
243 register const char *end;
244
245 end = (const char *) memchr (beg + len, eol, buflim - (beg + len));
246 if (end != NULL)
247 end++;
248 else
249 end = buflim;
250 while (buf < beg && beg[-1] != eol)
251 --beg;
252 *match_size = end - beg;
253 #ifdef MBS_SUPPORT
254 if (MB_CUR_MAX > 1)
255 free (mb_properties);
256 #endif /* MBS_SUPPORT */
257 return beg - buf;
258 }
259 }
260
261 static void
Ffree(void * compiled_pattern)262 Ffree (void *compiled_pattern)
263 {
264 struct compiled_kwset *ckwset = (struct compiled_kwset *) compiled_pattern;
265
266 free (ckwset->trans);
267 free (ckwset);
268 }
269
/* The fixed-string matcher: its compile, execute and free entry points,
   in that order.  */
matcher_t matcher_fgrep =
  {
    Fcompile,
    Fexecute,
    Ffree
  };
276
277