1 /* GPattern copy that supports raw (non-utf8) matching
2 * based on: GLIB - Library of useful routines for C programming
3 * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
24 #include "patternspec.h"
25 #include <string.h>
26
27 typedef enum
28 {
29 MATCH_ALL, /* "*A?A*" */
30 MATCH_ALL_TAIL, /* "*A?AA" */
31 MATCH_HEAD, /* "AAAA*" */
32 MATCH_TAIL, /* "*AAAA" */
33 MATCH_EXACT, /* "AAAAA" */
34 MATCH_LAST
35 } MatchType;
36
37 struct _PatternSpec
38 {
39 MatchMode match_mode;
40 MatchType match_type;
41 guint pattern_length;
42 guint min_length;
43 guint max_length;
44 gchar *pattern;
45 };
46
47 static inline gchar *
raw_strreverse(const gchar * str,gssize size)48 raw_strreverse (const gchar * str, gssize size)
49 {
50 g_assert (size > 0);
51 return g_strreverse (g_strndup (str, size));
52 }
53
54 static inline gboolean
pattern_ph_match(const gchar * match_pattern,MatchMode match_mode,const gchar * match_string,gboolean * wildcard_reached_p)55 pattern_ph_match (const gchar * match_pattern, MatchMode match_mode,
56 const gchar * match_string, gboolean * wildcard_reached_p)
57 {
58 register const gchar *pattern, *string;
59 register gchar ch;
60
61 pattern = match_pattern;
62 string = match_string;
63
64 ch = *pattern;
65 pattern++;
66 while (ch) {
67 switch (ch) {
68 case '?':
69 if (!*string)
70 return FALSE;
71 if (match_mode == MATCH_MODE_UTF8)
72 string = g_utf8_next_char (string);
73 else
74 ++string;
75 break;
76
77 case '*':
78 *wildcard_reached_p = TRUE;
79 do {
80 ch = *pattern;
81 pattern++;
82 if (ch == '?') {
83 if (!*string)
84 return FALSE;
85 if (match_mode == MATCH_MODE_UTF8)
86 string = g_utf8_next_char (string);
87 else
88 ++string;
89 }
90 }
91 while (ch == '*' || ch == '?');
92 if (!ch)
93 return TRUE;
94 do {
95 gboolean next_wildcard_reached = FALSE;
96 while (ch != *string) {
97 if (!*string)
98 return FALSE;
99 if (match_mode == MATCH_MODE_UTF8)
100 string = g_utf8_next_char (string);
101 else
102 ++string;
103 }
104 string++;
105 if (pattern_ph_match (pattern, match_mode, string,
106 &next_wildcard_reached))
107 return TRUE;
108 if (next_wildcard_reached)
109 /* the forthcoming pattern substring up to the next wildcard has
110 * been matched, but a mismatch occurred for the rest of the
111 * pattern, following the next wildcard.
112 * there's no need to advance the current match position any
113 * further if the rest pattern will not match.
114 */
115 return FALSE;
116 }
117 while (*string);
118 break;
119
120 default:
121 if (ch == *string)
122 string++;
123 else
124 return FALSE;
125 break;
126 }
127
128 ch = *pattern;
129 pattern++;
130 }
131
132 return *string == 0;
133 }
134
135 static gboolean
pattern_match(PatternSpec * pspec,guint string_length,const gchar * string,const gchar * string_reversed)136 pattern_match (PatternSpec * pspec, guint string_length,
137 const gchar * string, const gchar * string_reversed)
138 {
139 MatchMode match_mode;
140
141 g_assert (pspec != NULL);
142 g_assert (string != NULL);
143
144 if (string_length < pspec->min_length || string_length > pspec->max_length)
145 return FALSE;
146
147 match_mode = pspec->match_mode;
148 if (match_mode == MATCH_MODE_AUTO) {
149 if (!g_utf8_validate (string, string_length, NULL))
150 match_mode = MATCH_MODE_RAW;
151 else
152 match_mode = MATCH_MODE_UTF8;
153 }
154
155 switch (pspec->match_type) {
156 gboolean dummy;
157 case MATCH_ALL:
158 return pattern_ph_match (pspec->pattern, match_mode, string, &dummy);
159 case MATCH_ALL_TAIL:
160 if (string_reversed)
161 return pattern_ph_match (pspec->pattern, match_mode, string_reversed,
162 &dummy);
163 else {
164 gboolean result;
165 gchar *tmp;
166 if (match_mode == MATCH_MODE_UTF8) {
167 tmp = g_utf8_strreverse (string, string_length);
168 } else {
169 tmp = raw_strreverse (string, string_length);
170 }
171 result = pattern_ph_match (pspec->pattern, match_mode, tmp, &dummy);
172 g_free (tmp);
173 return result;
174 }
175 case MATCH_HEAD:
176 if (pspec->pattern_length == string_length)
177 return memcmp (pspec->pattern, string, string_length) == 0;
178 else if (pspec->pattern_length)
179 return memcmp (pspec->pattern, string, pspec->pattern_length) == 0;
180 else
181 return TRUE;
182 case MATCH_TAIL:
183 if (pspec->pattern_length)
184 /* compare incl. NUL terminator */
185 return memcmp (pspec->pattern,
186 string + (string_length - pspec->pattern_length),
187 pspec->pattern_length + 1) == 0;
188 else
189 return TRUE;
190 case MATCH_EXACT:
191 if (pspec->pattern_length != string_length)
192 return FALSE;
193 else
194 return memcmp (pspec->pattern, string, string_length) == 0;
195 default:
196 g_return_val_if_fail (pspec->match_type < MATCH_LAST, FALSE);
197 return FALSE;
198 }
199 }
200
201 PatternSpec *
pattern_spec_new(const gchar * pattern,MatchMode match_mode)202 pattern_spec_new (const gchar * pattern, MatchMode match_mode)
203 {
204 PatternSpec *pspec;
205 gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
206 gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
207 gboolean follows_wildcard = FALSE;
208 guint pending_jokers = 0;
209 const gchar *s;
210 gchar *d;
211 guint i;
212
213 g_assert (pattern != NULL);
214 g_assert (match_mode != MATCH_MODE_UTF8
215 || g_utf8_validate (pattern, -1, NULL));
216
217 /* canonicalize pattern and collect necessary stats */
218 pspec = g_new (PatternSpec, 1);
219 pspec->match_mode = match_mode;
220 pspec->pattern_length = strlen (pattern);
221 pspec->min_length = 0;
222 pspec->max_length = 0;
223 pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
224
225 if (pspec->match_mode == MATCH_MODE_AUTO) {
226 if (!g_utf8_validate (pattern, -1, NULL))
227 pspec->match_mode = MATCH_MODE_RAW;
228 }
229
230 d = pspec->pattern;
231 for (i = 0, s = pattern; *s != 0; s++) {
232 switch (*s) {
233 case '*':
234 if (follows_wildcard) { /* compress multiple wildcards */
235 pspec->pattern_length--;
236 continue;
237 }
238 follows_wildcard = TRUE;
239 if (hw_pos < 0)
240 hw_pos = i;
241 tw_pos = i;
242 break;
243 case '?':
244 pending_jokers++;
245 pspec->min_length++;
246 if (pspec->match_mode == MATCH_MODE_RAW) {
247 pspec->max_length += 1;
248 } else {
249 pspec->max_length += 4; /* maximum UTF-8 character length */
250 }
251 continue;
252 default:
253 for (; pending_jokers; pending_jokers--, i++) {
254 *d++ = '?';
255 if (hj_pos < 0)
256 hj_pos = i;
257 tj_pos = i;
258 }
259 follows_wildcard = FALSE;
260 pspec->min_length++;
261 pspec->max_length++;
262 break;
263 }
264 *d++ = *s;
265 i++;
266 }
267 for (; pending_jokers; pending_jokers--) {
268 *d++ = '?';
269 if (hj_pos < 0)
270 hj_pos = i;
271 tj_pos = i;
272 }
273 *d++ = 0;
274 seen_joker = hj_pos >= 0;
275 seen_wildcard = hw_pos >= 0;
276 more_wildcards = seen_wildcard && hw_pos != tw_pos;
277 if (seen_wildcard)
278 pspec->max_length = G_MAXUINT;
279
280 /* special case sole head/tail wildcard or exact matches */
281 if (!seen_joker && !more_wildcards) {
282 if (pspec->pattern[0] == '*') {
283 pspec->match_type = MATCH_TAIL;
284 memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
285 pspec->pattern[pspec->pattern_length] = 0;
286 return pspec;
287 }
288 if (pspec->pattern_length > 0 &&
289 pspec->pattern[pspec->pattern_length - 1] == '*') {
290 pspec->match_type = MATCH_HEAD;
291 pspec->pattern[--pspec->pattern_length] = 0;
292 return pspec;
293 }
294 if (!seen_wildcard) {
295 pspec->match_type = MATCH_EXACT;
296 return pspec;
297 }
298 }
299
300 /* now just need to distinguish between head or tail match start */
301 tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
302 tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
303 if (seen_wildcard)
304 pspec->match_type = tw_pos > hw_pos ? MATCH_ALL_TAIL : MATCH_ALL;
305 else /* seen_joker */
306 pspec->match_type = tj_pos > hj_pos ? MATCH_ALL_TAIL : MATCH_ALL;
307 if (pspec->match_type == MATCH_ALL_TAIL) {
308 gchar *tmp = pspec->pattern;
309
310 if (pspec->match_mode == MATCH_MODE_RAW) {
311 pspec->pattern = raw_strreverse (pspec->pattern, pspec->pattern_length);
312 } else {
313 pspec->pattern =
314 g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
315 }
316 g_free (tmp);
317 }
318 return pspec;
319 }
320
321 void
pattern_spec_free(PatternSpec * pspec)322 pattern_spec_free (PatternSpec * pspec)
323 {
324 g_assert (pspec != NULL);
325
326 g_free (pspec->pattern);
327 g_free (pspec);
328 }
329
330 gboolean
pattern_match_string(PatternSpec * pspec,const gchar * string)331 pattern_match_string (PatternSpec * pspec, const gchar * string)
332 {
333 return pattern_match (pspec, strlen (string), string, NULL);
334 }
335