• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GPattern copy that supports raw (non-utf8) matching
2  * based on: GLIB - Library of useful routines for C programming
3  * Copyright (C) 1995-1997, 1999  Peter Mattis, Red Hat, Inc.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include "patternspec.h"
25 #include <string.h>
26 
27 typedef enum
28 {
29   MATCH_ALL,                    /* "*A?A*" */
30   MATCH_ALL_TAIL,               /* "*A?AA" */
31   MATCH_HEAD,                   /* "AAAA*" */
32   MATCH_TAIL,                   /* "*AAAA" */
33   MATCH_EXACT,                  /* "AAAAA" */
34   MATCH_LAST
35 } MatchType;
36 
37 struct _PatternSpec
38 {
39   MatchMode match_mode;
40   MatchType match_type;
41   guint pattern_length;
42   guint min_length;
43   guint max_length;
44   gchar *pattern;
45 };
46 
47 static inline gchar *
raw_strreverse(const gchar * str,gssize size)48 raw_strreverse (const gchar * str, gssize size)
49 {
50   g_assert (size > 0);
51   return g_strreverse (g_strndup (str, size));
52 }
53 
54 static inline gboolean
pattern_ph_match(const gchar * match_pattern,MatchMode match_mode,const gchar * match_string,gboolean * wildcard_reached_p)55 pattern_ph_match (const gchar * match_pattern, MatchMode match_mode,
56     const gchar * match_string, gboolean * wildcard_reached_p)
57 {
58   register const gchar *pattern, *string;
59   register gchar ch;
60 
61   pattern = match_pattern;
62   string = match_string;
63 
64   ch = *pattern;
65   pattern++;
66   while (ch) {
67     switch (ch) {
68       case '?':
69         if (!*string)
70           return FALSE;
71         if (match_mode == MATCH_MODE_UTF8)
72           string = g_utf8_next_char (string);
73         else
74           ++string;
75         break;
76 
77       case '*':
78         *wildcard_reached_p = TRUE;
79         do {
80           ch = *pattern;
81           pattern++;
82           if (ch == '?') {
83             if (!*string)
84               return FALSE;
85             if (match_mode == MATCH_MODE_UTF8)
86               string = g_utf8_next_char (string);
87             else
88               ++string;
89           }
90         }
91         while (ch == '*' || ch == '?');
92         if (!ch)
93           return TRUE;
94         do {
95           gboolean next_wildcard_reached = FALSE;
96           while (ch != *string) {
97             if (!*string)
98               return FALSE;
99             if (match_mode == MATCH_MODE_UTF8)
100               string = g_utf8_next_char (string);
101             else
102               ++string;
103           }
104           string++;
105           if (pattern_ph_match (pattern, match_mode, string,
106                   &next_wildcard_reached))
107             return TRUE;
108           if (next_wildcard_reached)
109             /* the forthcoming pattern substring up to the next wildcard has
110              * been matched, but a mismatch occurred for the rest of the
111              * pattern, following the next wildcard.
112              * there's no need to advance the current match position any
113              * further if the rest pattern will not match.
114              */
115             return FALSE;
116         }
117         while (*string);
118         break;
119 
120       default:
121         if (ch == *string)
122           string++;
123         else
124           return FALSE;
125         break;
126     }
127 
128     ch = *pattern;
129     pattern++;
130   }
131 
132   return *string == 0;
133 }
134 
135 static gboolean
pattern_match(PatternSpec * pspec,guint string_length,const gchar * string,const gchar * string_reversed)136 pattern_match (PatternSpec * pspec, guint string_length,
137     const gchar * string, const gchar * string_reversed)
138 {
139   MatchMode match_mode;
140 
141   g_assert (pspec != NULL);
142   g_assert (string != NULL);
143 
144   if (string_length < pspec->min_length || string_length > pspec->max_length)
145     return FALSE;
146 
147   match_mode = pspec->match_mode;
148   if (match_mode == MATCH_MODE_AUTO) {
149     if (!g_utf8_validate (string, string_length, NULL))
150       match_mode = MATCH_MODE_RAW;
151     else
152       match_mode = MATCH_MODE_UTF8;
153   }
154 
155   switch (pspec->match_type) {
156       gboolean dummy;
157     case MATCH_ALL:
158       return pattern_ph_match (pspec->pattern, match_mode, string, &dummy);
159     case MATCH_ALL_TAIL:
160       if (string_reversed)
161         return pattern_ph_match (pspec->pattern, match_mode, string_reversed,
162             &dummy);
163       else {
164         gboolean result;
165         gchar *tmp;
166         if (match_mode == MATCH_MODE_UTF8) {
167           tmp = g_utf8_strreverse (string, string_length);
168         } else {
169           tmp = raw_strreverse (string, string_length);
170         }
171         result = pattern_ph_match (pspec->pattern, match_mode, tmp, &dummy);
172         g_free (tmp);
173         return result;
174       }
175     case MATCH_HEAD:
176       if (pspec->pattern_length == string_length)
177         return memcmp (pspec->pattern, string, string_length) == 0;
178       else if (pspec->pattern_length)
179         return memcmp (pspec->pattern, string, pspec->pattern_length) == 0;
180       else
181         return TRUE;
182     case MATCH_TAIL:
183       if (pspec->pattern_length)
184         /* compare incl. NUL terminator */
185         return memcmp (pspec->pattern,
186             string + (string_length - pspec->pattern_length),
187             pspec->pattern_length + 1) == 0;
188       else
189         return TRUE;
190     case MATCH_EXACT:
191       if (pspec->pattern_length != string_length)
192         return FALSE;
193       else
194         return memcmp (pspec->pattern, string, string_length) == 0;
195     default:
196       g_return_val_if_fail (pspec->match_type < MATCH_LAST, FALSE);
197       return FALSE;
198   }
199 }
200 
201 PatternSpec *
pattern_spec_new(const gchar * pattern,MatchMode match_mode)202 pattern_spec_new (const gchar * pattern, MatchMode match_mode)
203 {
204   PatternSpec *pspec;
205   gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
206   gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
207   gboolean follows_wildcard = FALSE;
208   guint pending_jokers = 0;
209   const gchar *s;
210   gchar *d;
211   guint i;
212 
213   g_assert (pattern != NULL);
214   g_assert (match_mode != MATCH_MODE_UTF8
215       || g_utf8_validate (pattern, -1, NULL));
216 
217   /* canonicalize pattern and collect necessary stats */
218   pspec = g_new (PatternSpec, 1);
219   pspec->match_mode = match_mode;
220   pspec->pattern_length = strlen (pattern);
221   pspec->min_length = 0;
222   pspec->max_length = 0;
223   pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
224 
225   if (pspec->match_mode == MATCH_MODE_AUTO) {
226     if (!g_utf8_validate (pattern, -1, NULL))
227       pspec->match_mode = MATCH_MODE_RAW;
228   }
229 
230   d = pspec->pattern;
231   for (i = 0, s = pattern; *s != 0; s++) {
232     switch (*s) {
233       case '*':
234         if (follows_wildcard) { /* compress multiple wildcards */
235           pspec->pattern_length--;
236           continue;
237         }
238         follows_wildcard = TRUE;
239         if (hw_pos < 0)
240           hw_pos = i;
241         tw_pos = i;
242         break;
243       case '?':
244         pending_jokers++;
245         pspec->min_length++;
246         if (pspec->match_mode == MATCH_MODE_RAW) {
247           pspec->max_length += 1;
248         } else {
249           pspec->max_length += 4;       /* maximum UTF-8 character length */
250         }
251         continue;
252       default:
253         for (; pending_jokers; pending_jokers--, i++) {
254           *d++ = '?';
255           if (hj_pos < 0)
256             hj_pos = i;
257           tj_pos = i;
258         }
259         follows_wildcard = FALSE;
260         pspec->min_length++;
261         pspec->max_length++;
262         break;
263     }
264     *d++ = *s;
265     i++;
266   }
267   for (; pending_jokers; pending_jokers--) {
268     *d++ = '?';
269     if (hj_pos < 0)
270       hj_pos = i;
271     tj_pos = i;
272   }
273   *d++ = 0;
274   seen_joker = hj_pos >= 0;
275   seen_wildcard = hw_pos >= 0;
276   more_wildcards = seen_wildcard && hw_pos != tw_pos;
277   if (seen_wildcard)
278     pspec->max_length = G_MAXUINT;
279 
280   /* special case sole head/tail wildcard or exact matches */
281   if (!seen_joker && !more_wildcards) {
282     if (pspec->pattern[0] == '*') {
283       pspec->match_type = MATCH_TAIL;
284       memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
285       pspec->pattern[pspec->pattern_length] = 0;
286       return pspec;
287     }
288     if (pspec->pattern_length > 0 &&
289         pspec->pattern[pspec->pattern_length - 1] == '*') {
290       pspec->match_type = MATCH_HEAD;
291       pspec->pattern[--pspec->pattern_length] = 0;
292       return pspec;
293     }
294     if (!seen_wildcard) {
295       pspec->match_type = MATCH_EXACT;
296       return pspec;
297     }
298   }
299 
300   /* now just need to distinguish between head or tail match start */
301   tw_pos = pspec->pattern_length - 1 - tw_pos;  /* last pos to tail distance */
302   tj_pos = pspec->pattern_length - 1 - tj_pos;  /* last pos to tail distance */
303   if (seen_wildcard)
304     pspec->match_type = tw_pos > hw_pos ? MATCH_ALL_TAIL : MATCH_ALL;
305   else                          /* seen_joker */
306     pspec->match_type = tj_pos > hj_pos ? MATCH_ALL_TAIL : MATCH_ALL;
307   if (pspec->match_type == MATCH_ALL_TAIL) {
308     gchar *tmp = pspec->pattern;
309 
310     if (pspec->match_mode == MATCH_MODE_RAW) {
311       pspec->pattern = raw_strreverse (pspec->pattern, pspec->pattern_length);
312     } else {
313       pspec->pattern =
314           g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
315     }
316     g_free (tmp);
317   }
318   return pspec;
319 }
320 
321 void
pattern_spec_free(PatternSpec * pspec)322 pattern_spec_free (PatternSpec * pspec)
323 {
324   g_assert (pspec != NULL);
325 
326   g_free (pspec->pattern);
327   g_free (pspec);
328 }
329 
330 gboolean
pattern_match_string(PatternSpec * pspec,const gchar * string)331 pattern_match_string (PatternSpec * pspec, const gchar * string)
332 {
333   return pattern_match (pspec, strlen (string), string, NULL);
334 }
335