• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * An implementation of what I call the "Sea of Stars" algorithm for
3  * POSIX fnmatch(). The basic idea is that we factor the pattern into
4  * a head component (which we match first and can reject without ever
5  * measuring the length of the string), an optional tail component
6  * (which only exists if the pattern contains at least one star), and
7  * an optional "sea of stars", a set of star-separated components
8  * between the head and tail. After the head and tail matches have
9  * been removed from the input string, the components in the "sea of
10  * stars" are matched sequentially by searching for their first
11  * occurrence past the end of the previous match.
12  *
13  * - Rich Felker, April 2012
14  */
15 
16 #include <string.h>
17 #include <fnmatch.h>
18 #include <stdlib.h>
19 #include <wchar.h>
20 #include <wctype.h>
21 #include "locale_impl.h"
22 
23 #define END 0
24 #define UNMATCHABLE -2
25 #define BRACKET -3
26 #define QUESTION -4
27 #define STAR -5
28 
str_next(const char * str,size_t n,size_t * step)29 static int str_next(const char *str, size_t n, size_t *step)
30 {
31 	if (!n) {
32 		*step = 0;
33 		return 0;
34 	}
35 	if (str[0] >= 128U) {
36 		wchar_t wc;
37 		int k = mbtowc(&wc, str, n);
38 		if (k<0) {
39 			*step = 1;
40 			return -1;
41 		}
42 		*step = k;
43 		return wc;
44 	}
45 	*step = 1;
46 	return str[0];
47 }
48 
pat_next(const char * pat,size_t m,size_t * step,int flags)49 static int pat_next(const char *pat, size_t m, size_t *step, int flags)
50 {
51 	int esc = 0;
52 	if (!m || !*pat) {
53 		*step = 0;
54 		return END;
55 	}
56 	*step = 1;
57 	if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
58 		*step = 2;
59 		pat++;
60 		esc = 1;
61 		goto escaped;
62 	}
63 	if (pat[0]=='[') {
64 		size_t k = 1;
65 		if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
66 		if (k<m) if (pat[k] == ']') k++;
67 		for (; k<m && pat[k] && pat[k]!=']'; k++) {
68 			if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
69 				int z = pat[k+1];
70 				k+=2;
71 				if (k<m && pat[k]) k++;
72 				while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
73 				if (k==m || !pat[k]) break;
74 			}
75 		}
76 		if (k==m || !pat[k]) {
77 			*step = 1;
78 			return '[';
79 		}
80 		*step = k+1;
81 		return BRACKET;
82 	}
83 	if (pat[0] == '*')
84 		return STAR;
85 	if (pat[0] == '?')
86 		return QUESTION;
87 escaped:
88 	if (pat[0] >= 128U) {
89 		wchar_t wc;
90 		int k = mbtowc(&wc, pat, m);
91 		if (k<0) {
92 			*step = 0;
93 			return UNMATCHABLE;
94 		}
95 		*step = k + esc;
96 		return wc;
97 	}
98 	return pat[0];
99 }
100 
casefold(int k)101 static int casefold(int k)
102 {
103 	int c = towupper(k);
104 	return c == k ? towlower(k) : c;
105 }
106 
match_bracket(const char * p,int k,int kfold)107 static int match_bracket(const char *p, int k, int kfold)
108 {
109 	wchar_t wc;
110 	int inv = 0;
111 	p++;
112 	if (*p=='^' || *p=='!') {
113 		inv = 1;
114 		p++;
115 	}
116 	if (*p==']') {
117 		if (k==']') return !inv;
118 		p++;
119 	} else if (*p=='-') {
120 		if (k=='-') return !inv;
121 		p++;
122 	}
123 	wc = p[-1];
124 	for (; *p != ']'; p++) {
125 		if (p[0]=='-' && p[1]!=']') {
126 			wchar_t wc2;
127 			int l = mbtowc(&wc2, p+1, 4);
128 			if (l < 0) return 0;
129 			if (wc <= wc2)
130 				if ((unsigned)k-wc <= wc2-wc ||
131 				    (unsigned)kfold-wc <= wc2-wc)
132 					return !inv;
133 			p += l-1;
134 			continue;
135 		}
136 		if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
137 			const char *p0 = p+2;
138 			int z = p[1];
139 			p+=3;
140 			while (p[-1]!=z || p[0]!=']') p++;
141 			if (z == ':' && p-1-p0 < 16) {
142 				char buf[16];
143 				memcpy(buf, p0, p-1-p0);
144 				buf[p-1-p0] = 0;
145 				if (iswctype(k, wctype(buf)) ||
146 				    iswctype(kfold, wctype(buf)))
147 					return !inv;
148 			}
149 			continue;
150 		}
151 		if (*p < 128U) {
152 			wc = (unsigned char)*p;
153 		} else {
154 			int l = mbtowc(&wc, p, 4);
155 			if (l < 0) return 0;
156 			p += l-1;
157 		}
158 		if (wc==k || wc==kfold) return !inv;
159 	}
160 	return inv;
161 }
162 
fnmatch_internal(const char * pat,size_t m,const char * str,size_t n,int flags)163 static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
164 {
165 	const char *p, *ptail, *endpat;
166 	const char *s, *stail, *endstr;
167 	size_t pinc, sinc, tailcnt=0;
168 	int c, k, kfold;
169 
170 	if (flags & FNM_PERIOD) {
171 		if (*str == '.' && *pat != '.')
172 			return FNM_NOMATCH;
173 	}
174 	for (;;) {
175 		switch ((c = pat_next(pat, m, &pinc, flags))) {
176 		case UNMATCHABLE:
177 			return FNM_NOMATCH;
178 		case STAR:
179 			pat++;
180 			m--;
181 			break;
182 		default:
183 			k = str_next(str, n, &sinc);
184 			if (k <= 0)
185 				return (c==END) ? 0 : FNM_NOMATCH;
186 			str += sinc;
187 			n -= sinc;
188 			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
189 			if (c == BRACKET) {
190 				if (!match_bracket(pat, k, kfold))
191 					return FNM_NOMATCH;
192 			} else if (c != QUESTION && k != c && kfold != c) {
193 				return FNM_NOMATCH;
194 			}
195 			pat+=pinc;
196 			m-=pinc;
197 			continue;
198 		}
199 		break;
200 	}
201 
202 	/* Compute real pat length if it was initially unknown/-1 */
203 	m = strnlen(pat, m);
204 	endpat = pat + m;
205 
206 	/* Find the last * in pat and count chars needed after it */
207 	for (p=ptail=pat; p<endpat; p+=pinc) {
208 		switch (pat_next(p, endpat-p, &pinc, flags)) {
209 		case UNMATCHABLE:
210 			return FNM_NOMATCH;
211 		case STAR:
212 			tailcnt=0;
213 			ptail = p+1;
214 			break;
215 		default:
216 			tailcnt++;
217 			break;
218 		}
219 	}
220 
221 	/* Past this point we need not check for UNMATCHABLE in pat,
222 	 * because all of pat has already been parsed once. */
223 
224 	/* Compute real str length if it was initially unknown/-1 */
225 	n = strnlen(str, n);
226 	endstr = str + n;
227 	if (n < tailcnt) return FNM_NOMATCH;
228 
229 	/* Find the final tailcnt chars of str, accounting for UTF-8.
230 	 * On illegal sequences we may get it wrong, but in that case
231 	 * we necessarily have a matching failure anyway. */
232 	for (s=endstr; s>str && tailcnt; tailcnt--) {
233 		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
234 		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
235 	}
236 	if (tailcnt) return FNM_NOMATCH;
237 	stail = s;
238 
239 	/* Check that the pat and str tails match */
240 	p = ptail;
241 	for (;;) {
242 		c = pat_next(p, endpat-p, &pinc, flags);
243 		p += pinc;
244 		if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
245 			if (c != END) return FNM_NOMATCH;
246 			break;
247 		}
248 		s += sinc;
249 		kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
250 		if (c == BRACKET) {
251 			if (!match_bracket(p-pinc, k, kfold))
252 				return FNM_NOMATCH;
253 		} else if (c != QUESTION && k != c && kfold != c) {
254 			return FNM_NOMATCH;
255 		}
256 	}
257 
258 	/* We're all done with the tails now, so throw them out */
259 	endstr = stail;
260 	endpat = ptail;
261 
262 	/* Match pattern components until there are none left */
263 	while (pat<endpat) {
264 		p = pat;
265 		s = str;
266 		for (;;) {
267 			c = pat_next(p, endpat-p, &pinc, flags);
268 			p += pinc;
269 			/* Encountering * completes/commits a component */
270 			if (c == STAR) {
271 				pat = p;
272 				str = s;
273 				break;
274 			}
275 			k = str_next(s, endstr-s, &sinc);
276 			if (!k)
277 				return FNM_NOMATCH;
278 			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
279 			if (c == BRACKET) {
280 				if (!match_bracket(p-pinc, k, kfold))
281 					break;
282 			} else if (c != QUESTION && k != c && kfold != c) {
283 				break;
284 			}
285 			s += sinc;
286 		}
287 		if (c == STAR) continue;
288 		/* If we failed, advance str, by 1 char if it's a valid
289 		 * char, or past all invalid bytes otherwise. */
290 		k = str_next(str, endstr-str, &sinc);
291 		if (k > 0) str += sinc;
292 		else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
293 	}
294 
295 	return 0;
296 }
297 
fnmatch(const char * pat,const char * str,int flags)298 int fnmatch(const char *pat, const char *str, int flags)
299 {
300 	const char *s, *p;
301 	size_t inc;
302 	int c;
303 	if (flags & FNM_PATHNAME) for (;;) {
304 		for (s=str; *s && *s!='/'; s++);
305 		for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
306 		if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
307 			return FNM_NOMATCH;
308 		if (fnmatch_internal(pat, p-pat, str, s-str, flags))
309 			return FNM_NOMATCH;
310 		if (!c) return 0;
311 		str = s+1;
312 		pat = p+inc;
313 	} else if (flags & FNM_LEADING_DIR) {
314 		for (s=str; *s; s++) {
315 			if (*s != '/') continue;
316 			if (!fnmatch_internal(pat, -1, str, s-str, flags))
317 				return 0;
318 		}
319 	}
320 	return fnmatch_internal(pat, -1, str, -1, flags);
321 }
322