1
2 /*--------------------------------------------------------------------*/
3 /*--- A simple sequence matching facility. ---*/
4 /*--- m_seqmatch.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2008-2012 OpenWorks Ltd
12 info@open-works.co.uk
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #include "pub_core_basics.h"
33 #include "pub_core_libcassert.h"
34 #include "pub_core_libcbase.h" // VG_(strlen)
35 #include "pub_core_seqmatch.h" // self
36
37 /* ---------------------------------------------------------------------
38 A simple sequence matching facility
39 ------------------------------------------------------------------ */
40
41 /* See detailed comment in include/pub_tool_seqmatch.h about this. */
VG_(generic_match)42 Bool VG_(generic_match) (
43 Bool matchAll,
44 void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt,
45 void* input, SizeT szbInput, UWord nInput, UWord ixInput,
46 Bool (*pIsStar)(void*),
47 Bool (*pIsQuery)(void*),
48 Bool (*pattEQinp)(void*,void*,void*,UWord),
49 void* inputCompleter
50 )
51 {
52 /* This is the spec, written in my favourite formal specification
53 language. It specifies non-greedy matching of '*'s.
54
55 ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
56 ma ('*':ps) [] = ma ps []
57
58 ma ('?':ps) (i:is) = ma ps is
59 ma ('?':ps) [] = False
60
61 ma (p:ps) (i:is) = p == i && ma ps is
62
63 ma (p:ps) [] = False
64 ma [] (i:is) = False -- m-all, True for m-prefix
65 ma [] [] = True
66 */
67 Bool havePatt, haveInput;
68 void *currPatt, *currInput;
69 tailcall:
70 vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */
71 vg_assert(nInput >= 0 && nInput < 1000000); /* arbitrary */
72 vg_assert(ixPatt >= 0 && ixPatt <= nPatt);
73 vg_assert(ixInput >= 0 && ixInput <= nInput);
74
75 havePatt = ixPatt < nPatt;
76 haveInput = ixInput < nInput;
77
78 /* No specific need to set NULL when !have{Patt,Input}, but guards
79 against inadvertantly dereferencing an out of range pointer to
80 the pattern or input arrays. */
81 currPatt = havePatt ? ((Char*)patt) + szbPatt * ixPatt : NULL;
82 currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL;
83
84 // Deal with the complex case first: wildcards. Do frugal
85 // matching. When encountering a '*', first skip no characters
86 // at all, and see if the rest of the match still works. Only if
87 // that fails do we then skip a character, and retry at the next
88 // position.
89 //
90 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
91 //
92 // If we're out of input, check the rest of the pattern matches
93 // the empty input. This really means can only be be empty or
94 // composed entirely of '*'s.
95 //
96 // ma ('*':ps) [] = ma ps []
97 //
98 if (havePatt && pIsStar(currPatt)) {
99 if (haveInput) {
100 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
101 // we unavoidably have to make a real recursive call for the
102 // first half of the OR, since this isn't straight tail-recursion.
103 if (VG_(generic_match)( matchAll,
104 patt, szbPatt, nPatt, ixPatt+1,
105 input,szbInput,nInput, ixInput+0,
106 pIsStar,pIsQuery,pattEQinp,
107 inputCompleter) ) {
108 return True;
109 }
110 // but we can tail-recurse for the second call
111 ixInput++; goto tailcall;
112 } else {
113 // ma ('*':ps) [] = ma ps []
114 ixPatt++; goto tailcall;
115 }
116 }
117
118 // simpler cases now. Deal with '?' wildcards.
119 //
120 // ma ('?':ps) (i:is) = ma ps is
121 // ma ('?':ps) [] = False
122 if (havePatt && pIsQuery(currPatt)) {
123 if (haveInput) {
124 ixPatt++; ixInput++; goto tailcall;
125 } else {
126 return False;
127 }
128 }
129
130 // obvious case with literal chars in the pattern
131 //
132 // ma (p:ps) (i:is) = p == i && ma ps is
133 if (havePatt && haveInput) {
134 if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False;
135 ixPatt++; ixInput++; goto tailcall;
136 }
137
138 // if we run out of input before we run out of pattern, we must fail
139 // ma (p:ps) [] = False
140 if (havePatt && !haveInput) return False;
141
142 // if we run out of pattern before we run out of input, the
143 // verdict depends on the matching mode. If we are trying to
144 // match exactly (the pattern must consume the entire input)
145 // then the outcome is failure. However, if we're merely attempting
146 // to match some prefix of the input, then we have been successful.
147 //
148 // ma [] (i:is) = False -- m-all, True for m-prefix
149 if (!havePatt && haveInput) {
150 return matchAll ? False // match-all
151 : True; // match-prefix
152 }
153
154 // finally, if both sequence and input are both completely
155 // consumed, then we were successful, regardless of matching mode.
156 if (!havePatt && !haveInput) return True;
157
158 // end of cases
159 vg_assert(0);
160 }
161
162
163 /* And a parameterization of the above, to make it do
164 string matching.
165 */
charIsStar(void * pV)166 static Bool charIsStar ( void* pV ) { return *(Char*)pV == '*'; }
charIsQuery(void * pV)167 static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; }
char_p_EQ_i(void * pV,void * cV,void * null_completer,UWord ixcV)168 static Bool char_p_EQ_i ( void* pV, void* cV,
169 void* null_completer, UWord ixcV ) {
170 Char p = *(Char*)pV;
171 Char c = *(Char*)cV;
172 vg_assert(p != '*' && p != '?');
173 return p == c;
174 }
VG_(string_match)175 Bool VG_(string_match) ( const Char* patt, const Char* input )
176 {
177 return VG_(generic_match)(
178 True/* match-all */,
179 (void*)patt, sizeof(UChar), VG_(strlen)(patt), 0,
180 (void*)input, sizeof(UChar), VG_(strlen)(input), 0,
181 charIsStar, charIsQuery, char_p_EQ_i,
182 NULL
183 );
184 }
185
186
187 // test cases for the matcher (in match-all mode)
188 // typedef struct { char* patt; char* input; Bool xres; } Test;
189 //
190 //static Test tests[] =
191 // {
192 // { "" ,"" , True },
193 // { "a" ,"" , False },
194 // { "a" ,"b" , False },
195 // { "a" ,"a" , True },
196 // { "a" ,"aa" , False },
197 // { "*" ,"" , True },
198 // { "**" ,"" , True },
199 // { "*" ,"abc", True },
200 // { "*a" ,"abc", False },
201 // { "*b" ,"abc", False },
202 // { "*bc" ,"abc", True },
203 // { "a*b" ,"abc", False },
204 // { "a*c" ,"abc", True },
205 // { "*c" ,"abc", True },
206 // { "c*c" ,"abc", False },
207 // { "abc*" ,"abc", True },
208 // { "abc**" ,"abc", True },
209 // { "**abc" ,"abc", True },
210 // { "**a*b*c**" ,"abc", True },
211 // { "**a*b*d**" ,"abc", False },
212 // { "a?b" ,"abc", False },
213 // { "a?c" ,"abc", True },
214 // { "?" ,"" , False },
215 // { "?" ,"a" , True },
216 // { "?" ,"ab" , False },
217 // { "abcd" ,"abc", False },
218 // { "ab" ,"abc", False },
219 // { NULL ,NULL , False }
220 // };
221 //
222 //int main ( void )
223 //{
224 // Test* t;
225 // for (t = tests; t->patt; t++) {
226 // printf("%10s %6s %s\n",
227 // t->patt, t->input,
228 // match_string_all((UChar*)t->patt,(UChar*)t->input,True)
229 // == t->xres
230 // ? "pass" : "FAIL" );
231 // }
232 // return 0;
233 //}
234
235 /*--------------------------------------------------------------------*/
236 /*--- end m_seqmatch.c ---*/
237 /*--------------------------------------------------------------------*/
238