• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- A simple sequence matching facility.                         ---*/
4 /*---                                                 m_seqmatch.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2008-2012 OpenWorks Ltd
12       info@open-works.co.uk
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_core_basics.h"
33 #include "pub_core_libcassert.h"
34 #include "pub_core_libcbase.h"    // VG_(strlen)
35 #include "pub_core_seqmatch.h"    // self
36 
37 /* ---------------------------------------------------------------------
38    A simple sequence matching facility
39    ------------------------------------------------------------------ */
40 
41 /* See detailed comment in include/pub_tool_seqmatch.h about this. */
VG_(generic_match)42 Bool VG_(generic_match) (
43         Bool matchAll,
44         void* patt,  SizeT szbPatt,  UWord nPatt,  UWord ixPatt,
45         void* input, SizeT szbInput, UWord nInput, UWord ixInput,
46         Bool (*pIsStar)(void*),
47         Bool (*pIsQuery)(void*),
48         Bool (*pattEQinp)(void*,void*,void*,UWord),
49         void* inputCompleter
50      )
51 {
52    /* This is the spec, written in my favourite formal specification
53       language.  It specifies non-greedy matching of '*'s.
54 
55       ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
56       ma ('*':ps) []     = ma ps []
57 
58       ma ('?':ps) (i:is) = ma ps is
59       ma ('?':ps) []     = False
60 
61       ma (p:ps)   (i:is) = p == i && ma ps is
62 
63       ma (p:ps)   []     = False
64       ma []       (i:is) = False -- m-all, True for m-prefix
65       ma []       []     = True
66    */
67    Bool  havePatt, haveInput;
68    void  *currPatt, *currInput;
69   tailcall:
70    vg_assert(nPatt >= 0   && nPatt  < 1000000); /* arbitrary */
71    vg_assert(nInput >= 0  && nInput < 1000000); /* arbitrary */
72    vg_assert(ixPatt >= 0  && ixPatt <= nPatt);
73    vg_assert(ixInput >= 0 && ixInput <= nInput);
74 
75    havePatt  = ixPatt < nPatt;
76    haveInput = ixInput < nInput;
77 
78    /* No specific need to set NULL when !have{Patt,Input}, but guards
79       against inadvertantly dereferencing an out of range pointer to
80       the pattern or input arrays. */
81    currPatt  = havePatt  ? ((Char*)patt) + szbPatt * ixPatt    : NULL;
82    currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL;
83 
84    // Deal with the complex case first: wildcards.  Do frugal
85    // matching.  When encountering a '*', first skip no characters
86    // at all, and see if the rest of the match still works.  Only if
87    // that fails do we then skip a character, and retry at the next
88    // position.
89    //
90    // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
91    //
92    // If we're out of input, check the rest of the pattern matches
93    // the empty input.  This really means can only be be empty or
94    // composed entirely of '*'s.
95    //
96    // ma ('*':ps) []     = ma ps []
97    //
98    if (havePatt && pIsStar(currPatt)) {
99       if (haveInput) {
100          // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
101          // we unavoidably have to make a real recursive call for the
102          // first half of the OR, since this isn't straight tail-recursion.
103          if (VG_(generic_match)( matchAll,
104                                  patt, szbPatt, nPatt,  ixPatt+1,
105                                  input,szbInput,nInput, ixInput+0,
106                                  pIsStar,pIsQuery,pattEQinp,
107                                  inputCompleter) ) {
108             return True;
109          }
110          // but we can tail-recurse for the second call
111          ixInput++; goto tailcall;
112       } else {
113          // ma ('*':ps) []     = ma ps []
114          ixPatt++; goto tailcall;
115       }
116    }
117 
118    // simpler cases now.  Deal with '?' wildcards.
119    //
120    // ma ('?':ps) (i:is) = ma ps is
121    // ma ('?':ps) []     = False
122    if (havePatt && pIsQuery(currPatt)) {
123       if (haveInput) {
124          ixPatt++; ixInput++; goto tailcall;
125       } else {
126          return False;
127       }
128    }
129 
130    // obvious case with literal chars in the pattern
131    //
132    // ma (p:ps)   (i:is) = p == i && ma ps is
133    if (havePatt && haveInput) {
134       if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False;
135       ixPatt++; ixInput++; goto tailcall;
136    }
137 
138    // if we run out of input before we run out of pattern, we must fail
139    // ma (p:ps)   []     = False
140    if (havePatt && !haveInput) return False;
141 
142    // if we run out of pattern before we run out of input, the
143    // verdict depends on the matching mode.  If we are trying to
144    // match exactly (the pattern must consume the entire input)
145    // then the outcome is failure.  However, if we're merely attempting
146    // to match some prefix of the input, then we have been successful.
147    //
148    // ma []       (i:is) = False -- m-all, True for m-prefix
149    if (!havePatt && haveInput) {
150       return matchAll ? False // match-all
151                       : True; // match-prefix
152    }
153 
154    // finally, if both sequence and input are both completely
155    // consumed, then we were successful, regardless of matching mode.
156    if (!havePatt && !haveInput) return True;
157 
158    // end of cases
159    vg_assert(0);
160 }
161 
162 
163 /* And a parameterization of the above, to make it do
164    string matching.
165 */
charIsStar(void * pV)166 static Bool charIsStar  ( void* pV ) { return *(Char*)pV == '*'; }
charIsQuery(void * pV)167 static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; }
char_p_EQ_i(void * pV,void * cV,void * null_completer,UWord ixcV)168 static Bool char_p_EQ_i ( void* pV, void* cV,
169                           void* null_completer, UWord ixcV ) {
170    Char p = *(Char*)pV;
171    Char c = *(Char*)cV;
172    vg_assert(p != '*' && p != '?');
173    return p == c;
174 }
VG_(string_match)175 Bool VG_(string_match) ( const Char* patt, const Char* input )
176 {
177    return VG_(generic_match)(
178              True/* match-all */,
179              (void*)patt,  sizeof(UChar), VG_(strlen)(patt), 0,
180              (void*)input, sizeof(UChar), VG_(strlen)(input), 0,
181              charIsStar, charIsQuery, char_p_EQ_i,
182              NULL
183           );
184 }
185 
186 
187 // test cases for the matcher (in match-all mode)
188 // typedef struct { char* patt; char* input; Bool xres; } Test;
189 //
190 //static Test tests[] =
191 //  {
192 //    { ""          ,""   , True },
193 //    { "a"         ,""   , False },
194 //    { "a"         ,"b"  , False },
195 //    { "a"         ,"a"  , True },
196 //    { "a"         ,"aa" , False },
197 //    { "*"         ,""   , True },
198 //    { "**"        ,""   , True },
199 //    { "*"         ,"abc", True },
200 //    { "*a"        ,"abc", False },
201 //    { "*b"        ,"abc", False },
202 //    { "*bc"       ,"abc", True },
203 //    { "a*b"       ,"abc", False },
204 //    { "a*c"       ,"abc", True },
205 //    { "*c"        ,"abc", True },
206 //    { "c*c"       ,"abc", False },
207 //    { "abc*"      ,"abc", True },
208 //    { "abc**"     ,"abc", True },
209 //    { "**abc"     ,"abc", True },
210 //    { "**a*b*c**" ,"abc", True },
211 //    { "**a*b*d**" ,"abc", False },
212 //    { "a?b"       ,"abc", False },
213 //    { "a?c"       ,"abc", True },
214 //    { "?"         ,""   , False },
215 //    { "?"         ,"a"  , True },
216 //    { "?"         ,"ab" , False },
217 //    { "abcd"      ,"abc", False },
218 //    { "ab"        ,"abc", False },
219 //    { NULL        ,NULL , False }
220 //  };
221 //
222 //int main ( void )
223 //{
224 //   Test* t;
225 //   for (t = tests; t->patt; t++) {
226 //     printf("%10s %6s  %s\n",
227 //            t->patt, t->input,
228 //            match_string_all((UChar*)t->patt,(UChar*)t->input,True)
229 //            == t->xres
230 //               ? "pass" : "FAIL" );
231 //   }
232 //   return 0;
233 //}
234 
235 /*--------------------------------------------------------------------*/
236 /*--- end                                             m_seqmatch.c ---*/
237 /*--------------------------------------------------------------------*/
238