/*--------------------------------------------------------------------*/ /*--- A simple sequence matching facility. ---*/ /*--- m_seqmatch.c ---*/ /*--------------------------------------------------------------------*/ /* This file is part of Valgrind, a dynamic binary instrumentation framework. Copyright (C) 2008-2013 OpenWorks Ltd info@open-works.co.uk This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. The GNU General Public License is contained in the file COPYING. */ #include "pub_core_basics.h" #include "pub_core_libcassert.h" #include "pub_core_libcbase.h" // VG_(strlen) #include "pub_core_seqmatch.h" // self /* --------------------------------------------------------------------- A simple sequence matching facility ------------------------------------------------------------------ */ /* See detailed comment in include/pub_tool_seqmatch.h about this. */ Bool VG_(generic_match) ( Bool matchAll, const void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt, const void* input, SizeT szbInput, UWord nInput, UWord ixInput, Bool (*pIsStar)(const void*), Bool (*pIsQuery)(const void*), Bool (*pattEQinp)(const void*,const void*,void*,UWord), void* inputCompleter, Bool (*haveInputInpC)(void*,UWord) ) { /* This is the spec, written in my favourite formal specification language. It specifies non-greedy matching of '*'s. ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is ma ('*':ps) [] = ma ps [] ma ('?':ps) (i:is) = ma ps is ma ('?':ps) [] = False ma (p:ps) (i:is) = p == i && ma ps is ma (p:ps) [] = False ma [] (i:is) = False -- m-all, True for m-prefix ma [] [] = True */ Bool havePatt, haveInput; const HChar *currPatt, *currInput; tailcall: vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */ vg_assert(inputCompleter || (nInput >= 0 && nInput < 1000000)); /* arbitrary */ vg_assert(ixPatt >= 0 && ixPatt <= nPatt); vg_assert(ixInput >= 0 && (inputCompleter || ixInput <= nInput)); havePatt = ixPatt < nPatt; haveInput = inputCompleter ? (*haveInputInpC)(inputCompleter, ixInput) : ixInput < nInput; /* No specific need to set NULL when !have{Patt,Input}, but guards against inadvertantly dereferencing an out of range pointer to the pattern or input arrays. */ currPatt = havePatt ? ((const HChar*)patt) + szbPatt * ixPatt : NULL; currInput = haveInput && !inputCompleter ? ((const HChar*)input) + szbInput * ixInput : NULL; // Deal with the complex case first: wildcards. Do frugal // matching. When encountering a '*', first skip no characters // at all, and see if the rest of the match still works. Only if // that fails do we then skip a character, and retry at the next // position. // // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is // // If we're out of input, check the rest of the pattern matches // the empty input. This really means can only be be empty or // composed entirely of '*'s. // // ma ('*':ps) [] = ma ps [] // if (havePatt && pIsStar(currPatt)) { if (haveInput) { // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is // we unavoidably have to make a real recursive call for the // first half of the OR, since this isn't straight tail-recursion. if (VG_(generic_match)( matchAll, patt, szbPatt, nPatt, ixPatt+1, input,szbInput,nInput, ixInput+0, pIsStar,pIsQuery,pattEQinp, inputCompleter,haveInputInpC) ) { return True; } // but we can tail-recurse for the second call ixInput++; goto tailcall; } else { // ma ('*':ps) [] = ma ps [] ixPatt++; goto tailcall; } } // simpler cases now. Deal with '?' wildcards. // // ma ('?':ps) (i:is) = ma ps is // ma ('?':ps) [] = False if (havePatt && pIsQuery(currPatt)) { if (haveInput) { ixPatt++; ixInput++; goto tailcall; } else { return False; } } // obvious case with literal chars in the pattern // // ma (p:ps) (i:is) = p == i && ma ps is if (havePatt && haveInput) { if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False; ixPatt++; ixInput++; goto tailcall; } // if we run out of input before we run out of pattern, we must fail // ma (p:ps) [] = False if (havePatt && !haveInput) return False; // if we run out of pattern before we run out of input, the // verdict depends on the matching mode. If we are trying to // match exactly (the pattern must consume the entire input) // then the outcome is failure. However, if we're merely attempting // to match some prefix of the input, then we have been successful. // // ma [] (i:is) = False -- m-all, True for m-prefix if (!havePatt && haveInput) { return matchAll ? False // match-all : True; // match-prefix } // finally, if both sequence and input are both completely // consumed, then we were successful, regardless of matching mode. if (!havePatt && !haveInput) return True; // end of cases vg_assert(0); } /* And a parameterization of the above, to make it do string matching. */ static Bool charIsStar ( const void* pV ) { return *(const HChar*)pV == '*'; } static Bool charIsQuery ( const void* pV ) { return *(const HChar*)pV == '?'; } static Bool char_p_EQ_i ( const void* pV, const void* cV, void* null_completer, UWord ixcV ) { HChar p = *(const HChar*)pV; HChar c = *(const HChar*)cV; vg_assert(p != '*' && p != '?'); return p == c; } Bool VG_(string_match) ( const HChar* patt, const HChar* input ) { return VG_(generic_match)( True/* match-all */, patt, sizeof(HChar), VG_(strlen)(patt), 0, input, sizeof(HChar), VG_(strlen)(input), 0, charIsStar, charIsQuery, char_p_EQ_i, NULL, NULL ); } // test cases for the matcher (in match-all mode) // typedef struct { char* patt; char* input; Bool xres; } Test; // //static Test tests[] = // { // { "" ,"" , True }, // { "a" ,"" , False }, // { "a" ,"b" , False }, // { "a" ,"a" , True }, // { "a" ,"aa" , False }, // { "*" ,"" , True }, // { "**" ,"" , True }, // { "*" ,"abc", True }, // { "*a" ,"abc", False }, // { "*b" ,"abc", False }, // { "*bc" ,"abc", True }, // { "a*b" ,"abc", False }, // { "a*c" ,"abc", True }, // { "*c" ,"abc", True }, // { "c*c" ,"abc", False }, // { "abc*" ,"abc", True }, // { "abc**" ,"abc", True }, // { "**abc" ,"abc", True }, // { "**a*b*c**" ,"abc", True }, // { "**a*b*d**" ,"abc", False }, // { "a?b" ,"abc", False }, // { "a?c" ,"abc", True }, // { "?" ,"" , False }, // { "?" ,"a" , True }, // { "?" ,"ab" , False }, // { "abcd" ,"abc", False }, // { "ab" ,"abc", False }, // { NULL ,NULL , False } // }; // //int main ( void ) //{ // Test* t; // for (t = tests; t->patt; t++) { // printf("%-10s %-6s %s\n", // t->patt, t->input, // match_string_all((UChar*)t->patt,(UChar*)t->input,True) // == t->xres // ? "pass" : "FAIL" ); // } // return 0; //} /*--------------------------------------------------------------------*/ /*--- end m_seqmatch.c ---*/ /*--------------------------------------------------------------------*/