• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   *  Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3   *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4   *  Copyright (C) 2009 Torch Mobile, Inc.
5   *  Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6   *
7   *  This library is free software; you can redistribute it and/or
8   *  modify it under the terms of the GNU Lesser General Public
9   *  License as published by the Free Software Foundation; either
10   *  version 2 of the License, or (at your option) any later version.
11   *
12   *  This library is distributed in the hope that it will be useful,
13   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   *  Lesser General Public License for more details.
16   *
17   *  You should have received a copy of the GNU Lesser General Public
18   *  License along with this library; if not, write to the Free Software
19   *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
20   *
21   */
22  
23  #include "config.h"
24  #include "RegExp.h"
25  
26  #include "Lexer.h"
27  #include "yarr/Yarr.h"
28  #include "yarr/YarrJIT.h"
29  #include <stdio.h>
30  #include <stdlib.h>
31  #include <string.h>
32  #include <wtf/Assertions.h>
33  #include <wtf/OwnArrayPtr.h>
34  
35  namespace JSC {
36  
regExpFlags(const UString & string)37  RegExpFlags regExpFlags(const UString& string)
38  {
39      RegExpFlags flags = NoFlags;
40  
41      for (unsigned i = 0; i < string.length(); ++i) {
42          switch (string.characters()[i]) {
43          case 'g':
44              if (flags & FlagGlobal)
45                  return InvalidFlags;
46              flags = static_cast<RegExpFlags>(flags | FlagGlobal);
47              break;
48  
49          case 'i':
50              if (flags & FlagIgnoreCase)
51                  return InvalidFlags;
52              flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
53              break;
54  
55          case 'm':
56              if (flags & FlagMultiline)
57                  return InvalidFlags;
58              flags = static_cast<RegExpFlags>(flags | FlagMultiline);
59              break;
60  
61          default:
62              return InvalidFlags;
63          }
64      }
65  
66      return flags;
67  }
68  
69  struct RegExpRepresentation {
70  #if ENABLE(YARR_JIT)
71      Yarr::YarrCodeBlock m_regExpJITCode;
72  #endif
73      OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
74  };
75  
RegExp(JSGlobalData * globalData,const UString & patternString,RegExpFlags flags)76  inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
77      : m_patternString(patternString)
78      , m_flags(flags)
79      , m_constructionError(0)
80      , m_numSubpatterns(0)
81  #if ENABLE(REGEXP_TRACING)
82      , m_rtMatchCallCount(0)
83      , m_rtMatchFoundCount(0)
84  #endif
85      , m_representation(adoptPtr(new RegExpRepresentation))
86  {
87      m_state = compile(globalData);
88  }
89  
~RegExp()90  RegExp::~RegExp()
91  {
92  }
93  
create(JSGlobalData * globalData,const UString & patternString,RegExpFlags flags)94  PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
95  {
96      RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags));
97  #if ENABLE(REGEXP_TRACING)
98      globalData->addRegExpToTrace(res);
99  #endif
100      return res.release();
101  }
102  
compile(JSGlobalData * globalData)103  RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
104  {
105      Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
106      if (m_constructionError)
107          return ParseError;
108  
109      m_numSubpatterns = pattern.m_numSubpatterns;
110  
111      RegExpState res = ByteCode;
112  
113  #if ENABLE(YARR_JIT)
114      if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
115          Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
116  #if ENABLE(YARR_JIT_DEBUG)
117          if (!m_representation->m_regExpJITCode.isFallBack())
118              res = JITCode;
119          else
120              res = ByteCode;
121  #else
122          if (!m_representation->m_regExpJITCode.isFallBack())
123              return JITCode;
124  #endif
125      }
126  #endif
127  
128      m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
129  
130      return res;
131  }
132  
match(const UString & s,int startOffset,Vector<int,32> * ovector)133  int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
134  {
135      if (startOffset < 0)
136          startOffset = 0;
137  
138  #if ENABLE(REGEXP_TRACING)
139      m_rtMatchCallCount++;
140  #endif
141  
142      if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
143          return -1;
144  
145      if (m_state != ParseError) {
146          int offsetVectorSize = (m_numSubpatterns + 1) * 2;
147          int* offsetVector;
148          Vector<int, 32> nonReturnedOvector;
149          if (ovector) {
150              ovector->resize(offsetVectorSize);
151              offsetVector = ovector->data();
152          } else {
153              nonReturnedOvector.resize(offsetVectorSize);
154              offsetVector = nonReturnedOvector.data();
155          }
156  
157          ASSERT(offsetVector);
158          // Initialize offsetVector with the return value (index 0) and the
159          // first subpattern start indicies (even index values) set to -1.
160          // No need to init the subpattern end indicies.
161          for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
162              offsetVector[j] = -1;
163  
164          int result;
165  #if ENABLE(YARR_JIT)
166          if (m_state == JITCode) {
167              result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
168  #if ENABLE(YARR_JIT_DEBUG)
169              matchCompareWithInterpreter(s, startOffset, offsetVector, result);
170  #endif
171          } else
172  #endif
173              result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
174          ASSERT(result >= -1);
175  
176  #if ENABLE(REGEXP_TRACING)
177          if (result != -1)
178              m_rtMatchFoundCount++;
179  #endif
180  
181          return result;
182      }
183  
184      return -1;
185  }
186  
187  
188  #if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(const UString & s,int startOffset,int * offsetVector,int jitResult)189  void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
190  {
191      int offsetVectorSize = (m_numSubpatterns + 1) * 2;
192      Vector<int, 32> interpreterOvector;
193      interpreterOvector.resize(offsetVectorSize);
194      int* interpreterOffsetVector = interpreterOvector.data();
195      int interpreterResult = 0;
196      int differences = 0;
197  
198      // Initialize interpreterOffsetVector with the return value (index 0) and the
199      // first subpattern start indicies (even index values) set to -1.
200      // No need to init the subpattern end indicies.
201      for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
202          interpreterOffsetVector[j] = -1;
203  
204      interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
205  
206      if (jitResult != interpreterResult)
207          differences++;
208  
209      for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
210          if ((offsetVector[j] != interpreterOffsetVector[j])
211              || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
212              differences++;
213  
214      if (differences) {
215          fprintf(stderr, "RegExp Discrepency for /%s/\n    string input ", pattern().utf8().data());
216          unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
217  
218          fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
219  
220          if (jitResult != interpreterResult) {
221              fprintf(stderr, "    JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
222              differences--;
223          } else {
224              fprintf(stderr, "    Correct result = %d\n", jitResult);
225          }
226  
227          if (differences) {
228              for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
229                  if (offsetVector[j] != interpreterOffsetVector[j])
230                      fprintf(stderr, "    JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
231                  if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
232                      fprintf(stderr, "    JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
233              }
234          }
235      }
236  }
237  #endif
238  
239  #if ENABLE(REGEXP_TRACING)
printTraceData()240      void RegExp::printTraceData()
241      {
242          char formattedPattern[41];
243          char rawPattern[41];
244  
245          strncpy(rawPattern, pattern().utf8().data(), 40);
246          rawPattern[40]= '\0';
247  
248          int pattLen = strlen(rawPattern);
249  
250          snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
251  
252  #if ENABLE(YARR_JIT)
253          Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
254  
255          const size_t jitAddrSize = 20;
256          char jitAddr[jitAddrSize];
257          if (m_state == JITCode)
258              snprintf(jitAddr, jitAddrSize, "fallback");
259          else
260              snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
261  #else
262          const char* jitAddr = "JIT Off";
263  #endif
264  
265          printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
266      }
267  #endif
268  
269  } // namespace JSC
270