1 /*
2 * Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3 * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile, Inc.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 */
22
23 #include "config.h"
24 #include "RegExp.h"
25
26 #include "Lexer.h"
27 #include "yarr/Yarr.h"
28 #include "yarr/YarrJIT.h"
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <wtf/Assertions.h>
33 #include <wtf/OwnArrayPtr.h>
34
35 namespace JSC {
36
regExpFlags(const UString & string)37 RegExpFlags regExpFlags(const UString& string)
38 {
39 RegExpFlags flags = NoFlags;
40
41 for (unsigned i = 0; i < string.length(); ++i) {
42 switch (string.characters()[i]) {
43 case 'g':
44 if (flags & FlagGlobal)
45 return InvalidFlags;
46 flags = static_cast<RegExpFlags>(flags | FlagGlobal);
47 break;
48
49 case 'i':
50 if (flags & FlagIgnoreCase)
51 return InvalidFlags;
52 flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
53 break;
54
55 case 'm':
56 if (flags & FlagMultiline)
57 return InvalidFlags;
58 flags = static_cast<RegExpFlags>(flags | FlagMultiline);
59 break;
60
61 default:
62 return InvalidFlags;
63 }
64 }
65
66 return flags;
67 }
68
69 struct RegExpRepresentation {
70 #if ENABLE(YARR_JIT)
71 Yarr::YarrCodeBlock m_regExpJITCode;
72 #endif
73 OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
74 };
75
RegExp(JSGlobalData * globalData,const UString & patternString,RegExpFlags flags)76 inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
77 : m_patternString(patternString)
78 , m_flags(flags)
79 , m_constructionError(0)
80 , m_numSubpatterns(0)
81 #if ENABLE(REGEXP_TRACING)
82 , m_rtMatchCallCount(0)
83 , m_rtMatchFoundCount(0)
84 #endif
85 , m_representation(adoptPtr(new RegExpRepresentation))
86 {
87 m_state = compile(globalData);
88 }
89
~RegExp()90 RegExp::~RegExp()
91 {
92 }
93
create(JSGlobalData * globalData,const UString & patternString,RegExpFlags flags)94 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
95 {
96 RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags));
97 #if ENABLE(REGEXP_TRACING)
98 globalData->addRegExpToTrace(res);
99 #endif
100 return res.release();
101 }
102
compile(JSGlobalData * globalData)103 RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
104 {
105 Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
106 if (m_constructionError)
107 return ParseError;
108
109 m_numSubpatterns = pattern.m_numSubpatterns;
110
111 RegExpState res = ByteCode;
112
113 #if ENABLE(YARR_JIT)
114 if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
115 Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
116 #if ENABLE(YARR_JIT_DEBUG)
117 if (!m_representation->m_regExpJITCode.isFallBack())
118 res = JITCode;
119 else
120 res = ByteCode;
121 #else
122 if (!m_representation->m_regExpJITCode.isFallBack())
123 return JITCode;
124 #endif
125 }
126 #endif
127
128 m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
129
130 return res;
131 }
132
match(const UString & s,int startOffset,Vector<int,32> * ovector)133 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
134 {
135 if (startOffset < 0)
136 startOffset = 0;
137
138 #if ENABLE(REGEXP_TRACING)
139 m_rtMatchCallCount++;
140 #endif
141
142 if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
143 return -1;
144
145 if (m_state != ParseError) {
146 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
147 int* offsetVector;
148 Vector<int, 32> nonReturnedOvector;
149 if (ovector) {
150 ovector->resize(offsetVectorSize);
151 offsetVector = ovector->data();
152 } else {
153 nonReturnedOvector.resize(offsetVectorSize);
154 offsetVector = nonReturnedOvector.data();
155 }
156
157 ASSERT(offsetVector);
158 // Initialize offsetVector with the return value (index 0) and the
159 // first subpattern start indicies (even index values) set to -1.
160 // No need to init the subpattern end indicies.
161 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
162 offsetVector[j] = -1;
163
164 int result;
165 #if ENABLE(YARR_JIT)
166 if (m_state == JITCode) {
167 result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
168 #if ENABLE(YARR_JIT_DEBUG)
169 matchCompareWithInterpreter(s, startOffset, offsetVector, result);
170 #endif
171 } else
172 #endif
173 result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
174 ASSERT(result >= -1);
175
176 #if ENABLE(REGEXP_TRACING)
177 if (result != -1)
178 m_rtMatchFoundCount++;
179 #endif
180
181 return result;
182 }
183
184 return -1;
185 }
186
187
188 #if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(const UString & s,int startOffset,int * offsetVector,int jitResult)189 void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
190 {
191 int offsetVectorSize = (m_numSubpatterns + 1) * 2;
192 Vector<int, 32> interpreterOvector;
193 interpreterOvector.resize(offsetVectorSize);
194 int* interpreterOffsetVector = interpreterOvector.data();
195 int interpreterResult = 0;
196 int differences = 0;
197
198 // Initialize interpreterOffsetVector with the return value (index 0) and the
199 // first subpattern start indicies (even index values) set to -1.
200 // No need to init the subpattern end indicies.
201 for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
202 interpreterOffsetVector[j] = -1;
203
204 interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
205
206 if (jitResult != interpreterResult)
207 differences++;
208
209 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
210 if ((offsetVector[j] != interpreterOffsetVector[j])
211 || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
212 differences++;
213
214 if (differences) {
215 fprintf(stderr, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
216 unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
217
218 fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
219
220 if (jitResult != interpreterResult) {
221 fprintf(stderr, " JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
222 differences--;
223 } else {
224 fprintf(stderr, " Correct result = %d\n", jitResult);
225 }
226
227 if (differences) {
228 for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
229 if (offsetVector[j] != interpreterOffsetVector[j])
230 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
231 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
232 fprintf(stderr, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
233 }
234 }
235 }
236 }
237 #endif
238
239 #if ENABLE(REGEXP_TRACING)
printTraceData()240 void RegExp::printTraceData()
241 {
242 char formattedPattern[41];
243 char rawPattern[41];
244
245 strncpy(rawPattern, pattern().utf8().data(), 40);
246 rawPattern[40]= '\0';
247
248 int pattLen = strlen(rawPattern);
249
250 snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
251
252 #if ENABLE(YARR_JIT)
253 Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
254
255 const size_t jitAddrSize = 20;
256 char jitAddr[jitAddrSize];
257 if (m_state == JITCode)
258 snprintf(jitAddr, jitAddrSize, "fallback");
259 else
260 snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
261 #else
262 const char* jitAddr = "JIT Off";
263 #endif
264
265 printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
266 }
267 #endif
268
269 } // namespace JSC
270