• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3  *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4  *  Copyright (C) 2009 Torch Mobile, Inc.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Lesser General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public
17  *  License along with this library; if not, write to the Free Software
18  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  */
21 
22 #include "config.h"
23 #include "RegExp.h"
24 #include "Lexer.h"
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wtf/Assertions.h>
29 #include <wtf/OwnArrayPtr.h>
30 
31 
32 #if ENABLE(YARR)
33 
34 #include "yarr/RegexCompiler.h"
35 #if ENABLE(YARR_JIT)
36 #include "yarr/RegexJIT.h"
37 #else
38 #include "yarr/RegexInterpreter.h"
39 #endif
40 
41 #else
42 
43 #if ENABLE(WREC)
44 #include "JIT.h"
45 #include "WRECGenerator.h"
46 #endif
47 #include <pcre/pcre.h>
48 
49 #endif
50 
51 namespace JSC {
52 
53 #if ENABLE(WREC)
54 using namespace WREC;
55 #endif
56 
RegExp(JSGlobalData * globalData,const UString & pattern)57 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern)
58     : m_pattern(pattern)
59     , m_flagBits(0)
60     , m_constructionError(0)
61     , m_numSubpatterns(0)
62 {
63     compile(globalData);
64 }
65 
RegExp(JSGlobalData * globalData,const UString & pattern,const UString & flags)66 inline RegExp::RegExp(JSGlobalData* globalData, const UString& pattern, const UString& flags)
67     : m_pattern(pattern)
68     , m_flagBits(0)
69     , m_constructionError(0)
70     , m_numSubpatterns(0)
71 {
72     // NOTE: The global flag is handled on a case-by-case basis by functions like
73     // String::match and RegExpObject::match.
74     if (flags.find('g') != -1)
75         m_flagBits |= Global;
76     if (flags.find('i') != -1)
77         m_flagBits |= IgnoreCase;
78     if (flags.find('m') != -1)
79         m_flagBits |= Multiline;
80 
81     compile(globalData);
82 }
83 
84 #if !ENABLE(YARR)
~RegExp()85 RegExp::~RegExp()
86 {
87     jsRegExpFree(m_regExp);
88 }
89 #endif
90 
create(JSGlobalData * globalData,const UString & pattern)91 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern)
92 {
93     return adoptRef(new RegExp(globalData, pattern));
94 }
95 
create(JSGlobalData * globalData,const UString & pattern,const UString & flags)96 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& pattern, const UString& flags)
97 {
98     return adoptRef(new RegExp(globalData, pattern, flags));
99 }
100 
101 #if ENABLE(YARR)
102 
compile(JSGlobalData * globalData)103 void RegExp::compile(JSGlobalData* globalData)
104 {
105 #if ENABLE(YARR_JIT)
106     Yarr::jitCompileRegex(globalData, m_regExpJITCode, m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline());
107 #else
108     UNUSED_PARAM(globalData);
109     m_regExpBytecode.set(Yarr::byteCompileRegex(m_pattern, m_numSubpatterns, m_constructionError, ignoreCase(), multiline()));
110 #endif
111 }
112 
match(const UString & s,int startOffset,Vector<int,32> * ovector)113 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
114 {
115     if (startOffset < 0)
116         startOffset = 0;
117     if (ovector)
118         ovector->clear();
119 
120     if (startOffset > s.size() || s.isNull())
121         return -1;
122 
123 #if ENABLE(YARR_JIT)
124     if (!!m_regExpJITCode) {
125 #else
126     if (m_regExpBytecode) {
127 #endif
128         int offsetVectorSize = (m_numSubpatterns + 1) * 3; // FIXME: should be 2 - but adding temporary fallback to pcre.
129         int* offsetVector;
130         Vector<int, 32> nonReturnedOvector;
131         if (ovector) {
132             ovector->resize(offsetVectorSize);
133             offsetVector = ovector->data();
134         } else {
135             nonReturnedOvector.resize(offsetVectorSize);
136             offsetVector = nonReturnedOvector.data();
137         }
138 
139         ASSERT(offsetVector);
140         for (int j = 0; j < offsetVectorSize; ++j)
141             offsetVector[j] = -1;
142 
143 
144 #if ENABLE(YARR_JIT)
145         int result = Yarr::executeRegex(m_regExpJITCode, s.data(), startOffset, s.size(), offsetVector, offsetVectorSize);
146 #else
147         int result = Yarr::interpretRegex(m_regExpBytecode.get(), s.data(), startOffset, s.size(), offsetVector);
148 #endif
149 
150         if (result < 0) {
151 #ifndef NDEBUG
152             // TODO: define up a symbol, rather than magic -1
153             if (result != -1)
154                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
155 #endif
156             if (ovector)
157                 ovector->clear();
158         }
159         return result;
160     }
161 
162     return -1;
163 }
164 
165 #else
166 
167 void RegExp::compile(JSGlobalData* globalData)
168 {
169     m_regExp = 0;
170 #if ENABLE(WREC)
171     m_wrecFunction = Generator::compileRegExp(globalData, m_pattern, &m_numSubpatterns, &m_constructionError, m_executablePool, ignoreCase(), multiline());
172     if (m_wrecFunction || m_constructionError)
173         return;
174     // Fall through to non-WREC case.
175 #else
176     UNUSED_PARAM(globalData);
177 #endif
178 
179     JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase() ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
180     JSRegExpMultilineOption multilineOption = multiline() ? JSRegExpMultiline : JSRegExpSingleLine;
181     m_regExp = jsRegExpCompile(reinterpret_cast<const UChar*>(m_pattern.data()), m_pattern.size(), ignoreCaseOption, multilineOption, &m_numSubpatterns, &m_constructionError);
182 }
183 
184 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
185 {
186     if (startOffset < 0)
187         startOffset = 0;
188     if (ovector)
189         ovector->clear();
190 
191     if (startOffset > s.size() || s.isNull())
192         return -1;
193 
194 #if ENABLE(WREC)
195     if (m_wrecFunction) {
196         int offsetVectorSize = (m_numSubpatterns + 1) * 2;
197         int* offsetVector;
198         Vector<int, 32> nonReturnedOvector;
199         if (ovector) {
200             ovector->resize(offsetVectorSize);
201             offsetVector = ovector->data();
202         } else {
203             nonReturnedOvector.resize(offsetVectorSize);
204             offsetVector = nonReturnedOvector.data();
205         }
206         ASSERT(offsetVector);
207         for (int j = 0; j < offsetVectorSize; ++j)
208             offsetVector[j] = -1;
209 
210         int result = m_wrecFunction(s.data(), startOffset, s.size(), offsetVector);
211 
212         if (result < 0) {
213 #ifndef NDEBUG
214             // TODO: define up a symbol, rather than magic -1
215             if (result != -1)
216                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", result);
217 #endif
218             if (ovector)
219                 ovector->clear();
220         }
221         return result;
222     } else
223 #endif
224     if (m_regExp) {
225         // Set up the offset vector for the result.
226         // First 2/3 used for result, the last third used by PCRE.
227         int* offsetVector;
228         int offsetVectorSize;
229         int fixedSizeOffsetVector[3];
230         if (!ovector) {
231             offsetVectorSize = 3;
232             offsetVector = fixedSizeOffsetVector;
233         } else {
234             offsetVectorSize = (m_numSubpatterns + 1) * 3;
235             ovector->resize(offsetVectorSize);
236             offsetVector = ovector->data();
237         }
238 
239         int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const UChar*>(s.data()), s.size(), startOffset, offsetVector, offsetVectorSize);
240 
241         if (numMatches < 0) {
242 #ifndef NDEBUG
243             if (numMatches != JSRegExpErrorNoMatch)
244                 fprintf(stderr, "jsRegExpExecute failed with result %d\n", numMatches);
245 #endif
246             if (ovector)
247                 ovector->clear();
248             return -1;
249         }
250 
251         return offsetVector[0];
252     }
253 
254     return -1;
255 }
256 
257 #endif
258 
259 } // namespace JSC
260