• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
17 #define ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
18 
19 #include "ecmascript/base/builtins_base.h"
20 #include "ecmascript/builtins/builtins_string.h"
21 #include "ecmascript/ecma_runtime_call_info.h"
22 #include "ecmascript/js_tagged_value.h"
23 #include "ecmascript/regexp/regexp_executor.h"
24 #include "ecmascript/regexp/regexp_parser.h"
25 
26 namespace panda::ecmascript::builtins {
27 class BuiltinsRegExp : public base::BuiltinsBase {
28 public:
29     // 21.2.3.1 RegExp ( pattern, flags )
30     static JSTaggedValue RegExpConstructor(EcmaRuntimeCallInfo *argv);
31 
32     // prototype
33     // 21.2.5.2 RegExp.prototype.exec ( string )
34     static JSTaggedValue Exec(EcmaRuntimeCallInfo *argv);
35     // 21.2.5.13 RegExp.prototype.test( S )
36     static JSTaggedValue Test(EcmaRuntimeCallInfo *argv);
37     // 21.2.5.14 RegExp.prototype.toString ( )
38     static JSTaggedValue ToString(EcmaRuntimeCallInfo *argv);
39     // 21.2.5.3 get RegExp.prototype.flags
40     static JSTaggedValue GetFlags(EcmaRuntimeCallInfo *argv);
41     // 21.2.5.4 get RegExp.prototype.global
42     static JSTaggedValue GetGlobal(EcmaRuntimeCallInfo *argv);
43     // 21.2.5.5 get RegExp.prototype.ignoreCase
44     static JSTaggedValue GetIgnoreCase(EcmaRuntimeCallInfo *argv);
45     // 21.2.5.7 get RegExp.prototype.multiline
46     static JSTaggedValue GetMultiline(EcmaRuntimeCallInfo *argv);
47     static JSTaggedValue GetDotAll(EcmaRuntimeCallInfo *argv);
48     // 21.2.5.10 get RegExp.prototype.source
49     static JSTaggedValue GetSource(EcmaRuntimeCallInfo *argv);
50     // 21.2.5.12 get RegExp.prototype.sticky
51     static JSTaggedValue GetSticky(EcmaRuntimeCallInfo *argv);
52     // 21.2.5.15 get RegExp.prototype.unicode
53     static JSTaggedValue GetUnicode(EcmaRuntimeCallInfo *argv);
54     // 21.2.4.2 get RegExp [ @@species ]
55     static JSTaggedValue GetSpecies(EcmaRuntimeCallInfo *argv);
56     // 21.2.5.6 RegExp.prototype [ @@match ] ( string )
57     static JSTaggedValue Match(EcmaRuntimeCallInfo *argv);
58     // 22.2.5.8 RegExp.prototype [ @@matchAll ] ( string )
59     static JSTaggedValue MatchAll(EcmaRuntimeCallInfo *argv);
60     // 21.2.5.8 RegExp.prototype [ @@replace ] ( string, replaceValue )
61     static JSTaggedValue Replace(EcmaRuntimeCallInfo *argv);
62     // 21.2.5.9 RegExp.prototype [ @@search ] ( string )
63     static JSTaggedValue Search(EcmaRuntimeCallInfo *argv);
64     // 21.2.5.11 RegExp.prototype [ @@split ] ( string, limit )
65     static JSTaggedValue Split(EcmaRuntimeCallInfo *argv);
66     // 21.2.3.2.3 Runtime Semantics: RegExpCreate ( P, F )
67     static JSTaggedValue RegExpCreate(JSThread *thread, const JSHandle<JSTaggedValue> &pattern,
68                                       const JSHandle<JSTaggedValue> &flags);
69     static JSTaggedValue FlagsBitsToString(JSThread *thread, uint8_t flags);
70     // 21.2.5.2.1 Runtime Semantics: RegExpExec ( R, S )
71     static JSTaggedValue RegExpExec(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
72                                     const JSHandle<JSTaggedValue> &inputString, bool useCache);
73     // 21.2.5.2.3 AdvanceStringIndex ( S, index, unicode )
74     static int64_t AdvanceStringIndex(const JSHandle<JSTaggedValue> &inputStr, int64_t index,
75                                       bool unicode);
76     // 22.2.6.6 get RegExp.prototype.hasIndices
77     static JSTaggedValue GetHasIndices(EcmaRuntimeCallInfo *argv);
78 
79     static JSTaggedValue ReplaceInternal(JSThread *thread,
80                                          JSHandle<JSTaggedValue> thisObj,
81                                          JSHandle<JSTaggedValue> string,
82                                          JSHandle<JSTaggedValue> inputReplaceValue);
83     static JSTaggedValue GetAllFlagsInternal(JSThread *thread, JSHandle<JSTaggedValue> &thisObj);
84     static JSTaggedValue IsValidRegularExpression(JSThread *thread, JSHandle<JSTaggedValue> &thisObj);
85 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
86 #define SET_GET_CAPTURE(index)                                                                                \
87     static JSTaggedValue GetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj);                  \
88     static bool SetCapture##index(JSThread *thread, const JSHandle<JSObject> &obj,                            \
89                                  const JSHandle<JSTaggedValue> &value, bool mayThrow);
90 
91     SET_GET_CAPTURE(1)
92     SET_GET_CAPTURE(2)
93     SET_GET_CAPTURE(3)
94     SET_GET_CAPTURE(4)
95     SET_GET_CAPTURE(5)
96     SET_GET_CAPTURE(6)
97     SET_GET_CAPTURE(7)
98     SET_GET_CAPTURE(8)
99     SET_GET_CAPTURE(9)
100 #undef SET_GET_CAPTURE
101 
102 private:
103     static constexpr uint32_t MIN_REPLACE_STRING_LENGTH = 1000;
104     static constexpr uint32_t MAX_SPLIT_LIMIT = 0xFFFFFFFFu;
105     static constexpr uint32_t REGEXP_GLOBAL_ARRAY_SIZE = 9;
106     static constexpr uint32_t LAST_INDEX_OFFSET = 0;
107     static constexpr uint32_t MAX_REGEXP_STRING_COUNT = 1U << 16;
108 
109     static bool Matcher(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
110                         const uint8_t *buffer, size_t length, int32_t lastindex, bool isUtf16);
111 
112     static JSTaggedValue GetFlagsInternal(JSThread *thread, const JSHandle<JSTaggedValue> &obj,
113                                           const JSHandle<JSTaggedValue> &constructor, const uint8_t mask);
114     // 21.2.5.2.2 Runtime Semantics: RegExpBuiltinExec ( R, S )
115     static JSTaggedValue RegExpBuiltinExec(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
116                                            const JSHandle<JSTaggedValue> &inputStr, bool useCache);
117 
118     // 21.2.3.2.1 Runtime Semantics: RegExpAlloc ( newTarget )
119     static JSTaggedValue RegExpAlloc(JSThread *thread, const JSHandle<JSTaggedValue> &newTarget);
120 
121     static uint32_t UpdateExpressionFlags(JSThread *thread, const CString &checkStr);
122 
123     // 21.2.3.2.2 Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
124     static JSTaggedValue RegExpInitialize(JSThread *thread, const JSHandle<JSTaggedValue> &obj,
125                                           const JSHandle<JSTaggedValue> &pattern, const JSHandle<JSTaggedValue> &flags);
126     // 21.2.3.2.4 Runtime Semantics: EscapeRegExpPattern ( P, F )
127     static EcmaString *EscapeRegExpPattern(JSThread *thread, const JSHandle<JSTaggedValue> &src,
128                                            const JSHandle<JSTaggedValue> &flags);
129     static JSTaggedValue RegExpReplaceFast(JSThread *thread, JSHandle<JSTaggedValue> &regexp,
130                                            JSHandle<EcmaString> inputString, uint32_t inputLength);
131     static JSTaggedValue RegExpTestFast(JSThread *thread, JSHandle<JSTaggedValue> &regexp,
132                                         const JSHandle<JSTaggedValue> &inputString, bool useCache);
133     static JSTaggedValue RegExpExecForTestFast(JSThread *thread, JSHandle<JSTaggedValue> &regexp,
134                                                const JSHandle<JSTaggedValue> &inputStr, bool useCache);
135     static bool IsFastRegExp(JSThread *thread, JSHandle<JSTaggedValue> &regexp);
136     // 22.2.7.8 MakeMatchIndicesIndexPairArray ( S, indices, groupNames, hasGroups )
137     static JSHandle<JSTaggedValue> MakeMatchIndicesIndexPairArray(JSThread* thread,
138         const std::vector<std::pair<JSTaggedValue, JSTaggedValue>>& indices,
139         const std::vector<JSHandle<JSTaggedValue>>& groupNames, bool hasGroups);
140     static bool RegExpExecInternal(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
141                                    JSHandle<EcmaString> &inputString, int32_t lastIndex);
142     static JSTaggedValue RegExpSplitFast(JSThread *thread, const JSHandle<JSTaggedValue> &regexp,
143                                          JSHandle<EcmaString> string, uint32_t limit, bool useCache);
144 };
145 
146 class RegExpExecResultCache : public TaggedArray {
147 public:
148     enum CacheType {
149         REPLACE_TYPE,
150         SPLIT_TYPE,
151         MATCH_TYPE,
152         EXEC_TYPE,
153         INTERMEDIATE_REPLACE_TYPE,
154         TEST_TYPE
155     };
Cast(TaggedObject * object)156     static RegExpExecResultCache *Cast(TaggedObject *object)
157     {
158         return reinterpret_cast<RegExpExecResultCache *>(object);
159     }
160     static JSTaggedValue CreateCacheTable(JSThread *thread);
161     // extend as an additional parameter to judge cached
162     JSTaggedValue FindCachedResult(JSThread *thread, const JSHandle<JSTaggedValue> &patten,
163                                    const JSHandle<JSTaggedValue> &flags, const JSHandle<JSTaggedValue> &input,
164                                    CacheType type, const JSHandle<JSTaggedValue> &regexp,
165                                    JSTaggedValue lastIndexInput, JSTaggedValue extend = JSTaggedValue::Undefined(),
166                                    bool isIntermediateResult = false);
167     // extend as an additional parameter to judge cached
168     static void AddResultInCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache,
169                                  const JSHandle<JSTaggedValue> &patten, const JSHandle<JSTaggedValue> &flags,
170                                  const JSHandle<JSTaggedValue> &input, const JSHandle<JSTaggedValue> &resultArray,
171                                  CacheType type, uint32_t lastIndexInput, uint32_t lastIndex,
172                                  JSTaggedValue extend = JSTaggedValue::Undefined(),
173                                  bool isIntermediateResult = false);
174 
175     static void GrowRegexpCache(JSThread *thread, JSHandle<RegExpExecResultCache> cache);
176 
177     void ClearEntry(JSThread *thread, int entry);
178     void SetEntry(JSThread *thread, int entry, JSTaggedValue &patten, JSTaggedValue &flags, JSTaggedValue &input,
179                   JSTaggedValue &lastIndexInputValue, JSTaggedValue &lastIndexValue, JSTaggedValue &extendValue);
180     void UpdateResultArray(JSThread *thread, int entry, JSTaggedValue resultArray, CacheType type);
181     bool Match(int entry, JSTaggedValue &pattenStr, JSTaggedValue &flagsStr, JSTaggedValue &inputStr,
182                JSTaggedValue &lastIndexInputValue, JSTaggedValue &extend, CacheType type);
SetHitCount(JSThread * thread,int hitCount)183     inline void SetHitCount(JSThread *thread, int hitCount)
184     {
185         Set(thread, CACHE_HIT_COUNT_INDEX, JSTaggedValue(hitCount));
186     }
187 
GetHitCount()188     inline int GetHitCount()
189     {
190         return Get(CACHE_HIT_COUNT_INDEX).GetInt();
191     }
192 
SetCacheCount(JSThread * thread,int hitCount)193     inline void SetCacheCount(JSThread *thread, int hitCount)
194     {
195         Set(thread, CACHE_COUNT_INDEX, JSTaggedValue(hitCount));
196     }
197 
GetCacheCount()198     inline int GetCacheCount()
199     {
200         return Get(CACHE_COUNT_INDEX).GetInt();
201     }
202 
Print()203     void Print()
204     {
205         std::cout << "cache count: " << GetCacheCount() << std::endl;
206         std::cout << "cache hit count: " << GetHitCount() << std::endl;
207     }
208 
SetLargeStrCount(JSThread * thread,uint32_t newCount)209     inline void SetLargeStrCount(JSThread *thread, uint32_t newCount)
210     {
211         Set(thread, LARGE_STRING_COUNT_INDEX, JSTaggedValue(newCount));
212     }
213 
SetConflictCount(JSThread * thread,uint32_t newCount)214     inline void SetConflictCount(JSThread *thread, uint32_t newCount)
215     {
216         Set(thread, CONFLICT_COUNT_INDEX, JSTaggedValue(newCount));
217     }
218 
SetStrLenThreshold(JSThread * thread,uint32_t newThreshold)219     inline void SetStrLenThreshold(JSThread *thread, uint32_t newThreshold)
220     {
221         Set(thread, STRING_LENGTH_THRESHOLD_INDEX, JSTaggedValue(newThreshold));
222     }
223 
GetLargeStrCount()224     inline uint32_t GetLargeStrCount()
225     {
226         return Get(LARGE_STRING_COUNT_INDEX).GetInt();
227     }
228 
GetConflictCount()229     inline uint32_t GetConflictCount()
230     {
231         return Get(CONFLICT_COUNT_INDEX).GetInt();
232     }
233 
GetStrLenThreshold()234     inline uint32_t GetStrLenThreshold()
235     {
236         return Get(STRING_LENGTH_THRESHOLD_INDEX).GetInt();
237     }
238 
SetCacheLength(JSThread * thread,int length)239     inline void SetCacheLength(JSThread *thread, int length)
240     {
241         Set(thread, CACHE_LENGTH_INDEX, JSTaggedValue(length));
242     }
243 
GetCacheLength()244     inline int GetCacheLength()
245     {
246         return Get(CACHE_LENGTH_INDEX).GetInt();
247     }
248 
249 private:
250     static constexpr int DEFAULT_LARGE_STRING_COUNT = 10;
251     static constexpr int DEFAULT_CONFLICT_COUNT = 100;
252     static constexpr int INITIAL_CACHE_NUMBER = 0x10;
253     static constexpr int DEFAULT_CACHE_NUMBER = 0x1000;
254     static constexpr int CACHE_COUNT_INDEX = 0;
255     static constexpr int CACHE_HIT_COUNT_INDEX = 1;
256     static constexpr int LARGE_STRING_COUNT_INDEX = 2;
257     static constexpr int CONFLICT_COUNT_INDEX = 3;
258     static constexpr int STRING_LENGTH_THRESHOLD_INDEX = 4;
259     static constexpr int CACHE_LENGTH_INDEX = 5;
260     static constexpr int CACHE_TABLE_HEADER_SIZE = 6;
261     static constexpr int PATTERN_INDEX = 0;
262     static constexpr int FLAG_INDEX = 1;
263     static constexpr int INPUT_STRING_INDEX = 2;
264     static constexpr int LAST_INDEX_INPUT_INDEX = 3;
265     static constexpr int LAST_INDEX_INDEX = 4;
266     static constexpr int RESULT_REPLACE_INDEX = 5;
267     static constexpr int RESULT_SPLIT_INDEX = 6;
268     static constexpr int RESULT_MATCH_INDEX = 7;
269     static constexpr int RESULT_EXEC_INDEX = 8;
270     static constexpr int RESULT_INTERMEDIATE_REPLACE_INDEX = 9;
271     static constexpr int RESULT_TEST_INDEX = 10;
272     // Extend index used for saving an additional parameter to judge cached
273     static constexpr int EXTEND_INDEX = 11;
274     static constexpr int ENTRY_SIZE = 12;
275 };
276 
277 class RegExpGlobalResult : public TaggedArray {
278 public:
Cast(TaggedObject * object)279     static RegExpGlobalResult *Cast(TaggedObject *object)
280     {
281         return reinterpret_cast<RegExpGlobalResult *>(object);
282     }
283     static JSTaggedValue CreateGlobalResultTable(JSThread *thread);
284 
SetCapture(JSThread * thread,int index,JSTaggedValue value)285     void SetCapture(JSThread *thread, int index, JSTaggedValue value)
286     {
287         ASSERT(CAPTURE_START_INDEX + index - 1 < GLOBAL_TABLE_SIZE);
288         Set(thread, CAPTURE_START_INDEX + index - 1, value);
289     }
290 
ResetDollar(JSThread * thread)291     void ResetDollar(JSThread *thread)
292     {
293         for (uint32_t i = 0; i < DOLLAR_NUMBER; i++) {
294             Set(thread, CAPTURE_START_INDEX + i, JSTaggedValue::Hole());
295         }
296     }
297 
298     template <int N>
GetCapture(JSThread * thread)299     static JSTaggedValue GetCapture(JSThread *thread)
300     {
301         JSHandle<builtins::RegExpGlobalResult> globalTable(thread->GetCurrentEcmaContext()->GetRegExpGlobalResult());
302         JSTaggedValue res = globalTable->Get(CAPTURE_START_INDEX + N - 1);
303         int captureNum = globalTable->GetTotalCaptureCounts().GetInt();
304         if (res.IsHole() && (N < captureNum)) {
305             uint32_t startIndex = static_cast<uint32_t>(globalTable->GetStartOfCaptureIndex(N).GetInt());
306             uint32_t endIndex = static_cast<uint32_t>(globalTable->GetEndOfCaptureIndex(N).GetInt());
307             uint32_t len = endIndex - startIndex;
308             if (len < 0) {
309                 res = JSTaggedValue::Undefined();
310             } else {
311                 res = JSTaggedValue(EcmaStringAccessor::FastSubString(thread->GetEcmaVM(),
312                     JSHandle<EcmaString>(thread, EcmaString::Cast(globalTable->GetInputString())), startIndex, len));
313             }
314             globalTable->Set(thread, CAPTURE_START_INDEX + N - 1, res);
315         } else if (res.IsHole()) {
316             res = thread->GetEcmaVM()->GetFactory()->GetEmptyString().GetTaggedValue();
317             globalTable->Set(thread, CAPTURE_START_INDEX + N - 1, res);
318         }
319         return res;
320     }
321 
SetTotalCaptureCounts(JSThread * thread,JSTaggedValue counts)322     void SetTotalCaptureCounts(JSThread *thread, JSTaggedValue counts)
323     {
324         Set(thread, TOTAL_CAPTURE_COUNTS_INDEX, counts);
325     }
326 
GetTotalCaptureCounts()327     JSTaggedValue GetTotalCaptureCounts()
328     {
329         return Get(TOTAL_CAPTURE_COUNTS_INDEX);
330     }
331 
SetEndIndex(JSThread * thread,JSTaggedValue endIndex)332     void SetEndIndex(JSThread *thread, JSTaggedValue endIndex)
333     {
334         Set(thread, END_INDEX, endIndex);
335     }
336 
GetEndIndex()337     JSTaggedValue GetEndIndex()
338     {
339         return Get(END_INDEX);
340     }
341 
SetInputString(JSThread * thread,JSTaggedValue string)342     void SetInputString(JSThread *thread, JSTaggedValue string)
343     {
344         Set(thread, INPUT_STRING_INDEX, string);
345     }
346 
GetInputString()347     JSTaggedValue GetInputString()
348     {
349         return Get(INPUT_STRING_INDEX);
350     }
351 
SetStartOfCaptureIndex(JSThread * thread,uint32_t index,JSTaggedValue value)352     void SetStartOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value)
353     {
354         Set(thread, FIRST_CAPTURE_INDEX + index * 2, value); // 2 : double
355     }
356 
SetEndOfCaptureIndex(JSThread * thread,uint32_t index,JSTaggedValue value)357     void SetEndOfCaptureIndex(JSThread *thread, uint32_t index, JSTaggedValue value)
358     {
359         Set(thread, FIRST_CAPTURE_INDEX + index * 2 + 1, value); // 2 : double
360     }
361 
GetStartOfCaptureIndex(uint32_t index)362     JSTaggedValue GetStartOfCaptureIndex(uint32_t index)
363     {
364         return Get(FIRST_CAPTURE_INDEX + index * 2); // 2 : double
365     }
366 
GetEndOfCaptureIndex(uint32_t index)367     JSTaggedValue GetEndOfCaptureIndex(uint32_t index)
368     {
369         return Get(FIRST_CAPTURE_INDEX + index * 2 + 1); // 2 : double
370     }
371 
372     static JSHandle<RegExpGlobalResult> GrowCapturesCapacity(JSThread *thread,
373         JSHandle<RegExpGlobalResult>result, uint32_t length);
374 
375     static constexpr int FIRST_CAPTURE_INDEX = 12;  // capture index starts here
376 
377 private:
378     static constexpr int GLOBAL_TABLE_SIZE = 12; // initial length
379     static constexpr int DOLLAR_NUMBER = 9;
380     static constexpr int CAPTURE_START_INDEX = 0;
381 
382     static constexpr int TOTAL_CAPTURE_COUNTS_INDEX = 9;  // save total capture size
383     static constexpr int INPUT_STRING_INDEX = 10; // save input string
384     static constexpr int END_INDEX = 11; // save last index
385     static constexpr int INITIAL_CAPTURE_INDICES = 18;  // length: pairs of capture start index and end index
386 };
387 }  // namespace panda::ecmascript::builtins
388 #endif  // ECMASCRIPT_BUILTINS_BUILTINS_REGEXP_H
389