• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_REGEXP_REGEXP_H_
6 #define V8_REGEXP_REGEXP_H_
7 
8 #include "src/common/assert-scope.h"
9 #include "src/handles/handles.h"
10 #include "src/regexp/regexp-error.h"
11 #include "src/regexp/regexp-flags.h"
12 #include "src/zone/zone-containers.h"
13 
14 namespace v8 {
15 namespace internal {
16 
17 class JSRegExp;
18 class RegExpCapture;
19 class RegExpMatchInfo;
20 class RegExpNode;
21 class RegExpTree;
22 
// Identifies which kind of output a regexp compilation produces.
enum class RegExpCompilationTarget : int {
  kBytecode,  // Bytecode.
  kNative,    // Native code.
};
24 
25 // TODO(jgruber): Do not expose in regexp.h.
26 // TODO(jgruber): Consider splitting between ParseData and CompileData.
27 struct RegExpCompileData {
28   // The parsed AST as produced by the RegExpParser.
29   RegExpTree* tree = nullptr;
30 
31   // The compiled Node graph as produced by RegExpTree::ToNode methods.
32   RegExpNode* node = nullptr;
33 
34   // Either the generated code as produced by the compiler or a trampoline
35   // to the interpreter.
36   Handle<Object> code;
37 
38   // True, iff the pattern is a 'simple' atom with zero captures. In other
39   // words, the pattern consists of a string with no metacharacters and special
40   // regexp features, and can be implemented as a standard string search.
41   bool simple = true;
42 
43   // True, iff the pattern is anchored at the start of the string with '^'.
44   bool contains_anchor = false;
45 
46   // Only set if the pattern contains named captures.
47   // Note: the lifetime equals that of the parse/compile zone.
48   ZoneVector<RegExpCapture*>* named_captures = nullptr;
49 
50   // The error message. Only used if an error occurred during parsing or
51   // compilation.
52   RegExpError error = RegExpError::kNone;
53 
54   // The position at which the error was detected. Only used if an
55   // error occurred.
56   int error_pos = 0;
57 
58   // The number of capture groups, without the global capture \0.
59   int capture_count = 0;
60 
61   // The number of registers used by the generated code.
62   int register_count = 0;
63 
64   // The compilation target (bytecode or native code).
65   RegExpCompilationTarget compilation_target;
66 };
67 
68 class RegExp final : public AllStatic {
69  public:
70   // Whether the irregexp engine generates interpreter bytecode.
71   static bool CanGenerateBytecode();
72 
73   // Verify the given pattern, i.e. check that parsing succeeds. If
74   // verification fails, `regexp_error_out` is set.
75   template <class CharT>
76   static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
77                            const CharT* input, int input_length,
78                            RegExpFlags flags, RegExpError* regexp_error_out,
79                            const DisallowGarbageCollection& no_gc);
80 
81   // Parses the RegExp pattern and prepares the JSRegExp object with
82   // generic data and choice of implementation - as well as what
83   // the implementation wants to store in the data field.
84   // Returns false if compilation fails.
85   V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
86       Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
87       RegExpFlags flags, uint32_t backtrack_limit);
88 
89   // Ensures that a regexp is fully compiled and ready to be executed on a
90   // subject string.  Returns true on success. Return false on failure, and
91   // then an exception will be pending.
92   V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
93                                                         Handle<JSRegExp> re,
94                                                         Handle<String> subject);
95 
96   enum CallOrigin : int {
97     kFromRuntime = 0,
98     kFromJs = 1,
99   };
100 
101   enum class ExecQuirks {
102     kNone,
103     // Used to work around an issue in the RegExpPrototypeSplit fast path,
104     // which diverges from the spec by not creating a sticky copy of the RegExp
105     // instance and calling `exec` in a loop. If called in this context, we
106     // must not update the last_match_info on a successful match at the subject
107     // string end. See crbug.com/1075514 for more information.
108     kTreatMatchAtEndAsFailure,
109   };
110 
111   // See ECMA-262 section 15.10.6.2.
112   // This function calls the garbage collector if necessary.
113   V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
114       Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
115       int index, Handle<RegExpMatchInfo> last_match_info,
116       ExecQuirks exec_quirks = ExecQuirks::kNone);
117 
118   V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
119   ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
120                           Handle<String> subject, int index,
121                           Handle<RegExpMatchInfo> last_match_info,
122                           ExecQuirks exec_quirks = ExecQuirks::kNone);
123 
124   // Integral return values used throughout regexp code layers.
125   static constexpr int kInternalRegExpFailure = 0;
126   static constexpr int kInternalRegExpSuccess = 1;
127   static constexpr int kInternalRegExpException = -1;
128   static constexpr int kInternalRegExpRetry = -2;
129   static constexpr int kInternalRegExpFallbackToExperimental = -3;
130   static constexpr int kInternalRegExpSmallestResult = -3;
131 
132   enum IrregexpResult : int32_t {
133     RE_FAILURE = kInternalRegExpFailure,
134     RE_SUCCESS = kInternalRegExpSuccess,
135     RE_EXCEPTION = kInternalRegExpException,
136     RE_RETRY = kInternalRegExpRetry,
137     RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
138   };
139 
140   // Set last match info.  If match is nullptr, then setting captures is
141   // omitted.
142   static Handle<RegExpMatchInfo> SetLastMatchInfo(
143       Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
144       Handle<String> subject, int capture_count, int32_t* match);
145 
146   V8_EXPORT_PRIVATE static bool CompileForTesting(
147       Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags,
148       Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte);
149 
150   V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
151                                                    RegExpNode* node);
152 
153   static const int kRegExpTooLargeToOptimize = 20 * KB;
154 
155   V8_WARN_UNUSED_RESULT
156   static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
157                                                   Handle<JSRegExp> re,
158                                                   Handle<String> pattern,
159                                                   RegExpError error);
160   static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
161                                    RegExpError error_text);
162 
163   static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);
164 
165   static Handle<FixedArray> CreateCaptureNameMap(
166       Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures);
167 };
168 
169 // Uses a special global mode of irregexp-generated code to perform a global
170 // search and return multiple results at once. As such, this is essentially an
171 // iterator over multiple results (retrieved batch-wise in advance).
172 class RegExpGlobalCache final {
173  public:
174   RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
175                     Isolate* isolate);
176 
177   ~RegExpGlobalCache();
178 
179   // Fetch the next entry in the cache for global regexp match results.
180   // This does not set the last match info.  Upon failure, nullptr is
181   // returned. The cause can be checked with Result().  The previous result is
182   // still in available in memory when a failure happens.
183   int32_t* FetchNext();
184 
185   int32_t* LastSuccessfulMatch();
186 
HasException()187   bool HasException() { return num_matches_ < 0; }
188 
189  private:
190   int AdvanceZeroLength(int last_index);
191 
192   int num_matches_;
193   int max_matches_;
194   int current_match_index_;
195   int registers_per_match_;
196   // Pointer to the last set of captures.
197   int32_t* register_array_;
198   int register_array_size_;
199   Handle<JSRegExp> regexp_;
200   Handle<String> subject_;
201   Isolate* isolate_;
202 };
203 
204 // Caches results for specific regexp queries on the isolate. At the time of
205 // writing, this is used during global calls to RegExp.prototype.exec and
206 // @@split.
207 class RegExpResultsCache final : public AllStatic {
208  public:
209   enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
210 
211   // Attempt to retrieve a cached result.  On failure, 0 is returned as a Smi.
212   // On success, the returned result is guaranteed to be a COW-array.
213   static Object Lookup(Heap* heap, String key_string, Object key_pattern,
214                        FixedArray* last_match_out, ResultsCacheType type);
215   // Attempt to add value_array to the cache specified by type.  On success,
216   // value_array is turned into a COW-array.
217   static void Enter(Isolate* isolate, Handle<String> key_string,
218                     Handle<Object> key_pattern, Handle<FixedArray> value_array,
219                     Handle<FixedArray> last_match_cache, ResultsCacheType type);
220   static void Clear(FixedArray cache);
221 
222   static constexpr int kRegExpResultsCacheSize = 0x100;
223 
224  private:
225   static constexpr int kStringOffset = 0;
226   static constexpr int kPatternOffset = 1;
227   static constexpr int kArrayOffset = 2;
228   static constexpr int kLastMatchOffset = 3;
229   static constexpr int kArrayEntriesPerCacheEntry = 4;
230 };
231 
232 }  // namespace internal
233 }  // namespace v8
234 
235 #endif  // V8_REGEXP_REGEXP_H_
236