• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 
16 #ifndef PANDA_RUNTIME_REGEXP_OPCODE_H
17 #define PANDA_RUNTIME_REGEXP_OPCODE_H
18 
19 #include <list>
20 
21 #include "runtime/regexp/ecmascript/mem/dyn_chunk.h"
22 
23 namespace ark {
24 class RegExpOpCode {
25 public:
26     enum : uint8_t {
27         OP_SAVE_START = 0U,
28         OP_SAVE_END,
29         OP_CHAR,
30         OP_GOTO,
31         OP_SPLIT_FIRST,
32         OP_SPLIT_NEXT,
33         OP_MATCH_AHEAD,
34         OP_NEGATIVE_MATCH_AHEAD,
35         OP_MATCH,
36         OP_LOOP,
37         OP_LOOP_GREEDY,
38         OP_PUSH_CHAR,
39         OP_CHECK_CHAR,
40         OP_PUSH,
41         OP_POP,
42         OP_SAVE_RESET,
43         OP_LINE_START,
44         OP_LINE_END,
45         OP_WORD_BOUNDARY,
46         OP_NOT_WORD_BOUNDARY,
47         OP_ALL,
48         OP_DOTS,
49         OP_MATCH_END,
50         OP_PREV,
51         OP_RANGE,
52         OP_BACKREFERENCE,
53         OP_BACKWARD_BACKREFERENCE,
54         OP_CHAR32,
55         OP_RANGE32,
56         OP_INVALID,
57     };
58 
59     static constexpr size_t OP_SIZE_ONE = 1;
60     static constexpr size_t OP_SIZE_TWO = 2;
61     static constexpr size_t OP_SIZE_THREE = 3;
62     static constexpr size_t OP_SIZE_FOUR = 4;
63     static constexpr size_t OP_SIZE_FIVE = 5;
64     static constexpr size_t OP_SIZE_EIGHT = 8;
65     static constexpr size_t OP_SIZE_NINE = 9;
66     static constexpr size_t OP_SIZE_THIRTEEN = 13;
67 
68     RegExpOpCode(uint8_t opCode, int size);
69     NO_COPY_SEMANTIC(RegExpOpCode);
70     NO_MOVE_SEMANTIC(RegExpOpCode);
71 
72     virtual ~RegExpOpCode() = default;
73     static RegExpOpCode *GetRegExpOpCode(const DynChunk &buf, int pcOffset);
74     static RegExpOpCode *GetRegExpOpCode(uint8_t opCode);
75     static void DumpRegExpOpCode(std::ostream &out, const DynChunk &buf);
GetSize()76     inline uint8_t GetSize() const
77     {
78         return size_;
79     }
GetOpCode()80     inline uint8_t GetOpCode() const
81     {
82         return opCode_;
83     }
GetDynChunkfSize(const DynChunk & buf)84     inline int GetDynChunkfSize(const DynChunk &buf) const
85     {
86         return buf.size_;
87     }
88     virtual uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const = 0;
89 
90 private:
91     uint8_t opCode_ {0};
92     uint8_t size_ {0};
93 };
94 
95 class SaveStartOpCode : public RegExpOpCode {
96 public:
SaveStartOpCode()97     SaveStartOpCode() : RegExpOpCode(OP_SAVE_START, RegExpOpCode::OP_SIZE_TWO) {}
98     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
99     ~SaveStartOpCode() override = default;
100     NO_COPY_SEMANTIC(SaveStartOpCode);
101     NO_MOVE_SEMANTIC(SaveStartOpCode);
102     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
103 };
104 
105 class SaveEndOpCode : public RegExpOpCode {
106 public:
SaveEndOpCode()107     SaveEndOpCode() : RegExpOpCode(OP_SAVE_END, RegExpOpCode::OP_SIZE_TWO) {}
108     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
109     ~SaveEndOpCode() override = default;
110     NO_COPY_SEMANTIC(SaveEndOpCode);
111     NO_MOVE_SEMANTIC(SaveEndOpCode);
112     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
113 };
114 
115 class CharOpCode : public RegExpOpCode {
116 public:
CharOpCode()117     CharOpCode() : RegExpOpCode(OP_CHAR, RegExpOpCode::OP_SIZE_THREE) {}
118     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
119     ~CharOpCode() override = default;
120     NO_COPY_SEMANTIC(CharOpCode);
121     NO_MOVE_SEMANTIC(CharOpCode);
122     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
123 };
124 
125 class GotoOpCode : public RegExpOpCode {
126 public:
GotoOpCode()127     GotoOpCode() : RegExpOpCode(OP_GOTO, RegExpOpCode::OP_SIZE_FIVE) {}
128     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
129     void UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const;
130     ~GotoOpCode() override = default;
131     NO_COPY_SEMANTIC(GotoOpCode);
132     NO_MOVE_SEMANTIC(GotoOpCode);
133     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
134 };
135 
136 class SplitNextOpCode : public RegExpOpCode {
137 public:
SplitNextOpCode()138     SplitNextOpCode() : RegExpOpCode(OP_SPLIT_NEXT, RegExpOpCode::OP_SIZE_FIVE) {}
139     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
140     ~SplitNextOpCode() override = default;
141     NO_COPY_SEMANTIC(SplitNextOpCode);
142     NO_MOVE_SEMANTIC(SplitNextOpCode);
143     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
144 };
145 
146 class SplitFirstOpCode : public RegExpOpCode {
147 public:
SplitFirstOpCode()148     SplitFirstOpCode() : RegExpOpCode(OP_SPLIT_FIRST, RegExpOpCode::OP_SIZE_FIVE) {}
149     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
150     ~SplitFirstOpCode() override = default;
151     NO_COPY_SEMANTIC(SplitFirstOpCode);
152     NO_MOVE_SEMANTIC(SplitFirstOpCode);
153     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
154 };
155 
156 class PushOpCode : public RegExpOpCode {
157 public:
PushOpCode()158     PushOpCode() : RegExpOpCode(OP_PUSH, RegExpOpCode::OP_SIZE_ONE) {}
159     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const;
160     ~PushOpCode() override = default;
161     NO_COPY_SEMANTIC(PushOpCode);
162     NO_MOVE_SEMANTIC(PushOpCode);
163     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
164 };
165 
166 class PopOpCode : public RegExpOpCode {
167 public:
PopOpCode()168     PopOpCode() : RegExpOpCode(OP_POP, RegExpOpCode::OP_SIZE_ONE) {}
169     uint32_t EmitOpCode(DynChunk *buf) const;
170     ~PopOpCode() override = default;
171     NO_COPY_SEMANTIC(PopOpCode);
172     NO_MOVE_SEMANTIC(PopOpCode);
173     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
174 };
175 
176 class PushCharOpCode : public RegExpOpCode {
177 public:
PushCharOpCode()178     PushCharOpCode() : RegExpOpCode(OP_PUSH_CHAR, RegExpOpCode::OP_SIZE_ONE) {}
179     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const;
180     ~PushCharOpCode() override = default;
181     NO_COPY_SEMANTIC(PushCharOpCode);
182     NO_MOVE_SEMANTIC(PushCharOpCode);
183     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
184 };
185 
186 class CheckCharOpCode : public RegExpOpCode {
187 public:
CheckCharOpCode()188     CheckCharOpCode() : RegExpOpCode(OP_CHECK_CHAR, RegExpOpCode::OP_SIZE_FIVE) {}
189     uint32_t EmitOpCode(DynChunk *buf, uint32_t offset) const;
190     ~CheckCharOpCode() override = default;
191     NO_COPY_SEMANTIC(CheckCharOpCode);
192     NO_MOVE_SEMANTIC(CheckCharOpCode);
193     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
194 };
195 
196 class LoopOpCode : public RegExpOpCode {
197 public:
LoopOpCode()198     LoopOpCode() : RegExpOpCode(OP_LOOP, RegExpOpCode::OP_SIZE_THIRTEEN) {}
199     uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const;
200     ~LoopOpCode() override = default;
201     NO_COPY_SEMANTIC(LoopOpCode);
202     NO_MOVE_SEMANTIC(LoopOpCode);
203     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
204 };
205 
206 class LoopGreedyOpCode : public RegExpOpCode {
207 public:
LoopGreedyOpCode()208     LoopGreedyOpCode() : RegExpOpCode(OP_LOOP_GREEDY, RegExpOpCode::OP_SIZE_THIRTEEN) {}
209     uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const;
210     ~LoopGreedyOpCode() override = default;
211     NO_COPY_SEMANTIC(LoopGreedyOpCode);
212     NO_MOVE_SEMANTIC(LoopGreedyOpCode);
213     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
214 };
215 
216 class SaveResetOpCode : public RegExpOpCode {
217 public:
SaveResetOpCode()218     SaveResetOpCode() : RegExpOpCode(OP_SAVE_RESET, RegExpOpCode::OP_SIZE_THREE) {}
219     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const;
220     ~SaveResetOpCode() override = default;
221     NO_COPY_SEMANTIC(SaveResetOpCode);
222     NO_MOVE_SEMANTIC(SaveResetOpCode);
223     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
224 };
225 
226 class MatchOpCode : public RegExpOpCode {
227 public:
MatchOpCode()228     MatchOpCode() : RegExpOpCode(OP_MATCH, RegExpOpCode::OP_SIZE_ONE) {}
229     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
230     ~MatchOpCode() override = default;
231     NO_COPY_SEMANTIC(MatchOpCode);
232     NO_MOVE_SEMANTIC(MatchOpCode);
233     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
234 };
235 
236 class MatchEndOpCode : public RegExpOpCode {
237 public:
MatchEndOpCode()238     MatchEndOpCode() : RegExpOpCode(OP_MATCH_END, RegExpOpCode::OP_SIZE_ONE) {}
239     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
240     ~MatchEndOpCode() override = default;
241     NO_COPY_SEMANTIC(MatchEndOpCode);
242     NO_MOVE_SEMANTIC(MatchEndOpCode);
243     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
244 };
245 
246 class LineStartOpCode : public RegExpOpCode {
247 public:
LineStartOpCode()248     LineStartOpCode() : RegExpOpCode(OP_LINE_START, RegExpOpCode::OP_SIZE_ONE) {}
249     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
250     ~LineStartOpCode() override = default;
251     NO_COPY_SEMANTIC(LineStartOpCode);
252     NO_MOVE_SEMANTIC(LineStartOpCode);
253     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
254 };
255 
256 class LineEndOpCode : public RegExpOpCode {
257 public:
LineEndOpCode()258     LineEndOpCode() : RegExpOpCode(OP_LINE_END, RegExpOpCode::OP_SIZE_ONE) {}
259     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
260     ~LineEndOpCode() override = default;
261     NO_COPY_SEMANTIC(LineEndOpCode);
262     NO_MOVE_SEMANTIC(LineEndOpCode);
263     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
264 };
265 
266 class WordBoundaryOpCode : public RegExpOpCode {
267 public:
WordBoundaryOpCode()268     WordBoundaryOpCode() : RegExpOpCode(OP_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {}
269     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
270     ~WordBoundaryOpCode() override = default;
271     NO_COPY_SEMANTIC(WordBoundaryOpCode);
272     NO_MOVE_SEMANTIC(WordBoundaryOpCode);
273     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
274 };
275 
276 class NotWordBoundaryOpCode : public RegExpOpCode {
277 public:
NotWordBoundaryOpCode()278     NotWordBoundaryOpCode() : RegExpOpCode(OP_NOT_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {}
279     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
280     ~NotWordBoundaryOpCode() override = default;
281     NO_COPY_SEMANTIC(NotWordBoundaryOpCode);
282     NO_MOVE_SEMANTIC(NotWordBoundaryOpCode);
283     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
284 };
285 
286 class AllOpCode : public RegExpOpCode {
287 public:
AllOpCode()288     AllOpCode() : RegExpOpCode(OP_ALL, RegExpOpCode::OP_SIZE_ONE) {}
289     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
290     ~AllOpCode() override = default;
291     NO_COPY_SEMANTIC(AllOpCode);
292     NO_MOVE_SEMANTIC(AllOpCode);
293     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
294 };
295 
296 class DotsOpCode : public RegExpOpCode {
297 public:
DotsOpCode()298     DotsOpCode() : RegExpOpCode(OP_DOTS, RegExpOpCode::OP_SIZE_ONE) {}
299     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
300     ~DotsOpCode() override = default;
301     NO_COPY_SEMANTIC(DotsOpCode);
302     NO_MOVE_SEMANTIC(DotsOpCode);
303     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
304 };
305 
/**
 * A list of closed intervals [first, second] of uint32_t values.
 *
 * The intervals are kept in a std::list of (start, end) pairs. Insert(),
 * Invert() and Compress() are defined out of line; this header only provides
 * the constructors and the read-only queries.
 */
class RangeSet {
public:
    RangeSet() = default;

    /** Builds a set holding the single value `value` (inserted as [value, value]). */
    explicit RangeSet(uint32_t value)
    {
        Insert(value, value);
    }

    /** Builds a set by inserting the interval [start, end]. */
    explicit RangeSet(uint32_t start, uint32_t end)
    {
        Insert(start, end);
    }

    /** Builds a set from a ready-made interval list; the list is copied verbatim. */
    explicit RangeSet(const std::list<std::pair<uint32_t, uint32_t>> &rangeSet) : rangeSet_(rangeSet) {}

    ~RangeSet() = default;
    RangeSet(RangeSet const &) = default;
    RangeSet &operator=(RangeSet const &) = default;
    RangeSet(RangeSet &&) = default;
    RangeSet &operator=(RangeSet &&) = default;

    /**
     * Tells whether the closed intervals [start, end] and [start1, end1] share
     * at least one value.
     *
     * The previous form only used strict comparisons of the start points, so
     * two intervals beginning at the same value (e.g. [1, 5] and [1, 3]) were
     * reported as disjoint. The standard closed-interval overlap test is used
     * instead; for well-formed intervals (start <= end) it returns true in
     * every case the old test did.
     */
    inline bool IsIntersect(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const
    {
        return (start <= end1) && (start1 <= end);
    }

    /**
     * Tells whether one interval ends exactly where the other begins, or one
     * value before it. Arguments are 64-bit, so `end + 1` cannot wrap for
     * values that originate from uint32_t.
     */
    inline bool IsAdjacent(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const
    {
        return ((end == start1 || (end + 1) == start1)) || ((end1 == start) || (end1 + 1 == start));
    }

    /** Two sets are equal when their interval lists compare equal element by element. */
    inline bool operator==(const RangeSet &other) const
    {
        return rangeSet_ == other.rangeSet_;
    }

    /** @return true when `value` lies inside at least one stored interval. */
    inline bool IsContain(uint32_t value) const
    {
        // Iterate by reference: the original copied each pair on every step.
        for (const auto &range : rangeSet_) {
            if (value >= range.first && value <= range.second) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the largest upper bound among the stored intervals, or 0 when
     *         the set is empty.
     *
     * The previous form returned the upper bound of the last list element,
     * which is only the maximum when the list is sorted and disjoint. Scanning
     * gives the same answer for such lists and the correct one otherwise.
     */
    inline uint32_t HighestValue() const
    {
        uint32_t highest = 0;
        for (const auto &range : rangeSet_) {
            if (range.second > highest) {
                highest = range.second;
            }
        }
        return highest;
    }

    // Mutators, defined out of line.
    void Insert(uint32_t start, uint32_t end);
    void Insert(const RangeSet &s1);
    void Invert(bool isUtf16);
    void Compress();

private:
    // The range opcodes read rangeSet_ directly.
    friend class RangeOpCode;
    friend class Range32OpCode;
    std::list<std::pair<uint32_t, uint32_t>> rangeSet_ {};
};
368 
369 class RangeOpCode : public RegExpOpCode {
370 public:
RangeOpCode()371     RangeOpCode() : RegExpOpCode(OP_RANGE, RegExpOpCode::OP_SIZE_ONE) {}
372     ~RangeOpCode() override = default;
373     NO_COPY_SEMANTIC(RangeOpCode);
374     NO_MOVE_SEMANTIC(RangeOpCode);
375     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
376     uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const;
377 };
378 
379 class MatchAheadOpCode : public RegExpOpCode {
380 public:
MatchAheadOpCode()381     MatchAheadOpCode() : RegExpOpCode(OP_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {}
382     ~MatchAheadOpCode() override = default;
383     NO_COPY_SEMANTIC(MatchAheadOpCode);
384     NO_MOVE_SEMANTIC(MatchAheadOpCode);
385     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
386     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
387 };
388 
389 class NegativeMatchAheadOpCode : public RegExpOpCode {
390 public:
NegativeMatchAheadOpCode()391     NegativeMatchAheadOpCode() : RegExpOpCode(OP_NEGATIVE_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {}
392     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
393     ~NegativeMatchAheadOpCode() override = default;
394     NO_COPY_SEMANTIC(NegativeMatchAheadOpCode);
395     NO_MOVE_SEMANTIC(NegativeMatchAheadOpCode);
396     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
397 };
398 
399 class PrevOpCode : public RegExpOpCode {
400 public:
PrevOpCode()401     PrevOpCode() : RegExpOpCode(OP_PREV, RegExpOpCode::OP_SIZE_ONE) {}
402     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
403     ~PrevOpCode() override = default;
404     NO_COPY_SEMANTIC(PrevOpCode);
405     NO_MOVE_SEMANTIC(PrevOpCode);
406     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
407 };
408 
409 class BackReferenceOpCode : public RegExpOpCode {
410 public:
BackReferenceOpCode()411     BackReferenceOpCode() : RegExpOpCode(OP_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {}
412     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
413     ~BackReferenceOpCode() override = default;
414     NO_COPY_SEMANTIC(BackReferenceOpCode);
415     NO_MOVE_SEMANTIC(BackReferenceOpCode);
416     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
417 };
418 
419 class BackwardBackReferenceOpCode : public RegExpOpCode {
420 public:
BackwardBackReferenceOpCode()421     BackwardBackReferenceOpCode() : RegExpOpCode(OP_BACKWARD_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {}
422     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
423     ~BackwardBackReferenceOpCode() override = default;
424     NO_COPY_SEMANTIC(BackwardBackReferenceOpCode);
425     NO_MOVE_SEMANTIC(BackwardBackReferenceOpCode);
426     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
427 };
428 
429 class Char32OpCode : public RegExpOpCode {
430 public:
Char32OpCode()431     Char32OpCode() : RegExpOpCode(OP_CHAR32, RegExpOpCode::OP_SIZE_FIVE) {}
432     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
433     ~Char32OpCode() override = default;
434     NO_COPY_SEMANTIC(Char32OpCode);
435     NO_MOVE_SEMANTIC(Char32OpCode);
436     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
437 };
438 
439 class Range32OpCode : public RegExpOpCode {
440 public:
Range32OpCode()441     Range32OpCode() : RegExpOpCode(OP_RANGE32, RegExpOpCode::OP_SIZE_ONE) {}
442     ~Range32OpCode() override = default;
443     NO_COPY_SEMANTIC(Range32OpCode);
444     NO_MOVE_SEMANTIC(Range32OpCode);
445     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
446     uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const;
447 };
448 }  // namespace ark
449 #endif
450