• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_REGEXP_OPCODE_H
17 #define ECMASCRIPT_REGEXP_OPCODE_H
18 
19 #include <list>
20 
21 #include "ecmascript/regexp/dyn_chunk.h"
22 
23 namespace panda {
24 namespace ecmascript {
25 class RegExpOpCode {
26 public:
27     enum : uint8_t {
28         OP_SAVE_START = 0U,
29         OP_SAVE_END,
30         OP_CHAR,
31         OP_GOTO,
32         OP_SPLIT_FIRST,
33         OP_SPLIT_NEXT,
34         OP_MATCH_AHEAD,
35         OP_NEGATIVE_MATCH_AHEAD,
36         OP_MATCH,
37         OP_LOOP,
38         OP_LOOP_GREEDY,
39         OP_PUSH_CHAR,
40         OP_CHECK_CHAR,
41         OP_PUSH,
42         OP_POP,
43         OP_SAVE_RESET,
44         OP_LINE_START,
45         OP_LINE_END,
46         OP_WORD_BOUNDARY,
47         OP_NOT_WORD_BOUNDARY,
48         OP_ALL,
49         OP_DOTS,
50         OP_MATCH_END,
51         OP_PREV,
52         OP_RANGE,
53         OP_BACKREFERENCE,
54         OP_BACKWARD_BACKREFERENCE,
55         OP_CHAR32,
56         OP_RANGE32,
57         OP_INVALID,
58     };
59 
60     static constexpr size_t OP_SIZE_ONE = 1;
61     static constexpr size_t OP_SIZE_TWO = 2;
62     static constexpr size_t OP_SIZE_THREE = 3;
63     static constexpr size_t OP_SIZE_FOUR = 4;
64     static constexpr size_t OP_SIZE_FIVE = 5;
65     static constexpr size_t OP_SIZE_EIGHT = 8;
66     static constexpr size_t OP_SIZE_NINE = 9;
67     static constexpr size_t OP_SIZE_THIRTEEN = 13;
68 
69     RegExpOpCode(uint8_t opCode, int size);
70     NO_COPY_SEMANTIC(RegExpOpCode);
71     NO_MOVE_SEMANTIC(RegExpOpCode);
72 
73     virtual ~RegExpOpCode() = default;
74     static RegExpOpCode *GetRegExpOpCode(const DynChunk &buf, int pcOffset);
75     static RegExpOpCode *GetRegExpOpCode(uint8_t opCode);
76     static void DumpRegExpOpCode(std::ostream &out, const DynChunk &buf);
GetSize()77     inline int GetSize() const
78     {
79         return size_;
80     }
GetOpCode()81     inline uint8_t GetOpCode() const
82     {
83         return opCode_;
84     }
GetDynChunkfSize(const DynChunk & buf)85     inline int GetDynChunkfSize(const DynChunk &buf) const
86     {
87         return buf.size_;
88     }
89     virtual uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const = 0;
90 
91 private:
92     uint8_t opCode_{0};
93     uint8_t size_{0};
94 };
95 
96 class SaveStartOpCode : public RegExpOpCode {
97 public:
SaveStartOpCode()98     SaveStartOpCode() : RegExpOpCode(OP_SAVE_START, RegExpOpCode::OP_SIZE_TWO) {}
99     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
100     ~SaveStartOpCode() override = default;
101     NO_COPY_SEMANTIC(SaveStartOpCode);
102     NO_MOVE_SEMANTIC(SaveStartOpCode);
103     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
104 };
105 
106 class SaveEndOpCode : public RegExpOpCode {
107 public:
SaveEndOpCode()108     SaveEndOpCode() : RegExpOpCode(OP_SAVE_END, RegExpOpCode::OP_SIZE_TWO) {}
109     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
110     ~SaveEndOpCode() override = default;
111     NO_COPY_SEMANTIC(SaveEndOpCode);
112     NO_MOVE_SEMANTIC(SaveEndOpCode);
113     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
114 };
115 
116 class CharOpCode : public RegExpOpCode {
117 public:
CharOpCode()118     CharOpCode() : RegExpOpCode(OP_CHAR, RegExpOpCode::OP_SIZE_THREE) {}
119     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
120     ~CharOpCode() override = default;
121     NO_COPY_SEMANTIC(CharOpCode);
122     NO_MOVE_SEMANTIC(CharOpCode);
123     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
124 };
125 
126 class GotoOpCode : public RegExpOpCode {
127 public:
GotoOpCode()128     GotoOpCode() : RegExpOpCode(OP_GOTO, RegExpOpCode::OP_SIZE_FIVE) {}
129     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
130     void UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const;
131     ~GotoOpCode() override = default;
132     NO_COPY_SEMANTIC(GotoOpCode);
133     NO_MOVE_SEMANTIC(GotoOpCode);
134     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
135 };
136 
137 class SplitNextOpCode : public RegExpOpCode {
138 public:
SplitNextOpCode()139     SplitNextOpCode() : RegExpOpCode(OP_SPLIT_NEXT, RegExpOpCode::OP_SIZE_FIVE) {}
140     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
141     ~SplitNextOpCode() override = default;
142     NO_COPY_SEMANTIC(SplitNextOpCode);
143     NO_MOVE_SEMANTIC(SplitNextOpCode);
144     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
145 };
146 
147 class SplitFirstOpCode : public RegExpOpCode {
148 public:
SplitFirstOpCode()149     SplitFirstOpCode() : RegExpOpCode(OP_SPLIT_FIRST, RegExpOpCode::OP_SIZE_FIVE) {}
150     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
151     ~SplitFirstOpCode() override = default;
152     NO_COPY_SEMANTIC(SplitFirstOpCode);
153     NO_MOVE_SEMANTIC(SplitFirstOpCode);
154     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
155 };
156 
157 class PushOpCode : public RegExpOpCode {
158 public:
PushOpCode()159     PushOpCode() : RegExpOpCode(OP_PUSH, RegExpOpCode::OP_SIZE_ONE) {}
160     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const;
161     ~PushOpCode() override = default;
162     NO_COPY_SEMANTIC(PushOpCode);
163     NO_MOVE_SEMANTIC(PushOpCode);
164     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
165 };
166 
167 class PopOpCode : public RegExpOpCode {
168 public:
PopOpCode()169     PopOpCode() : RegExpOpCode(OP_POP, RegExpOpCode::OP_SIZE_ONE) {}
170     uint32_t EmitOpCode(DynChunk *buf) const;
171     ~PopOpCode() override = default;
172     NO_COPY_SEMANTIC(PopOpCode);
173     NO_MOVE_SEMANTIC(PopOpCode);
174     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
175 };
176 
177 class PushCharOpCode : public RegExpOpCode {
178 public:
PushCharOpCode()179     PushCharOpCode() : RegExpOpCode(OP_PUSH_CHAR, RegExpOpCode::OP_SIZE_ONE) {}
180     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const;
181     ~PushCharOpCode() override = default;
182     NO_COPY_SEMANTIC(PushCharOpCode);
183     NO_MOVE_SEMANTIC(PushCharOpCode);
184     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
185 };
186 
187 class CheckCharOpCode : public RegExpOpCode {
188 public:
CheckCharOpCode()189     CheckCharOpCode() : RegExpOpCode(OP_CHECK_CHAR, RegExpOpCode::OP_SIZE_FIVE) {}
190     uint32_t EmitOpCode(DynChunk *buf, uint32_t offset) const;
191     ~CheckCharOpCode() override = default;
192     NO_COPY_SEMANTIC(CheckCharOpCode);
193     NO_MOVE_SEMANTIC(CheckCharOpCode);
194     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
195 };
196 
197 class LoopOpCode : public RegExpOpCode {
198 public:
LoopOpCode()199     LoopOpCode() : RegExpOpCode(OP_LOOP, RegExpOpCode::OP_SIZE_THIRTEEN) {}
200     uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const;
201     ~LoopOpCode() override = default;
202     NO_COPY_SEMANTIC(LoopOpCode);
203     NO_MOVE_SEMANTIC(LoopOpCode);
204     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
205 };
206 
207 class LoopGreedyOpCode : public RegExpOpCode {
208 public:
LoopGreedyOpCode()209     LoopGreedyOpCode() : RegExpOpCode(OP_LOOP_GREEDY, RegExpOpCode::OP_SIZE_THIRTEEN) {}
210     uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const;
211     ~LoopGreedyOpCode() override = default;
212     NO_COPY_SEMANTIC(LoopGreedyOpCode);
213     NO_MOVE_SEMANTIC(LoopGreedyOpCode);
214     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
215 };
216 
217 class SaveResetOpCode : public RegExpOpCode {
218 public:
SaveResetOpCode()219     SaveResetOpCode() : RegExpOpCode(OP_SAVE_RESET, RegExpOpCode::OP_SIZE_THREE) {}
220     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const;
221     ~SaveResetOpCode() override = default;
222     NO_COPY_SEMANTIC(SaveResetOpCode);
223     NO_MOVE_SEMANTIC(SaveResetOpCode);
224     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
225 };
226 
227 class MatchOpCode : public RegExpOpCode {
228 public:
MatchOpCode()229     MatchOpCode() : RegExpOpCode(OP_MATCH, RegExpOpCode::OP_SIZE_ONE) {}
230     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
231     ~MatchOpCode() override = default;
232     NO_COPY_SEMANTIC(MatchOpCode);
233     NO_MOVE_SEMANTIC(MatchOpCode);
234     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
235 };
236 
237 class MatchEndOpCode : public RegExpOpCode {
238 public:
MatchEndOpCode()239     MatchEndOpCode() : RegExpOpCode(OP_MATCH_END, RegExpOpCode::OP_SIZE_ONE) {}
240     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
241     ~MatchEndOpCode() override = default;
242     NO_COPY_SEMANTIC(MatchEndOpCode);
243     NO_MOVE_SEMANTIC(MatchEndOpCode);
244     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
245 };
246 
247 class LineStartOpCode : public RegExpOpCode {
248 public:
LineStartOpCode()249     LineStartOpCode() : RegExpOpCode(OP_LINE_START, RegExpOpCode::OP_SIZE_ONE) {}
250     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
251     ~LineStartOpCode() override = default;
252     NO_COPY_SEMANTIC(LineStartOpCode);
253     NO_MOVE_SEMANTIC(LineStartOpCode);
254     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
255 };
256 
257 class LineEndOpCode : public RegExpOpCode {
258 public:
LineEndOpCode()259     LineEndOpCode() : RegExpOpCode(OP_LINE_END, RegExpOpCode::OP_SIZE_ONE) {}
260     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
261     ~LineEndOpCode() override = default;
262     NO_COPY_SEMANTIC(LineEndOpCode);
263     NO_MOVE_SEMANTIC(LineEndOpCode);
264     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
265 };
266 
267 class WordBoundaryOpCode : public RegExpOpCode {
268 public:
WordBoundaryOpCode()269     WordBoundaryOpCode() : RegExpOpCode(OP_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {}
270     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
271     ~WordBoundaryOpCode() override = default;
272     NO_COPY_SEMANTIC(WordBoundaryOpCode);
273     NO_MOVE_SEMANTIC(WordBoundaryOpCode);
274     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
275 };
276 
277 class NotWordBoundaryOpCode : public RegExpOpCode {
278 public:
NotWordBoundaryOpCode()279     NotWordBoundaryOpCode() : RegExpOpCode(OP_NOT_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {}
280     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
281     ~NotWordBoundaryOpCode() override = default;
282     NO_COPY_SEMANTIC(NotWordBoundaryOpCode);
283     NO_MOVE_SEMANTIC(NotWordBoundaryOpCode);
284     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
285 };
286 
287 class AllOpCode : public RegExpOpCode {
288 public:
AllOpCode()289     AllOpCode() : RegExpOpCode(OP_ALL, RegExpOpCode::OP_SIZE_ONE) {}
290     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
291     ~AllOpCode() override = default;
292     NO_COPY_SEMANTIC(AllOpCode);
293     NO_MOVE_SEMANTIC(AllOpCode);
294     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
295 };
296 
297 class DotsOpCode : public RegExpOpCode {
298 public:
DotsOpCode()299     DotsOpCode() : RegExpOpCode(OP_DOTS, RegExpOpCode::OP_SIZE_ONE) {}
300     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
301     ~DotsOpCode() override = default;
302     NO_COPY_SEMANTIC(DotsOpCode);
303     NO_MOVE_SEMANTIC(DotsOpCode);
304     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
305 };
306 
/**
 * A list of closed uint32_t ranges, each stored as a (first, second) pair with
 * both bounds inclusive (see IsContain).
 *
 * Insert, Invert and Compress are declared here and defined elsewhere.
 * RangeOpCode and Range32OpCode are friends and may read rangeSet_ directly.
 */
class RangeSet {
public:
    RangeSet() = default;

    // Builds a set holding the single value `value` by inserting the range [value, value].
    explicit RangeSet(uint32_t value)
    {
        Insert(value, value);
    }

    // Builds a set from one range by inserting [start, end].
    explicit RangeSet(uint32_t start, uint32_t end)
    {
        Insert(start, end);
    }

    // Adopts the given list as-is; no sorting or merging is performed here.
    explicit RangeSet(const std::list<std::pair<uint32_t, uint32_t>> &rangeSet) : rangeSet_(rangeSet) {}

    ~RangeSet() = default;

    /**
     * Reports whether the closed ranges [start, end] and [start1, end1] share at
     * least one value. Both ranges are expected to be well-formed (start <= end).
     *
     * The earlier strict-inequality form missed ranges that begin at the same
     * value (for example [1, 3] vs [1, 5], or two identical ranges), so such
     * overlaps were never recognised as intersecting.
     */
    bool IsIntersect(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const
    {
        return (start <= end1) && (start1 <= end);
    }

    /**
     * Reports whether one range ends exactly where the other begins, or ends
     * one value before it. Parameters are 64-bit, so `end + 1` cannot wrap for
     * bounds that originate from uint32_t values.
     */
    bool IsAdjacent(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const
    {
        return ((end == start1 || (end + 1) == start1)) || ((end1 == start) || (end1 + 1 == start));
    }

    // Two sets are equal when their range lists compare equal element by element.
    bool operator==(const RangeSet &other) const
    {
        return rangeSet_ == other.rangeSet_;
    }

    // Returns true when `value` lies inside any stored range (bounds inclusive).
    bool IsContain(uint32_t value) const
    {
        for (const auto &range : rangeSet_) {
            if (value >= range.first && value <= range.second) {
                return true;
            }
        }
        return false;
    }

    // Returns the upper bound of the last stored range, or 0 for an empty set.
    // NOTE(review): this is the overall maximum only if the list is kept sorted - confirm in Insert/Compress.
    uint32_t HighestValue() const
    {
        if (!rangeSet_.empty()) {
            return rangeSet_.back().second;
        }
        return 0;
    }

    RangeSet(RangeSet const &) = default;
    RangeSet &operator=(RangeSet const &) = default;
    RangeSet(RangeSet &&) = default;
    RangeSet &operator=(RangeSet &&) = default;

    void Insert(uint32_t start, uint32_t end);
    void Insert(const RangeSet &s1);
    void Invert(bool isUtf16);
    void Compress();

private:
    friend class RangeOpCode;
    friend class Range32OpCode;
    std::list<std::pair<uint32_t, uint32_t>> rangeSet_{};
};
371 
372 class RangeOpCode : public RegExpOpCode {
373 public:
RangeOpCode()374     RangeOpCode() : RegExpOpCode(OP_RANGE, RegExpOpCode::OP_SIZE_ONE) {}
375     ~RangeOpCode() override = default;
376     NO_COPY_SEMANTIC(RangeOpCode);
377     NO_MOVE_SEMANTIC(RangeOpCode);
378     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
379     uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const;
380 };
381 
382 class MatchAheadOpCode : public RegExpOpCode {
383 public:
MatchAheadOpCode()384     MatchAheadOpCode() : RegExpOpCode(OP_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {}
385     ~MatchAheadOpCode() override = default;
386     NO_COPY_SEMANTIC(MatchAheadOpCode);
387     NO_MOVE_SEMANTIC(MatchAheadOpCode);
388     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
389     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
390 };
391 
392 class NegativeMatchAheadOpCode : public RegExpOpCode {
393 public:
NegativeMatchAheadOpCode()394     NegativeMatchAheadOpCode() : RegExpOpCode(OP_NEGATIVE_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {}
395     uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const;
396     ~NegativeMatchAheadOpCode() override = default;
397     NO_COPY_SEMANTIC(NegativeMatchAheadOpCode);
398     NO_MOVE_SEMANTIC(NegativeMatchAheadOpCode);
399     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
400 };
401 
402 class PrevOpCode : public RegExpOpCode {
403 public:
PrevOpCode()404     PrevOpCode() : RegExpOpCode(OP_PREV, RegExpOpCode::OP_SIZE_ONE) {}
405     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
406     ~PrevOpCode() override = default;
407     NO_COPY_SEMANTIC(PrevOpCode);
408     NO_MOVE_SEMANTIC(PrevOpCode);
409     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
410 };
411 
412 class BackReferenceOpCode : public RegExpOpCode {
413 public:
BackReferenceOpCode()414     BackReferenceOpCode() : RegExpOpCode(OP_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {}
415     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
416     ~BackReferenceOpCode() override = default;
417     NO_COPY_SEMANTIC(BackReferenceOpCode);
418     NO_MOVE_SEMANTIC(BackReferenceOpCode);
419     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
420 };
421 
422 class BackwardBackReferenceOpCode : public RegExpOpCode {
423 public:
BackwardBackReferenceOpCode()424     BackwardBackReferenceOpCode() : RegExpOpCode(OP_BACKWARD_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {}
425     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
426     ~BackwardBackReferenceOpCode() override = default;
427     NO_COPY_SEMANTIC(BackwardBackReferenceOpCode);
428     NO_MOVE_SEMANTIC(BackwardBackReferenceOpCode);
429     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
430 };
431 
432 class Char32OpCode : public RegExpOpCode {
433 public:
Char32OpCode()434     Char32OpCode() : RegExpOpCode(OP_CHAR32, RegExpOpCode::OP_SIZE_FIVE) {}
435     uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const;
436     ~Char32OpCode() override = default;
437     NO_COPY_SEMANTIC(Char32OpCode);
438     NO_MOVE_SEMANTIC(Char32OpCode);
439     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
440 };
441 
442 class Range32OpCode : public RegExpOpCode {
443 public:
Range32OpCode()444     Range32OpCode() : RegExpOpCode(OP_RANGE32, RegExpOpCode::OP_SIZE_ONE) {}
445     ~Range32OpCode() override = default;
446     NO_COPY_SEMANTIC(Range32OpCode);
447     NO_MOVE_SEMANTIC(Range32OpCode);
448     uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override;
449     uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const;
450 };
451 }  // namespace ecmascript
452 }  // namespace panda
453 #endif
454