• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_opcode.h"
17 
18 #include "ecmascript/regexp/regexp_executor.h"
19 
20 namespace panda::ecmascript {
21 using CaptureState = RegExpExecutor::CaptureState;
22 
23 static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
24 static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode();        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
25 static CharOpCode g_charOpcode = CharOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
26 static GotoOpCode g_gotoOpcode = GotoOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
27 static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
28 static SplitFirstOpCode g_splitFirstOpcode =
29     SplitFirstOpCode();                            // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
30 static MatchOpCode g_matchOpcode = MatchOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
31 static LoopOpCode g_loopOpcode = LoopOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
32 static LoopGreedyOpCode g_loopGreedyOpcode =
33     LoopGreedyOpCode();                                        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
34 static PushCharOpCode g_pushCharOpcode = PushCharOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
35 static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
36 static PushOpCode g_pushOpcode = PushOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
37 static PopOpCode g_popOpcode = PopOpCode();                    // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
38 static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
39 static LineStartOpCode g_lineStartOpcode = LineStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
40 static LineEndOpCode g_lineEndOpcode = LineEndOpCode();        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
41 static WordBoundaryOpCode g_wordBoundaryOpcode =
42     WordBoundaryOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
43 static NotWordBoundaryOpCode g_notWordBoundaryOpcode =
44     NotWordBoundaryOpCode();                    // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
45 static AllOpCode g_allOpcode = AllOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
46 static DotsOpCode g_dotsOpcode = DotsOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
47 static MatchAheadOpCode g_matchAheadOpcode =
48     MatchAheadOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
49 static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode =
50     NegativeMatchAheadOpCode();                             // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
51 static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
52 static PrevOpCode g_prevOpcode = PrevOpCode();              // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
53 static RangeOpCode g_rangeOpcode = RangeOpCode();           // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
54 static BackReferenceOpCode g_backreferenceOpcode =
55     BackReferenceOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
56 static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode =
57     BackwardBackReferenceOpCode();                       // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
58 static Char32OpCode g_char32Opcode = Char32OpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
59 static Range32OpCode g_range32Opcode = Range32OpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
60 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
61 static std::vector<RegExpOpCode *> g_intrinsicSet = {
62     &g_saveStartOpcode,
63     &g_saveEndOpcode,
64     &g_charOpcode,
65     &g_gotoOpcode,
66     &g_splitFirstOpcode,
67     &g_splitNextOpcode,
68     &g_matchAheadOpcode,
69     &g_negativeMatchAheadOpcode,
70     &g_matchOpcode,
71     &g_loopOpcode,
72     &g_loopGreedyOpcode,
73     &g_pushCharOpcode,
74     &g_checkCharOpcode,
75     &g_pushOpcode,
76     &g_popOpcode,
77     &g_saveResetOpcode,
78     &g_lineStartOpcode,
79     &g_lineEndOpcode,
80     &g_wordBoundaryOpcode,
81     &g_notWordBoundaryOpcode,
82     &g_allOpcode,
83     &g_dotsOpcode,
84     &g_matchEndOpcode,
85     &g_prevOpcode,
86     &g_rangeOpcode,
87     &g_backreferenceOpcode,
88     &g_backwardBackreferenceOpcode,
89     &g_char32Opcode,
90     &g_range32Opcode,
91 };
92 
RegExpOpCode(uint8_t opCode,int size)93 RegExpOpCode::RegExpOpCode(uint8_t opCode, int size) : opCode_(opCode), size_(size) {}
94 
95 /* static */
GetRegExpOpCode(const DynChunk & buf,int pc)96 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc)
97 {
98     uint8_t opCode = buf.GetU8(pc);
99     ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
100     return g_intrinsicSet.at(opCode);
101 }
102 
103 /* static */
GetRegExpOpCode(uint8_t opCode)104 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t opCode)
105 {
106     ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
107     return g_intrinsicSet.at(opCode);
108 }
109 
110 /* static */
DumpRegExpOpCode(std::ostream & out,const DynChunk & buf)111 void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf)
112 {
113     out << "OpCode:\t" << std::endl;
114     uint32_t pc = RegExpParser::OP_START_OFFSET;
115     do {
116         RegExpOpCode *byteCode = GetRegExpOpCode(buf, pc);
117         pc = byteCode->DumpOpCode(out, buf, pc);
118     } while (pc < buf.size_);
119 }
120 
EmitOpCode(DynChunk * buf,uint32_t para) const121 uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
122 {
123     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
124     buf->EmitChar(GetOpCode());
125     buf->EmitChar(capture);
126     return GetDynChunkfSize(*buf);
127 }
128 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const129 uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
130 {
131     out << offset << ":\t"
132         << "save_start\t" << buf.GetU8(offset + 1) << std::endl;
133     return offset + GetSize();
134 }
135 
EmitOpCode(DynChunk * buf,uint32_t para) const136 uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
137 {
138     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
139     buf->EmitChar(GetOpCode());
140     buf->EmitChar(capture);
141     return GetDynChunkfSize(*buf);
142 }
143 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const144 uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
145 {
146     out << offset << ":\t"
147         << "save_end\t" << buf.GetU8(offset + 1) << std::endl;
148     return offset + GetSize();
149 }
150 
EmitOpCode(DynChunk * buf,uint32_t para) const151 uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
152 {
153     auto paraChar = static_cast<uint16_t>(para & 0xffffU);  // NOLINTNEXTLINE(readability-magic-numbers)
154     buf->EmitChar(GetOpCode());
155     buf->EmitU16(paraChar);
156     return GetDynChunkfSize(*buf);
157 }
158 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const159 uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
160 {
161     out << offset << ":\t"
162         << "char\t" << static_cast<char>(buf.GetU16(offset + 1)) << std::endl;
163     return offset + GetSize();
164 }
165 
EmitOpCode(DynChunk * buf,uint32_t para) const166 uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
167 {
168     buf->EmitChar(GetOpCode());
169     buf->EmitU32(para);
170     return GetDynChunkfSize(*buf);
171 }
172 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const173 uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
174 {
175     out << offset << ":\t"
176         << "char32\t" << static_cast<char>(buf.GetU32(offset + 1)) << std::endl;
177     return offset + GetSize();
178 }
179 
EmitOpCode(DynChunk * buf,uint32_t para) const180 uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
181 {
182     buf->EmitChar(GetOpCode());
183     buf->EmitU32(para);
184     return GetDynChunkfSize(*buf);
185 }
186 
UpdateOpPara(DynChunk * buf,uint32_t offset,uint32_t para) const187 void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const
188 {
189     buf->PutU32(offset + 1, para);
190 }
191 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const192 uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
193 {
194     out << offset << ":\t"
195         << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
196     return offset + GetSize();
197 }
198 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const199 uint32_t SplitNextOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
200 {
201     buf->Insert(offset, GetSize());
202     buf->PutU8(offset, GetOpCode());
203     buf->PutU32(offset + 1, para);
204     return GetDynChunkfSize(*buf);
205 }
206 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const207 uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
208 {
209     out << offset << ":\t"
210         << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
211     return offset + GetSize();
212 }
213 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const214 uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
215 {
216     buf->Insert(offset, GetSize());
217     buf->PutU8(offset, GetOpCode());
218     buf->PutU32(offset + 1, para);
219     return GetDynChunkfSize(*buf);
220 }
221 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const222 uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
223 {
224     out << offset << ":\t"
225         << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
226     return offset + GetSize();
227 }
228 
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const229 uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
230 {
231     buf->EmitChar(GetOpCode());
232     buf->EmitU32(start);
233     buf->EmitU32(min);
234     buf->EmitU32(max);
235     return GetDynChunkfSize(*buf);
236 }
237 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const238 uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
239 {
240     out << offset << ":\t"
241         << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
242         << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
243         << std::endl;
244     return offset + GetSize();
245 }
246 
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const247 uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
248 {
249     buf->EmitChar(GetOpCode());
250     buf->EmitU32(start);
251     buf->EmitU32(min);
252     buf->EmitU32(max);
253     return GetDynChunkfSize(*buf);
254 }
255 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const256 uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
257 {
258     out << offset << ":\t"
259         << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
260         << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
261         << std::endl;
262     return offset + GetSize();
263 }
264 
InsertOpCode(DynChunk * buf,uint32_t offset) const265 uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
266 {
267     buf->Insert(offset, GetSize());
268     buf->PutU8(offset, GetOpCode());
269     return GetDynChunkfSize(*buf);
270 }
271 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const272 uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
273 {
274     out << offset << ":\t"
275         << "push_char" << std::endl;
276     return offset + GetSize();
277 }
278 
InsertOpCode(DynChunk * buf,uint32_t offset) const279 uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
280 {
281     buf->Insert(offset, GetSize());
282     buf->PutU8(offset, GetOpCode());
283     return GetDynChunkfSize(*buf);
284 }
285 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const286 uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
287 {
288     out << offset << ":\t"
289         << "push" << std::endl;
290     return offset + GetSize();
291 }
292 
EmitOpCode(DynChunk * buf) const293 uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const
294 {
295     buf->EmitChar(GetOpCode());
296     return GetDynChunkfSize(*buf);
297 }
298 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const299 uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
300 {
301     out << offset << ":\t"
302         << "pop" << std::endl;
303     return offset + GetSize();
304 }
305 
EmitOpCode(DynChunk * buf,uint32_t offset) const306 uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const
307 {
308     buf->EmitChar(GetOpCode());
309     buf->EmitU32(offset);
310     return GetDynChunkfSize(*buf);
311 }
312 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const313 uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
314 {
315     out << offset << ":\t"
316         << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
317     return offset + GetSize();
318 }
319 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t start,uint32_t end) const320 uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const
321 {
322     auto captureStart = static_cast<uint8_t>(start & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
323     auto captureEnd = static_cast<uint8_t>(end & 0xffU);      // NOLINTNEXTLINE(readability-magic-numbers)
324     buf->Insert(offset, GetSize());
325     buf->PutU8(offset, GetOpCode());
326     buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, captureStart);
327     buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, captureEnd);
328     return GetDynChunkfSize(*buf);
329 }
330 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const331 uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
332 {
333     out << offset << ":\t"
334         << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t"
335         << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl;
336     return offset + GetSize();
337 }
338 
EmitOpCode(DynChunk * buf,uint32_t para) const339 uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
340 {
341     buf->EmitChar(GetOpCode());
342     return GetDynChunkfSize(*buf);
343 }
344 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const345 uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
346 {
347     out << offset << ":\t"
348         << "match" << std::endl;
349     return offset + GetSize();
350 }
351 
EmitOpCode(DynChunk * buf,uint32_t para) const352 uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
353 {
354     buf->EmitChar(GetOpCode());
355     return GetDynChunkfSize(*buf);
356 }
357 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const358 uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
359 {
360     out << offset << ":\t"
361         << "match_end" << std::endl;
362     return offset + GetSize();
363 }
364 
EmitOpCode(DynChunk * buf,uint32_t para) const365 uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
366 {
367     buf->EmitChar(GetOpCode());
368     return GetDynChunkfSize(*buf);
369 }
370 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const371 uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
372 {
373     out << offset << ":\t"
374         << "line_start" << std::endl;
375     return offset + GetSize();
376 }
377 
EmitOpCode(DynChunk * buf,uint32_t para) const378 uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
379 {
380     buf->EmitChar(GetOpCode());
381     return GetDynChunkfSize(*buf);
382 }
383 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const384 uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
385 {
386     out << offset << ":\t"
387         << "line_end" << std::endl;
388     return offset + GetSize();
389 }
390 
EmitOpCode(DynChunk * buf,uint32_t para) const391 uint32_t WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
392 {
393     buf->EmitChar(GetOpCode());
394     return GetDynChunkfSize(*buf);
395 }
396 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const397 uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
398 {
399     out << offset << ":\t"
400         << "word_boundary" << std::endl;
401     return offset + GetSize();
402 }
403 
EmitOpCode(DynChunk * buf,uint32_t para) const404 uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
405 {
406     buf->EmitChar(GetOpCode());
407     return GetDynChunkfSize(*buf);
408 }
409 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const410 uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf,
411                                            uint32_t offset) const
412 {
413     out << offset << ":\t"
414         << "not_word_boundary" << std::endl;
415     return offset + GetSize();
416 }
417 
EmitOpCode(DynChunk * buf,uint32_t para) const418 uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
419 {
420     buf->EmitChar(GetOpCode());
421     return GetDynChunkfSize(*buf);
422 }
423 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const424 uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
425 {
426     out << offset << ":\t"
427         << "all" << std::endl;
428     return offset + GetSize();
429 }
430 
EmitOpCode(DynChunk * buf,uint32_t para) const431 uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
432 {
433     buf->EmitChar(GetOpCode());
434     return GetDynChunkfSize(*buf);
435 }
436 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const437 uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
438 {
439     out << offset << ":\t"
440         << "dots" << std::endl;
441     return offset + GetSize();
442 }
443 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const444 uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
445 {
446     out << offset << ":\t"
447         << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
448     return offset + GetSize();
449 }
450 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const451 uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
452 {
453     out << offset << ":\t"
454         << "range\t";
455     size_t size = buf.GetU16(offset + 1);
456     for (size_t i = 0; i < size; i++) {
457         out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t"
458             << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE +
459                           (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO))
460             << "\t";
461     }
462     out << std::endl;
463     return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE;
464 }
465 
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const466 uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
467 {
468     buf->EmitChar(GetOpCode());
469     size_t size = rangeSet.rangeSet_.size();
470     buf->EmitU16(size);
471     for (auto range : rangeSet.rangeSet_) {
472         buf->EmitU16(range.first);
473         buf->EmitU16(range.second);
474     }
475     return GetDynChunkfSize(*buf);
476 }
477 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const478 uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
479 {
480     out << offset << ":\t"
481         << "range32\t";
482     size_t size = buf.GetU16(offset + 1);
483     for (size_t i = 0; i < size; i++) {
484         out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t"
485             << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
486                           (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR))
487             << "\t";
488     }
489     out << std::endl;
490     return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE;
491 }
492 
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const493 uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
494 {
495     buf->EmitChar(GetOpCode());
496     size_t size = rangeSet.rangeSet_.size();
497     buf->EmitU16(size);
498     for (auto range : rangeSet.rangeSet_) {
499         buf->EmitU32(range.first);
500         buf->EmitU32(range.second);
501     }
502     return GetDynChunkfSize(*buf);
503 }
504 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const505 uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
506 {
507     buf->Insert(offset, GetSize());
508     buf->PutU8(offset, GetOpCode());
509     buf->PutU32(offset + 1, para);
510     return GetDynChunkfSize(*buf);
511 }
512 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const513 uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
514 {
515     out << offset << ":\t"
516         << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
517     return offset + GetSize();
518 }
519 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const520 uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
521 {
522     buf->Insert(offset, GetSize());
523     buf->PutU8(offset, GetOpCode());
524     buf->PutU32(offset + 1, para);
525     return GetDynChunkfSize(*buf);
526 }
527 
EmitOpCode(DynChunk * buf,uint32_t para) const528 uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
529 {
530     buf->EmitChar(GetOpCode());
531     return GetDynChunkfSize(*buf);
532 }
533 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const534 uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
535 {
536     out << offset << ":\t"
537         << "prev" << std::endl;
538     return offset + GetSize();
539 }
540 
EmitOpCode(DynChunk * buf,uint32_t para) const541 uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
542 {
543     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
544     buf->EmitChar(GetOpCode());
545     buf->EmitChar(capture);
546     return GetDynChunkfSize(*buf);
547 }
548 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const549 uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
550 {
551     out << offset << ":\t"
552         << "backreference\t" << buf.GetU8(offset + 1) << std::endl;
553     return offset + GetSize();
554 }
555 
EmitOpCode(DynChunk * buf,uint32_t para) const556 uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
557 {
558     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
559     buf->EmitChar(GetOpCode());
560     buf->EmitChar(capture);
561     return GetDynChunkfSize(*buf);
562 }
563 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const564 uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
565 {
566     out << offset << ":\t"
567         << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl;
568     return offset + GetSize();
569 }
570 
Insert(uint32_t start,uint32_t end)571 void RangeSet::Insert(uint32_t start, uint32_t end)
572 {
573     if (start > end) {
574         return;
575     }
576     std::pair<uint32_t, uint32_t> pairElement = std::make_pair(start, end);
577     if (rangeSet_.empty()) {
578         rangeSet_.emplace_back(pairElement);
579     } else {
580         for (auto iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
581             if (IsIntersect(start, end, iter->first, iter->second) ||
582                 IsAdjacent(start, end, iter->first, iter->second)) {
583                 iter->first = std::min(iter->first, start);
584                 iter->second = std::max(iter->second, end);
585                 Compress();
586                 return;
587             }
588             if (iter->first > end) {
589                 rangeSet_.insert(iter, pairElement);
590                 return;
591             }
592         }
593         rangeSet_.emplace_back(pairElement);
594     }
595 }
596 // if RangeResult cross-intersects with [a, z] and [A, Z],
597 // we capitalize the intersection part and insert into RangeResult.
Inter(RangeSet & cr,const RangeSet & s1)598 void RangeSet::Inter(RangeSet &cr, const RangeSet &s1)
599 {
600     if (s1.rangeSet_.empty()) {
601         rangeSet_.clear();
602         return;
603     }
604     if (rangeSet_.empty()) {
605         return;
606     }
607     for (const auto &interItem : s1.rangeSet_) {
608         uint32_t firstMax = 0;
609         uint32_t secondMin = 0;
610         for (const auto &range : rangeSet_) {
611             if (range.first >= interItem.first) {
612                 firstMax = range.first;
613             } else {
614                 firstMax = interItem.first;
615             }
616             if (range.second >= interItem.second) {
617                 secondMin = interItem.second;
618             } else {
619                 secondMin = range.second;
620             }
621             if (secondMin < firstMax) {
622                 continue;
623             }
624             if (firstMax >= 'a' && firstMax <= 'z') {
625                 cr.Insert(firstMax + 'A' - 'a', secondMin + 'A' - 'a');
626             }
627             if (firstMax >= 'A' && firstMax <= 'Z') {
628                 cr.Insert(firstMax - 'A' + 'a', secondMin - 'A' + 'a');
629             }
630         }
631     }
632 }
Insert(const RangeSet & s1)633 void RangeSet::Insert(const RangeSet &s1)
634 {
635     if (s1.rangeSet_.empty()) {
636         return;
637     }
638     if (rangeSet_.empty()) {
639         rangeSet_ = s1.rangeSet_;
640     } else {
641         for (auto range : s1.rangeSet_) {
642             Insert(range.first, range.second);
643         }
644         Compress();
645     }
646 }
647 
Invert(bool isUtf16)648 void RangeSet::Invert(bool isUtf16)
649 {
650     uint32_t maxValue = isUtf16 ? UINT32_MAX : UINT16_MAX;
651     if (rangeSet_.empty()) {
652         rangeSet_.emplace_back(std::make_pair(0, maxValue));
653         return;
654     }
655 
656     auto iter = rangeSet_.begin();
657     auto iter2 = rangeSet_.begin();
658     if (iter->first == 0 && iter->second == maxValue) {
659         rangeSet_.clear();
660         return;
661     }
662     iter2++;
663 
664     uint32_t first = iter->first;
665 
666     for (iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
667         if (iter->second == maxValue) {
668             rangeSet_.erase(iter);
669             break;
670         }
671         iter->first = iter->second + 1;
672         if (iter2 != rangeSet_.end()) {
673             iter->second = iter2->first - 1;
674             iter2++;
675         } else {
676             iter->second = maxValue;
677         }
678     }
679     if (first > 0) {
680         std::pair<uint32_t, uint32_t> pair1 = std::make_pair(0, first - 1);
681         rangeSet_.push_front(pair1);
682     }
683     Compress();
684 }
685 
Compress()686 void RangeSet::Compress()
687 {
688     auto iter = rangeSet_.begin();
689     auto iter2 = rangeSet_.begin();
690     iter2++;
691     while (iter2 != rangeSet_.end()) {
692         if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) ||
693             IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) {
694             iter->first = std::min(iter->first, iter2->first);
695             iter->second = std::max(iter->second, iter2->second);
696             iter2 = rangeSet_.erase(iter2);
697         } else {
698             iter++;
699             iter2++;
700         }
701     }
702 }
703 }  // namespace panda::ecmascript
704