• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_opcode.h"
17 
18 #include "ecmascript/regexp/regexp_executor.h"
19 
20 namespace panda::ecmascript {
21 using CaptureState = RegExpExecutor::CaptureState;
22 
23 static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
24 static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode();        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
25 static CharOpCode g_charOpcode = CharOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
26 static GotoOpCode g_gotoOpcode = GotoOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
27 static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
28 static SplitFirstOpCode g_splitFirstOpcode =
29     SplitFirstOpCode();                            // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
30 static MatchOpCode g_matchOpcode = MatchOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
31 static LoopOpCode g_loopOpcode = LoopOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
32 static LoopGreedyOpCode g_loopGreedyOpcode =
33     LoopGreedyOpCode();                                        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
34 static PushCharOpCode g_pushCharOpcode = PushCharOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
35 static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
36 static PushOpCode g_pushOpcode = PushOpCode();                 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
37 static PopOpCode g_popOpcode = PopOpCode();                    // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
38 static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
39 static LineStartOpCode g_lineStartOpcode = LineStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
40 static LineEndOpCode g_lineEndOpcode = LineEndOpCode();        // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
41 static WordBoundaryOpCode g_wordBoundaryOpcode =
42     WordBoundaryOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
43 static NotWordBoundaryOpCode g_notWordBoundaryOpcode =
44     NotWordBoundaryOpCode();                    // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
45 static AllOpCode g_allOpcode = AllOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
46 static DotsOpCode g_dotsOpcode = DotsOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
47 static MatchAheadOpCode g_matchAheadOpcode =
48     MatchAheadOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
49 static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode =
50     NegativeMatchAheadOpCode();                             // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
51 static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
52 static PrevOpCode g_prevOpcode = PrevOpCode();              // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
53 static RangeOpCode g_rangeOpcode = RangeOpCode();           // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
54 static BackReferenceOpCode g_backreferenceOpcode =
55     BackReferenceOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
56 static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode =
57     BackwardBackReferenceOpCode();                       // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
58 static Char32OpCode g_char32Opcode = Char32OpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
59 static Range32OpCode g_range32Opcode = Range32OpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
60 static SparseOpCode g_sparseOpcode = SparseOpCode();     // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
61 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
62 static std::vector<RegExpOpCode *> g_intrinsicSet = {
63     &g_saveStartOpcode,
64     &g_saveEndOpcode,
65     &g_charOpcode,
66     &g_gotoOpcode,
67     &g_splitFirstOpcode,
68     &g_splitNextOpcode,
69     &g_negativeMatchAheadOpcode,
70     &g_matchAheadOpcode,
71     &g_matchOpcode,
72     &g_loopOpcode,
73     &g_loopGreedyOpcode,
74     &g_pushCharOpcode,
75     &g_checkCharOpcode,
76     &g_pushOpcode,
77     &g_popOpcode,
78     &g_saveResetOpcode,
79     &g_lineStartOpcode,
80     &g_lineEndOpcode,
81     &g_wordBoundaryOpcode,
82     &g_notWordBoundaryOpcode,
83     &g_allOpcode,
84     &g_dotsOpcode,
85     &g_matchEndOpcode,
86     &g_prevOpcode,
87     &g_rangeOpcode,
88     &g_backreferenceOpcode,
89     &g_backwardBackreferenceOpcode,
90     &g_char32Opcode,
91     &g_range32Opcode,
92     &g_sparseOpcode,
93 };
94 
RegExpOpCode(uint8_t opCode,int size)95 RegExpOpCode::RegExpOpCode(uint8_t opCode, int size) : opCode_(opCode), size_(size) {}
96 
97 /* static */
GetRegExpOpCode(const DynChunk & buf,int pc)98 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc)
99 {
100     uint8_t opCode = buf.GetU8(pc);
101     ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
102     return g_intrinsicSet.at(opCode);
103 }
104 
105 /* static */
GetRegExpOpCode(uint8_t opCode)106 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t opCode)
107 {
108     ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
109     return g_intrinsicSet.at(opCode);
110 }
111 
112 /* static */
DumpRegExpOpCode(std::ostream & out,const DynChunk & buf,uint32_t size)113 void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf, uint32_t size)
114 {
115     out << "OpCode:\t" << std::endl;
116     uint32_t pc = RegExpParser::OP_START_OFFSET;
117     do {
118         RegExpOpCode *byteCode = GetRegExpOpCode(buf, pc);
119         pc = byteCode->DumpOpCode(out, buf, pc);
120     } while (pc < size);
121 }
122 
EmitOpCode(DynChunk * buf,uint32_t para) const123 uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
124 {
125     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
126     buf->EmitChar(GetOpCode());
127     buf->EmitChar(capture);
128     return GetDynChunkfSize(*buf);
129 }
130 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const131 uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
132 {
133     out << offset << ":\t"
134         << "save_start\t" << buf.GetU8(offset + 1) << std::endl;
135     return offset + GetSize();
136 }
137 
EmitOpCode(DynChunk * buf,uint32_t para) const138 uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
139 {
140     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
141     buf->EmitChar(GetOpCode());
142     buf->EmitChar(capture);
143     return GetDynChunkfSize(*buf);
144 }
145 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const146 uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
147 {
148     out << offset << ":\t"
149         << "save_end\t" << buf.GetU8(offset + 1) << std::endl;
150     return offset + GetSize();
151 }
152 
EmitOpCode(DynChunk * buf,uint32_t para) const153 uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
154 {
155     auto paraChar = static_cast<uint16_t>(para & 0xffffU);  // NOLINTNEXTLINE(readability-magic-numbers)
156     buf->EmitChar(GetOpCode());
157     buf->EmitU16(paraChar);
158     return GetDynChunkfSize(*buf);
159 }
160 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const161 uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
162 {
163     out << offset << ":\t"
164         << "char\t" << static_cast<char>(buf.GetU16(offset + 1)) << std::endl;
165     return offset + GetSize();
166 }
167 
EmitOpCode(DynChunk * buf,uint32_t para) const168 uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
169 {
170     buf->EmitChar(GetOpCode());
171     buf->EmitU32(para);
172     return GetDynChunkfSize(*buf);
173 }
174 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const175 uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
176 {
177     out << offset << ":\t"
178         << "char32\t" << static_cast<char>(buf.GetU32(offset + 1)) << std::endl;
179     return offset + GetSize();
180 }
181 
EmitOpCode(DynChunk * buf,uint32_t para) const182 uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
183 {
184     buf->EmitChar(GetOpCode());
185     buf->EmitU32(para);
186     return GetDynChunkfSize(*buf);
187 }
188 
UpdateOpPara(DynChunk * buf,uint32_t offset,uint32_t para) const189 void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const
190 {
191     buf->PutU32(offset + 1, para);
192 }
193 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const194 uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
195 {
196     out << offset << ":\t"
197         << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
198     return offset + GetSize();
199 }
200 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const201 uint32_t SplitNextOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
202 {
203     buf->Insert(offset, GetSize());
204     buf->PutU8(offset, GetOpCode());
205     buf->PutU32(offset + 1, para);
206     return GetDynChunkfSize(*buf);
207 }
208 
EmitOpCode(DynChunk * buf,uint32_t para) const209 uint32_t SplitNextOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
210 {
211     buf->EmitChar(GetOpCode());
212     buf->EmitU32(para);
213     return GetDynChunkfSize(*buf);
214 }
215 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const216 uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
217 {
218     out << offset << ":\t"
219         << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
220     return offset + GetSize();
221 }
222 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const223 uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
224 {
225     buf->Insert(offset, GetSize());
226     buf->PutU8(offset, GetOpCode());
227     buf->PutU32(offset + 1, para);
228     return GetDynChunkfSize(*buf);
229 }
230 
EmitOpCode(DynChunk * buf,uint32_t para) const231 uint32_t SplitFirstOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
232 {
233     buf->EmitChar(GetOpCode());
234     buf->EmitU32(para);
235     return GetDynChunkfSize(*buf);
236 }
237 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const238 uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
239 {
240     out << offset << ":\t"
241         << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
242     return offset + GetSize();
243 }
244 
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const245 uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
246 {
247     buf->EmitChar(GetOpCode());
248     buf->EmitU32(start);
249     buf->EmitU32(min);
250     buf->EmitU32(max);
251     return GetDynChunkfSize(*buf);
252 }
253 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const254 uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
255 {
256     out << offset << ":\t"
257         << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
258         << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
259         << std::endl;
260     return offset + GetSize();
261 }
262 
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const263 uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
264 {
265     buf->EmitChar(GetOpCode());
266     buf->EmitU32(start);
267     buf->EmitU32(min);
268     buf->EmitU32(max);
269     return GetDynChunkfSize(*buf);
270 }
271 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const272 uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
273 {
274     out << offset << ":\t"
275         << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
276         << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
277         << std::endl;
278     return offset + GetSize();
279 }
280 
InsertOpCode(DynChunk * buf,uint32_t offset) const281 uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
282 {
283     buf->Insert(offset, GetSize());
284     buf->PutU8(offset, GetOpCode());
285     return GetDynChunkfSize(*buf);
286 }
287 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const288 uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
289 {
290     out << offset << ":\t"
291         << "push_char" << std::endl;
292     return offset + GetSize();
293 }
294 
InsertOpCode(DynChunk * buf,uint32_t offset) const295 uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
296 {
297     buf->Insert(offset, GetSize());
298     buf->PutU8(offset, GetOpCode());
299     return GetDynChunkfSize(*buf);
300 }
301 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const302 uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
303 {
304     out << offset << ":\t"
305         << "push" << std::endl;
306     return offset + GetSize();
307 }
308 
EmitOpCode(DynChunk * buf) const309 uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const
310 {
311     buf->EmitChar(GetOpCode());
312     return GetDynChunkfSize(*buf);
313 }
314 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const315 uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
316 {
317     out << offset << ":\t"
318         << "pop" << std::endl;
319     return offset + GetSize();
320 }
321 
EmitOpCode(DynChunk * buf,uint32_t offset) const322 uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const
323 {
324     buf->EmitChar(GetOpCode());
325     buf->EmitU32(offset);
326     return GetDynChunkfSize(*buf);
327 }
328 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const329 uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
330 {
331     out << offset << ":\t"
332         << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
333     return offset + GetSize();
334 }
335 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t start,uint32_t end) const336 uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const
337 {
338     auto captureStart = static_cast<uint8_t>(start & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
339     auto captureEnd = static_cast<uint8_t>(end & 0xffU);      // NOLINTNEXTLINE(readability-magic-numbers)
340     buf->Insert(offset, GetSize());
341     buf->PutU8(offset, GetOpCode());
342     buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, captureStart);
343     buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, captureEnd);
344     return GetDynChunkfSize(*buf);
345 }
346 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const347 uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
348 {
349     out << offset << ":\t"
350         << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t"
351         << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl;
352     return offset + GetSize();
353 }
354 
EmitOpCode(DynChunk * buf,uint32_t para) const355 uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
356 {
357     buf->EmitChar(GetOpCode());
358     return GetDynChunkfSize(*buf);
359 }
360 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const361 uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
362 {
363     out << offset << ":\t"
364         << "match" << std::endl;
365     return offset + GetSize();
366 }
367 
EmitOpCode(DynChunk * buf,uint32_t para) const368 uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
369 {
370     buf->EmitChar(GetOpCode());
371     return GetDynChunkfSize(*buf);
372 }
373 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const374 uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
375 {
376     out << offset << ":\t"
377         << "match_end" << std::endl;
378     return offset + GetSize();
379 }
380 
EmitOpCode(DynChunk * buf,uint32_t para) const381 uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
382 {
383     buf->EmitChar(GetOpCode());
384     return GetDynChunkfSize(*buf);
385 }
386 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const387 uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
388 {
389     out << offset << ":\t"
390         << "line_start" << std::endl;
391     return offset + GetSize();
392 }
393 
EmitOpCode(DynChunk * buf,uint32_t para) const394 uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
395 {
396     buf->EmitChar(GetOpCode());
397     return GetDynChunkfSize(*buf);
398 }
399 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const400 uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
401 {
402     out << offset << ":\t"
403         << "line_end" << std::endl;
404     return offset + GetSize();
405 }
406 
EmitOpCode(DynChunk * buf,uint32_t para) const407 uint32_t WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
408 {
409     buf->EmitChar(GetOpCode());
410     return GetDynChunkfSize(*buf);
411 }
412 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const413 uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
414 {
415     out << offset << ":\t"
416         << "word_boundary" << std::endl;
417     return offset + GetSize();
418 }
419 
EmitOpCode(DynChunk * buf,uint32_t para) const420 uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
421 {
422     buf->EmitChar(GetOpCode());
423     return GetDynChunkfSize(*buf);
424 }
425 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const426 uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf,
427                                            uint32_t offset) const
428 {
429     out << offset << ":\t"
430         << "not_word_boundary" << std::endl;
431     return offset + GetSize();
432 }
433 
EmitOpCode(DynChunk * buf,uint32_t para) const434 uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
435 {
436     buf->EmitChar(GetOpCode());
437     return GetDynChunkfSize(*buf);
438 }
439 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const440 uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
441 {
442     out << offset << ":\t"
443         << "all" << std::endl;
444     return offset + GetSize();
445 }
446 
EmitOpCode(DynChunk * buf,uint32_t para) const447 uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
448 {
449     buf->EmitChar(GetOpCode());
450     return GetDynChunkfSize(*buf);
451 }
452 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const453 uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
454 {
455     out << offset << ":\t"
456         << "dots" << std::endl;
457     return offset + GetSize();
458 }
459 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const460 uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
461 {
462     out << offset << ":\t"
463         << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
464     return offset + GetSize();
465 }
466 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const467 uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
468 {
469     out << offset << ":\t"
470         << "range\t";
471     size_t size = buf.GetU16(offset + 1);
472     for (size_t i = 0; i < size; i++) {
473         out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t"
474             << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE +
475                           (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO))
476             << "\t";
477     }
478     out << std::endl;
479     return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE;
480 }
481 
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const482 uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
483 {
484     buf->EmitChar(GetOpCode());
485     size_t size = rangeSet.rangeSet_.size();
486     buf->EmitU16(size);
487     for (auto range : rangeSet.rangeSet_) {
488         buf->EmitU16(range.first);
489         buf->EmitU16(range.second);
490     }
491     return GetDynChunkfSize(*buf);
492 }
493 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const494 uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
495 {
496     out << offset << ":\t"
497         << "range32\t";
498     size_t size = buf.GetU16(offset + 1);
499     for (size_t i = 0; i < size; i++) {
500         out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t"
501             << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
502                           (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR))
503             << "\t";
504     }
505     out << std::endl;
506     return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE;
507 }
508 
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const509 uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
510 {
511     buf->EmitChar(GetOpCode());
512     size_t size = rangeSet.rangeSet_.size();
513     buf->EmitU16(size);
514     for (auto range : rangeSet.rangeSet_) {
515         buf->EmitU32(range.first);
516         buf->EmitU32(range.second);
517     }
518     return GetDynChunkfSize(*buf);
519 }
520 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const521 uint32_t SparseOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
522 {
523     out << offset << ":\t"
524         << "sparse\t";
525     size_t size = buf.GetU16(offset + 1);
526     for (size_t i = 0; i < size; i++) {
527         out << static_cast<char>(buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_SIX)))
528             << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
529                           (i * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_TWO)) +
530                           offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE
531             << "\t";
532     }
533     out << std::endl;
534     return offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE;
535 }
536 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const537 uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
538 {
539     buf->Insert(offset, GetSize());
540     buf->PutU8(offset, GetOpCode());
541     buf->PutU32(offset + 1, para);
542     return GetDynChunkfSize(*buf);
543 }
544 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const545 uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
546 {
547     out << offset << ":\t"
548         << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
549     return offset + GetSize();
550 }
551 
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const552 uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
553 {
554     buf->Insert(offset, GetSize());
555     buf->PutU8(offset, GetOpCode());
556     buf->PutU32(offset + 1, para);
557     return GetDynChunkfSize(*buf);
558 }
559 
EmitOpCode(DynChunk * buf,uint32_t para) const560 uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
561 {
562     buf->EmitChar(GetOpCode());
563     return GetDynChunkfSize(*buf);
564 }
565 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const566 uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
567 {
568     out << offset << ":\t"
569         << "prev" << std::endl;
570     return offset + GetSize();
571 }
572 
EmitOpCode(DynChunk * buf,uint32_t para) const573 uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
574 {
575     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
576     buf->EmitChar(GetOpCode());
577     buf->EmitChar(capture);
578     return GetDynChunkfSize(*buf);
579 }
580 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const581 uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
582 {
583     out << offset << ":\t"
584         << "backreference\t" << buf.GetU8(offset + 1) << std::endl;
585     return offset + GetSize();
586 }
587 
EmitOpCode(DynChunk * buf,uint32_t para) const588 uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
589 {
590     auto capture = static_cast<uint8_t>(para & 0xffU);  // NOLINTNEXTLINE(readability-magic-numbers)
591     buf->EmitChar(GetOpCode());
592     buf->EmitChar(capture);
593     return GetDynChunkfSize(*buf);
594 }
595 
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const596 uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
597 {
598     out << offset << ":\t"
599         << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl;
600     return offset + GetSize();
601 }
602 
Insert(uint32_t start,uint32_t end)603 void RangeSet::Insert(uint32_t start, uint32_t end)
604 {
605     if (start > end) {
606         return;
607     }
608     std::pair<uint32_t, uint32_t> pairElement = std::make_pair(start, end);
609     if (rangeSet_.empty()) {
610         rangeSet_.emplace_back(pairElement);
611     } else {
612         for (auto iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
613             if (IsIntersect(start, end, iter->first, iter->second) ||
614                 IsAdjacent(start, end, iter->first, iter->second)) {
615                 iter->first = std::min(iter->first, start);
616                 iter->second = std::max(iter->second, end);
617                 Compress();
618                 return;
619             }
620             if (iter->first > end) {
621                 rangeSet_.insert(iter, pairElement);
622                 return;
623             }
624         }
625         rangeSet_.emplace_back(pairElement);
626     }
627 }
628 // if RangeResult cross-intersects with [a, z] and [A, Z],
629 // we capitalize the intersection part and insert into RangeResult.
Inter(RangeSet & cr,const RangeSet & s1)630 void RangeSet::Inter(RangeSet &cr, const RangeSet &s1)
631 {
632     if (s1.rangeSet_.empty()) {
633         rangeSet_.clear();
634         return;
635     }
636     if (rangeSet_.empty()) {
637         return;
638     }
639     for (const auto &interItem : s1.rangeSet_) {
640         uint32_t firstMax = 0;
641         uint32_t secondMin = 0;
642         for (const auto &range : rangeSet_) {
643             if (range.first >= interItem.first) {
644                 firstMax = range.first;
645             } else {
646                 firstMax = interItem.first;
647             }
648             if (range.second >= interItem.second) {
649                 secondMin = interItem.second;
650             } else {
651                 secondMin = range.second;
652             }
653             if (secondMin < firstMax) {
654                 continue;
655             }
656             if (firstMax >= 'a' && firstMax <= 'z') {
657                 cr.Insert(firstMax + 'A' - 'a', secondMin + 'A' - 'a');
658             }
659             if (firstMax >= 'A' && firstMax <= 'Z') {
660                 cr.Insert(firstMax - 'A' + 'a', secondMin - 'A' + 'a');
661             }
662         }
663     }
664 }
Insert(const RangeSet & s1)665 void RangeSet::Insert(const RangeSet &s1)
666 {
667     if (s1.rangeSet_.empty()) {
668         return;
669     }
670     if (rangeSet_.empty()) {
671         rangeSet_ = s1.rangeSet_;
672     } else {
673         for (auto range : s1.rangeSet_) {
674             Insert(range.first, range.second);
675         }
676         Compress();
677     }
678 }
679 
Invert(bool isUtf16)680 void RangeSet::Invert(bool isUtf16)
681 {
682     uint32_t maxValue = isUtf16 ? UINT32_MAX : UINT16_MAX;
683     if (rangeSet_.empty()) {
684         rangeSet_.emplace_back(std::make_pair(0, maxValue));
685         return;
686     }
687 
688     auto iter = rangeSet_.begin();
689     auto iter2 = rangeSet_.begin();
690     if (iter->first == 0 && iter->second == maxValue) {
691         rangeSet_.clear();
692         return;
693     }
694     iter2++;
695 
696     uint32_t first = iter->first;
697 
698     for (iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
699         if (iter->second == maxValue) {
700             rangeSet_.erase(iter);
701             break;
702         }
703         iter->first = iter->second + 1;
704         if (iter2 != rangeSet_.end()) {
705             iter->second = iter2->first - 1;
706             iter2++;
707         } else {
708             iter->second = maxValue;
709         }
710     }
711     if (first > 0) {
712         std::pair<uint32_t, uint32_t> pair1 = std::make_pair(0, first - 1);
713         rangeSet_.push_front(pair1);
714     }
715     Compress();
716 }
717 
Compress()718 void RangeSet::Compress()
719 {
720     auto iter = rangeSet_.begin();
721     auto iter2 = rangeSet_.begin();
722     iter2++;
723     while (iter2 != rangeSet_.end()) {
724         if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) ||
725             IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) {
726             iter->first = std::min(iter->first, iter2->first);
727             iter->second = std::max(iter->second, iter2->second);
728             iter2 = rangeSet_.erase(iter2);
729         } else {
730             iter++;
731             iter2++;
732         }
733     }
734 }
735 }  // namespace panda::ecmascript
736