1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/regexp/regexp_opcode.h"
17
18 #include "ecmascript/regexp/regexp_executor.h"
19
20 namespace panda::ecmascript {
21 using CaptureState = RegExpExecutor::CaptureState;
22
// One file-local instance of each opcode class. The addresses of these objects are what
// g_intrinsicSet stores, so GetRegExpOpCode() hands out pointers to them.
static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static CharOpCode g_charOpcode = CharOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static GotoOpCode g_gotoOpcode = GotoOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SplitFirstOpCode g_splitFirstOpcode =
    SplitFirstOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static MatchOpCode g_matchOpcode = MatchOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LoopOpCode g_loopOpcode = LoopOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LoopGreedyOpCode g_loopGreedyOpcode =
    LoopGreedyOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PushCharOpCode g_pushCharOpcode = PushCharOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PushOpCode g_pushOpcode = PushOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PopOpCode g_popOpcode = PopOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LineStartOpCode g_lineStartOpcode = LineStartOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static LineEndOpCode g_lineEndOpcode = LineEndOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static WordBoundaryOpCode g_wordBoundaryOpcode =
    WordBoundaryOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static NotWordBoundaryOpCode g_notWordBoundaryOpcode =
    NotWordBoundaryOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static AllOpCode g_allOpcode = AllOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static DotsOpCode g_dotsOpcode = DotsOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static MatchAheadOpCode g_matchAheadOpcode =
    MatchAheadOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode =
    NegativeMatchAheadOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static PrevOpCode g_prevOpcode = PrevOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeOpCode g_rangeOpcode = RangeOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static BackReferenceOpCode g_backreferenceOpcode =
    BackReferenceOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode =
    BackwardBackReferenceOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static Char32OpCode g_char32Opcode = Char32OpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static Range32OpCode g_range32Opcode = Range32OpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static SparseOpCode g_sparseOpcode = SparseOpCode();  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
// Lookup table indexed by opcode byte: GetRegExpOpCode() returns the element at position
// `opCode`, so the position of each entry is significant.
// NOTE(review): the order presumably has to mirror the opcode numbering declared in
// regexp_opcode.h — confirm before adding or reordering entries.
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static std::vector<RegExpOpCode *> g_intrinsicSet = {
    &g_saveStartOpcode,
    &g_saveEndOpcode,
    &g_charOpcode,
    &g_gotoOpcode,
    &g_splitFirstOpcode,
    &g_splitNextOpcode,
    &g_negativeMatchAheadOpcode,
    &g_matchAheadOpcode,
    &g_matchOpcode,
    &g_loopOpcode,
    &g_loopGreedyOpcode,
    &g_pushCharOpcode,
    &g_checkCharOpcode,
    &g_pushOpcode,
    &g_popOpcode,
    &g_saveResetOpcode,
    &g_lineStartOpcode,
    &g_lineEndOpcode,
    &g_wordBoundaryOpcode,
    &g_notWordBoundaryOpcode,
    &g_allOpcode,
    &g_dotsOpcode,
    &g_matchEndOpcode,
    &g_prevOpcode,
    &g_rangeOpcode,
    &g_backreferenceOpcode,
    &g_backwardBackreferenceOpcode,
    &g_char32Opcode,
    &g_range32Opcode,
    &g_sparseOpcode,
};
94
// Stores the opcode byte and the instruction size in opCode_ and size_.
RegExpOpCode::RegExpOpCode(uint8_t opCode, int size) : opCode_(opCode), size_(size) {}
96
97 /* static */
GetRegExpOpCode(const DynChunk & buf,int pc)98 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc)
99 {
100 uint8_t opCode = buf.GetU8(pc);
101 ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
102 return g_intrinsicSet.at(opCode);
103 }
104
105 /* static */
GetRegExpOpCode(uint8_t opCode)106 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t opCode)
107 {
108 ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
109 return g_intrinsicSet.at(opCode);
110 }
111
112 /* static */
DumpRegExpOpCode(std::ostream & out,const DynChunk & buf,uint32_t size)113 void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf, uint32_t size)
114 {
115 out << "OpCode:\t" << std::endl;
116 uint32_t pc = RegExpParser::OP_START_OFFSET;
117 do {
118 RegExpOpCode *byteCode = GetRegExpOpCode(buf, pc);
119 pc = byteCode->DumpOpCode(out, buf, pc);
120 } while (pc < size);
121 }
122
EmitOpCode(DynChunk * buf,uint32_t para) const123 uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
124 {
125 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
126 buf->EmitChar(GetOpCode());
127 buf->EmitChar(capture);
128 return GetDynChunkfSize(*buf);
129 }
130
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const131 uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
132 {
133 out << offset << ":\t"
134 << "save_start\t" << buf.GetU8(offset + 1) << std::endl;
135 return offset + GetSize();
136 }
137
EmitOpCode(DynChunk * buf,uint32_t para) const138 uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
139 {
140 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
141 buf->EmitChar(GetOpCode());
142 buf->EmitChar(capture);
143 return GetDynChunkfSize(*buf);
144 }
145
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const146 uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
147 {
148 out << offset << ":\t"
149 << "save_end\t" << buf.GetU8(offset + 1) << std::endl;
150 return offset + GetSize();
151 }
152
EmitOpCode(DynChunk * buf,uint32_t para) const153 uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
154 {
155 auto paraChar = static_cast<uint16_t>(para & 0xffffU); // NOLINTNEXTLINE(readability-magic-numbers)
156 buf->EmitChar(GetOpCode());
157 buf->EmitU16(paraChar);
158 return GetDynChunkfSize(*buf);
159 }
160
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const161 uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
162 {
163 out << offset << ":\t"
164 << "char\t" << static_cast<char>(buf.GetU16(offset + 1)) << std::endl;
165 return offset + GetSize();
166 }
167
EmitOpCode(DynChunk * buf,uint32_t para) const168 uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
169 {
170 buf->EmitChar(GetOpCode());
171 buf->EmitU32(para);
172 return GetDynChunkfSize(*buf);
173 }
174
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const175 uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
176 {
177 out << offset << ":\t"
178 << "char32\t" << static_cast<char>(buf.GetU32(offset + 1)) << std::endl;
179 return offset + GetSize();
180 }
181
EmitOpCode(DynChunk * buf,uint32_t para) const182 uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
183 {
184 buf->EmitChar(GetOpCode());
185 buf->EmitU32(para);
186 return GetDynChunkfSize(*buf);
187 }
188
UpdateOpPara(DynChunk * buf,uint32_t offset,uint32_t para) const189 void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const
190 {
191 buf->PutU32(offset + 1, para);
192 }
193
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const194 uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
195 {
196 out << offset << ":\t"
197 << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
198 return offset + GetSize();
199 }
200
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const201 uint32_t SplitNextOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
202 {
203 buf->Insert(offset, GetSize());
204 buf->PutU8(offset, GetOpCode());
205 buf->PutU32(offset + 1, para);
206 return GetDynChunkfSize(*buf);
207 }
208
EmitOpCode(DynChunk * buf,uint32_t para) const209 uint32_t SplitNextOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
210 {
211 buf->EmitChar(GetOpCode());
212 buf->EmitU32(para);
213 return GetDynChunkfSize(*buf);
214 }
215
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const216 uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
217 {
218 out << offset << ":\t"
219 << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
220 return offset + GetSize();
221 }
222
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const223 uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
224 {
225 buf->Insert(offset, GetSize());
226 buf->PutU8(offset, GetOpCode());
227 buf->PutU32(offset + 1, para);
228 return GetDynChunkfSize(*buf);
229 }
230
EmitOpCode(DynChunk * buf,uint32_t para) const231 uint32_t SplitFirstOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
232 {
233 buf->EmitChar(GetOpCode());
234 buf->EmitU32(para);
235 return GetDynChunkfSize(*buf);
236 }
237
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const238 uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
239 {
240 out << offset << ":\t"
241 << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
242 return offset + GetSize();
243 }
244
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const245 uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
246 {
247 buf->EmitChar(GetOpCode());
248 buf->EmitU32(start);
249 buf->EmitU32(min);
250 buf->EmitU32(max);
251 return GetDynChunkfSize(*buf);
252 }
253
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const254 uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
255 {
256 out << offset << ":\t"
257 << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
258 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
259 << std::endl;
260 return offset + GetSize();
261 }
262
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const263 uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
264 {
265 buf->EmitChar(GetOpCode());
266 buf->EmitU32(start);
267 buf->EmitU32(min);
268 buf->EmitU32(max);
269 return GetDynChunkfSize(*buf);
270 }
271
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const272 uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
273 {
274 out << offset << ":\t"
275 << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
276 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
277 << std::endl;
278 return offset + GetSize();
279 }
280
InsertOpCode(DynChunk * buf,uint32_t offset) const281 uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
282 {
283 buf->Insert(offset, GetSize());
284 buf->PutU8(offset, GetOpCode());
285 return GetDynChunkfSize(*buf);
286 }
287
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const288 uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
289 {
290 out << offset << ":\t"
291 << "push_char" << std::endl;
292 return offset + GetSize();
293 }
294
InsertOpCode(DynChunk * buf,uint32_t offset) const295 uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
296 {
297 buf->Insert(offset, GetSize());
298 buf->PutU8(offset, GetOpCode());
299 return GetDynChunkfSize(*buf);
300 }
301
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const302 uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
303 {
304 out << offset << ":\t"
305 << "push" << std::endl;
306 return offset + GetSize();
307 }
308
EmitOpCode(DynChunk * buf) const309 uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const
310 {
311 buf->EmitChar(GetOpCode());
312 return GetDynChunkfSize(*buf);
313 }
314
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const315 uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
316 {
317 out << offset << ":\t"
318 << "pop" << std::endl;
319 return offset + GetSize();
320 }
321
EmitOpCode(DynChunk * buf,uint32_t offset) const322 uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const
323 {
324 buf->EmitChar(GetOpCode());
325 buf->EmitU32(offset);
326 return GetDynChunkfSize(*buf);
327 }
328
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const329 uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
330 {
331 out << offset << ":\t"
332 << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
333 return offset + GetSize();
334 }
335
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t start,uint32_t end) const336 uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const
337 {
338 auto captureStart = static_cast<uint8_t>(start & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
339 auto captureEnd = static_cast<uint8_t>(end & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
340 buf->Insert(offset, GetSize());
341 buf->PutU8(offset, GetOpCode());
342 buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, captureStart);
343 buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, captureEnd);
344 return GetDynChunkfSize(*buf);
345 }
346
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const347 uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
348 {
349 out << offset << ":\t"
350 << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t"
351 << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl;
352 return offset + GetSize();
353 }
354
EmitOpCode(DynChunk * buf,uint32_t para) const355 uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
356 {
357 buf->EmitChar(GetOpCode());
358 return GetDynChunkfSize(*buf);
359 }
360
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const361 uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
362 {
363 out << offset << ":\t"
364 << "match" << std::endl;
365 return offset + GetSize();
366 }
367
EmitOpCode(DynChunk * buf,uint32_t para) const368 uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
369 {
370 buf->EmitChar(GetOpCode());
371 return GetDynChunkfSize(*buf);
372 }
373
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const374 uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
375 {
376 out << offset << ":\t"
377 << "match_end" << std::endl;
378 return offset + GetSize();
379 }
380
EmitOpCode(DynChunk * buf,uint32_t para) const381 uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
382 {
383 buf->EmitChar(GetOpCode());
384 return GetDynChunkfSize(*buf);
385 }
386
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const387 uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
388 {
389 out << offset << ":\t"
390 << "line_start" << std::endl;
391 return offset + GetSize();
392 }
393
EmitOpCode(DynChunk * buf,uint32_t para) const394 uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
395 {
396 buf->EmitChar(GetOpCode());
397 return GetDynChunkfSize(*buf);
398 }
399
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const400 uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
401 {
402 out << offset << ":\t"
403 << "line_end" << std::endl;
404 return offset + GetSize();
405 }
406
EmitOpCode(DynChunk * buf,uint32_t para) const407 uint32_t WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
408 {
409 buf->EmitChar(GetOpCode());
410 return GetDynChunkfSize(*buf);
411 }
412
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const413 uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
414 {
415 out << offset << ":\t"
416 << "word_boundary" << std::endl;
417 return offset + GetSize();
418 }
419
EmitOpCode(DynChunk * buf,uint32_t para) const420 uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
421 {
422 buf->EmitChar(GetOpCode());
423 return GetDynChunkfSize(*buf);
424 }
425
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const426 uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf,
427 uint32_t offset) const
428 {
429 out << offset << ":\t"
430 << "not_word_boundary" << std::endl;
431 return offset + GetSize();
432 }
433
EmitOpCode(DynChunk * buf,uint32_t para) const434 uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
435 {
436 buf->EmitChar(GetOpCode());
437 return GetDynChunkfSize(*buf);
438 }
439
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const440 uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
441 {
442 out << offset << ":\t"
443 << "all" << std::endl;
444 return offset + GetSize();
445 }
446
EmitOpCode(DynChunk * buf,uint32_t para) const447 uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
448 {
449 buf->EmitChar(GetOpCode());
450 return GetDynChunkfSize(*buf);
451 }
452
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const453 uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
454 {
455 out << offset << ":\t"
456 << "dots" << std::endl;
457 return offset + GetSize();
458 }
459
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const460 uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
461 {
462 out << offset << ":\t"
463 << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
464 return offset + GetSize();
465 }
466
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const467 uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
468 {
469 out << offset << ":\t"
470 << "range\t";
471 size_t size = buf.GetU16(offset + 1);
472 for (size_t i = 0; i < size; i++) {
473 out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t"
474 << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE +
475 (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO))
476 << "\t";
477 }
478 out << std::endl;
479 return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE;
480 }
481
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const482 uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
483 {
484 buf->EmitChar(GetOpCode());
485 size_t size = rangeSet.rangeSet_.size();
486 buf->EmitU16(size);
487 for (auto range : rangeSet.rangeSet_) {
488 buf->EmitU16(range.first);
489 buf->EmitU16(range.second);
490 }
491 return GetDynChunkfSize(*buf);
492 }
493
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const494 uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
495 {
496 out << offset << ":\t"
497 << "range32\t";
498 size_t size = buf.GetU16(offset + 1);
499 for (size_t i = 0; i < size; i++) {
500 out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t"
501 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
502 (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR))
503 << "\t";
504 }
505 out << std::endl;
506 return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE;
507 }
508
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const509 uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
510 {
511 buf->EmitChar(GetOpCode());
512 size_t size = rangeSet.rangeSet_.size();
513 buf->EmitU16(size);
514 for (auto range : rangeSet.rangeSet_) {
515 buf->EmitU32(range.first);
516 buf->EmitU32(range.second);
517 }
518 return GetDynChunkfSize(*buf);
519 }
520
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const521 uint32_t SparseOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
522 {
523 out << offset << ":\t"
524 << "sparse\t";
525 size_t size = buf.GetU16(offset + 1);
526 for (size_t i = 0; i < size; i++) {
527 out << static_cast<char>(buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_SIX)))
528 << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
529 (i * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_TWO)) +
530 offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE
531 << "\t";
532 }
533 out << std::endl;
534 return offset + size * RegExpOpCode::OP_SIZE_SIX + RegExpOpCode::OP_SIZE_THREE;
535 }
536
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const537 uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
538 {
539 buf->Insert(offset, GetSize());
540 buf->PutU8(offset, GetOpCode());
541 buf->PutU32(offset + 1, para);
542 return GetDynChunkfSize(*buf);
543 }
544
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const545 uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
546 {
547 out << offset << ":\t"
548 << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
549 return offset + GetSize();
550 }
551
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const552 uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
553 {
554 buf->Insert(offset, GetSize());
555 buf->PutU8(offset, GetOpCode());
556 buf->PutU32(offset + 1, para);
557 return GetDynChunkfSize(*buf);
558 }
559
EmitOpCode(DynChunk * buf,uint32_t para) const560 uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
561 {
562 buf->EmitChar(GetOpCode());
563 return GetDynChunkfSize(*buf);
564 }
565
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const566 uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
567 {
568 out << offset << ":\t"
569 << "prev" << std::endl;
570 return offset + GetSize();
571 }
572
EmitOpCode(DynChunk * buf,uint32_t para) const573 uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
574 {
575 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
576 buf->EmitChar(GetOpCode());
577 buf->EmitChar(capture);
578 return GetDynChunkfSize(*buf);
579 }
580
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const581 uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
582 {
583 out << offset << ":\t"
584 << "backreference\t" << buf.GetU8(offset + 1) << std::endl;
585 return offset + GetSize();
586 }
587
EmitOpCode(DynChunk * buf,uint32_t para) const588 uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
589 {
590 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
591 buf->EmitChar(GetOpCode());
592 buf->EmitChar(capture);
593 return GetDynChunkfSize(*buf);
594 }
595
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const596 uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
597 {
598 out << offset << ":\t"
599 << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl;
600 return offset + GetSize();
601 }
602
Insert(uint32_t start,uint32_t end)603 void RangeSet::Insert(uint32_t start, uint32_t end)
604 {
605 if (start > end) {
606 return;
607 }
608 std::pair<uint32_t, uint32_t> pairElement = std::make_pair(start, end);
609 if (rangeSet_.empty()) {
610 rangeSet_.emplace_back(pairElement);
611 } else {
612 for (auto iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
613 if (IsIntersect(start, end, iter->first, iter->second) ||
614 IsAdjacent(start, end, iter->first, iter->second)) {
615 iter->first = std::min(iter->first, start);
616 iter->second = std::max(iter->second, end);
617 Compress();
618 return;
619 }
620 if (iter->first > end) {
621 rangeSet_.insert(iter, pairElement);
622 return;
623 }
624 }
625 rangeSet_.emplace_back(pairElement);
626 }
627 }
628 // if RangeResult cross-intersects with [a, z] and [A, Z],
629 // we capitalize the intersection part and insert into RangeResult.
Inter(RangeSet & cr,const RangeSet & s1)630 void RangeSet::Inter(RangeSet &cr, const RangeSet &s1)
631 {
632 if (s1.rangeSet_.empty()) {
633 rangeSet_.clear();
634 return;
635 }
636 if (rangeSet_.empty()) {
637 return;
638 }
639 for (const auto &interItem : s1.rangeSet_) {
640 uint32_t firstMax = 0;
641 uint32_t secondMin = 0;
642 for (const auto &range : rangeSet_) {
643 if (range.first >= interItem.first) {
644 firstMax = range.first;
645 } else {
646 firstMax = interItem.first;
647 }
648 if (range.second >= interItem.second) {
649 secondMin = interItem.second;
650 } else {
651 secondMin = range.second;
652 }
653 if (secondMin < firstMax) {
654 continue;
655 }
656 if (firstMax >= 'a' && firstMax <= 'z') {
657 cr.Insert(firstMax + 'A' - 'a', secondMin + 'A' - 'a');
658 }
659 if (firstMax >= 'A' && firstMax <= 'Z') {
660 cr.Insert(firstMax - 'A' + 'a', secondMin - 'A' + 'a');
661 }
662 }
663 }
664 }
Insert(const RangeSet & s1)665 void RangeSet::Insert(const RangeSet &s1)
666 {
667 if (s1.rangeSet_.empty()) {
668 return;
669 }
670 if (rangeSet_.empty()) {
671 rangeSet_ = s1.rangeSet_;
672 } else {
673 for (auto range : s1.rangeSet_) {
674 Insert(range.first, range.second);
675 }
676 Compress();
677 }
678 }
679
// Rewrites the set in place so that it holds the gaps between the stored ranges over
// [0, maxValue], where maxValue is UINT32_MAX when isUtf16 is true and UINT16_MAX otherwise.
// NOTE(review): the in-place rewrite presumably relies on rangeSet_ being sorted and
// non-overlapping — confirm against Insert()/Compress().
void RangeSet::Invert(bool isUtf16)
{
    uint32_t maxValue = isUtf16 ? UINT32_MAX : UINT16_MAX;
    // Nothing stored: the result is the single range [0, maxValue].
    if (rangeSet_.empty()) {
        rangeSet_.emplace_back(std::make_pair(0, maxValue));
        return;
    }

    auto iter = rangeSet_.begin();
    auto iter2 = rangeSet_.begin();
    // The first range already spans [0, maxValue]: the result is empty.
    if (iter->first == 0 && iter->second == maxValue) {
        rangeSet_.clear();
        return;
    }
    // From here on iter2 stays one element ahead of iter.
    iter2++;

    // Remember where the first range started; it is overwritten inside the loop below.
    uint32_t first = iter->first;

    for (iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
        // A range ending at maxValue has no gap after it: drop it and stop.
        if (iter->second == maxValue) {
            rangeSet_.erase(iter);
            break;
        }
        // Turn this range into the gap that follows it: from just past its end up to just
        // before the next range, or up to maxValue when it is the last one.
        iter->first = iter->second + 1;
        if (iter2 != rangeSet_.end()) {
            iter->second = iter2->first - 1;
            iter2++;
        } else {
            iter->second = maxValue;
        }
    }
    // Add the leading gap [0, first - 1] when the original first range did not start at 0.
    if (first > 0) {
        std::pair<uint32_t, uint32_t> pair1 = std::make_pair(0, first - 1);
        rangeSet_.push_front(pair1);
    }
    Compress();
}
717
Compress()718 void RangeSet::Compress()
719 {
720 auto iter = rangeSet_.begin();
721 auto iter2 = rangeSet_.begin();
722 iter2++;
723 while (iter2 != rangeSet_.end()) {
724 if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) ||
725 IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) {
726 iter->first = std::min(iter->first, iter2->first);
727 iter->second = std::max(iter->second, iter2->second);
728 iter2 = rangeSet_.erase(iter2);
729 } else {
730 iter++;
731 iter2++;
732 }
733 }
734 }
735 } // namespace panda::ecmascript
736