1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/regexp/regexp_opcode.h"
17
18 #include "ecmascript/regexp/regexp_executor.h"
19
20 namespace panda::ecmascript {
21 using CaptureState = RegExpExecutor::CaptureState;
22
23 static SaveStartOpCode g_saveStartOpcode = SaveStartOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
24 static SaveEndOpCode g_saveEndOpcode = SaveEndOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
25 static CharOpCode g_charOpcode = CharOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
26 static GotoOpCode g_gotoOpcode = GotoOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
27 static SplitNextOpCode g_splitNextOpcode = SplitNextOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
28 static SplitFirstOpCode g_splitFirstOpcode =
29 SplitFirstOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
30 static MatchOpCode g_matchOpcode = MatchOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
31 static LoopOpCode g_loopOpcode = LoopOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
32 static LoopGreedyOpCode g_loopGreedyOpcode =
33 LoopGreedyOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
34 static PushCharOpCode g_pushCharOpcode = PushCharOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
35 static CheckCharOpCode g_checkCharOpcode = CheckCharOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
36 static PushOpCode g_pushOpcode = PushOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
37 static PopOpCode g_popOpcode = PopOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
38 static SaveResetOpCode g_saveResetOpcode = SaveResetOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
39 static LineStartOpCode g_lineStartOpcode = LineStartOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
40 static LineEndOpCode g_lineEndOpcode = LineEndOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
41 static WordBoundaryOpCode g_wordBoundaryOpcode =
42 WordBoundaryOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
43 static NotWordBoundaryOpCode g_notWordBoundaryOpcode =
44 NotWordBoundaryOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
45 static AllOpCode g_allOpcode = AllOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
46 static DotsOpCode g_dotsOpcode = DotsOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
47 static MatchAheadOpCode g_matchAheadOpcode =
48 MatchAheadOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
49 static NegativeMatchAheadOpCode g_negativeMatchAheadOpcode =
50 NegativeMatchAheadOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
51 static MatchEndOpCode g_matchEndOpcode = MatchEndOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
52 static PrevOpCode g_prevOpcode = PrevOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
53 static RangeOpCode g_rangeOpcode = RangeOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
54 static BackReferenceOpCode g_backreferenceOpcode =
55 BackReferenceOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
56 static BackwardBackReferenceOpCode g_backwardBackreferenceOpcode =
57 BackwardBackReferenceOpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
58 static Char32OpCode g_char32Opcode = Char32OpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
59 static Range32OpCode g_range32Opcode = Range32OpCode(); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
60 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
61 static std::vector<RegExpOpCode *> g_intrinsicSet = {
62 &g_saveStartOpcode,
63 &g_saveEndOpcode,
64 &g_charOpcode,
65 &g_gotoOpcode,
66 &g_splitFirstOpcode,
67 &g_splitNextOpcode,
68 &g_matchAheadOpcode,
69 &g_negativeMatchAheadOpcode,
70 &g_matchOpcode,
71 &g_loopOpcode,
72 &g_loopGreedyOpcode,
73 &g_pushCharOpcode,
74 &g_checkCharOpcode,
75 &g_pushOpcode,
76 &g_popOpcode,
77 &g_saveResetOpcode,
78 &g_lineStartOpcode,
79 &g_lineEndOpcode,
80 &g_wordBoundaryOpcode,
81 &g_notWordBoundaryOpcode,
82 &g_allOpcode,
83 &g_dotsOpcode,
84 &g_matchEndOpcode,
85 &g_prevOpcode,
86 &g_rangeOpcode,
87 &g_backreferenceOpcode,
88 &g_backwardBackreferenceOpcode,
89 &g_char32Opcode,
90 &g_range32Opcode,
91 };
92
RegExpOpCode(uint8_t opCode,int size)93 RegExpOpCode::RegExpOpCode(uint8_t opCode, int size) : opCode_(opCode), size_(size) {}
94
95 /* static */
GetRegExpOpCode(const DynChunk & buf,int pc)96 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(const DynChunk &buf, int pc)
97 {
98 uint8_t opCode = buf.GetU8(pc);
99 ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
100 return g_intrinsicSet.at(opCode);
101 }
102
103 /* static */
GetRegExpOpCode(uint8_t opCode)104 RegExpOpCode *RegExpOpCode::GetRegExpOpCode(uint8_t opCode)
105 {
106 ASSERT_PRINT(opCode <= g_intrinsicSet.size(), "invalid op code");
107 return g_intrinsicSet.at(opCode);
108 }
109
110 /* static */
DumpRegExpOpCode(std::ostream & out,const DynChunk & buf)111 void RegExpOpCode::DumpRegExpOpCode(std::ostream &out, const DynChunk &buf)
112 {
113 out << "OpCode:\t" << std::endl;
114 uint32_t pc = RegExpParser::OP_START_OFFSET;
115 do {
116 RegExpOpCode *byteCode = GetRegExpOpCode(buf, pc);
117 pc = byteCode->DumpOpCode(out, buf, pc);
118 } while (pc < buf.size_);
119 }
120
EmitOpCode(DynChunk * buf,uint32_t para) const121 uint32_t SaveStartOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
122 {
123 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
124 buf->EmitChar(GetOpCode());
125 buf->EmitChar(capture);
126 return GetDynChunkfSize(*buf);
127 }
128
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const129 uint32_t SaveStartOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
130 {
131 out << offset << ":\t"
132 << "save_start\t" << buf.GetU8(offset + 1) << std::endl;
133 return offset + GetSize();
134 }
135
EmitOpCode(DynChunk * buf,uint32_t para) const136 uint32_t SaveEndOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
137 {
138 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
139 buf->EmitChar(GetOpCode());
140 buf->EmitChar(capture);
141 return GetDynChunkfSize(*buf);
142 }
143
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const144 uint32_t SaveEndOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
145 {
146 out << offset << ":\t"
147 << "save_end\t" << buf.GetU8(offset + 1) << std::endl;
148 return offset + GetSize();
149 }
150
EmitOpCode(DynChunk * buf,uint32_t para) const151 uint32_t CharOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
152 {
153 auto paraChar = static_cast<uint16_t>(para & 0xffffU); // NOLINTNEXTLINE(readability-magic-numbers)
154 buf->EmitChar(GetOpCode());
155 buf->EmitU16(paraChar);
156 return GetDynChunkfSize(*buf);
157 }
158
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const159 uint32_t CharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
160 {
161 out << offset << ":\t"
162 << "char\t" << static_cast<char>(buf.GetU16(offset + 1)) << std::endl;
163 return offset + GetSize();
164 }
165
EmitOpCode(DynChunk * buf,uint32_t para) const166 uint32_t Char32OpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
167 {
168 buf->EmitChar(GetOpCode());
169 buf->EmitU32(para);
170 return GetDynChunkfSize(*buf);
171 }
172
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const173 uint32_t Char32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
174 {
175 out << offset << ":\t"
176 << "char32\t" << static_cast<char>(buf.GetU32(offset + 1)) << std::endl;
177 return offset + GetSize();
178 }
179
EmitOpCode(DynChunk * buf,uint32_t para) const180 uint32_t GotoOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
181 {
182 buf->EmitChar(GetOpCode());
183 buf->EmitU32(para);
184 return GetDynChunkfSize(*buf);
185 }
186
UpdateOpPara(DynChunk * buf,uint32_t offset,uint32_t para) const187 void GotoOpCode::UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const
188 {
189 buf->PutU32(offset + 1, para);
190 }
191
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const192 uint32_t GotoOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
193 {
194 out << offset << ":\t"
195 << "goto\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
196 return offset + GetSize();
197 }
198
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const199 uint32_t SplitNextOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
200 {
201 buf->Insert(offset, GetSize());
202 buf->PutU8(offset, GetOpCode());
203 buf->PutU32(offset + 1, para);
204 return GetDynChunkfSize(*buf);
205 }
206
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const207 uint32_t SplitNextOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
208 {
209 out << offset << ":\t"
210 << "split_next\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
211 return offset + GetSize();
212 }
213
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const214 uint32_t SplitFirstOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
215 {
216 buf->Insert(offset, GetSize());
217 buf->PutU8(offset, GetOpCode());
218 buf->PutU32(offset + 1, para);
219 return GetDynChunkfSize(*buf);
220 }
221
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const222 uint32_t SplitFirstOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
223 {
224 out << offset << ":\t"
225 << "split_first\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
226 return offset + GetSize();
227 }
228
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const229 uint32_t LoopOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
230 {
231 buf->EmitChar(GetOpCode());
232 buf->EmitU32(start);
233 buf->EmitU32(min);
234 buf->EmitU32(max);
235 return GetDynChunkfSize(*buf);
236 }
237
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const238 uint32_t LoopOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
239 {
240 out << offset << ":\t"
241 << "loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
242 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
243 << std::endl;
244 return offset + GetSize();
245 }
246
EmitOpCode(DynChunk * buf,uint32_t start,uint32_t min,uint32_t max) const247 uint32_t LoopGreedyOpCode::EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const
248 {
249 buf->EmitChar(GetOpCode());
250 buf->EmitU32(start);
251 buf->EmitU32(min);
252 buf->EmitU32(max);
253 return GetDynChunkfSize(*buf);
254 }
255
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const256 uint32_t LoopGreedyOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
257 {
258 out << offset << ":\t"
259 << "greedy_loop\t" << buf.GetU32(offset + 1) + offset + GetSize() << "\t"
260 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_FIVE) << "\t" << buf.GetU32(offset + RegExpOpCode::OP_SIZE_NINE)
261 << std::endl;
262 return offset + GetSize();
263 }
264
InsertOpCode(DynChunk * buf,uint32_t offset) const265 uint32_t PushCharOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
266 {
267 buf->Insert(offset, GetSize());
268 buf->PutU8(offset, GetOpCode());
269 return GetDynChunkfSize(*buf);
270 }
271
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const272 uint32_t PushCharOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
273 {
274 out << offset << ":\t"
275 << "push_char" << std::endl;
276 return offset + GetSize();
277 }
278
InsertOpCode(DynChunk * buf,uint32_t offset) const279 uint32_t PushOpCode::InsertOpCode(DynChunk *buf, uint32_t offset) const
280 {
281 buf->Insert(offset, GetSize());
282 buf->PutU8(offset, GetOpCode());
283 return GetDynChunkfSize(*buf);
284 }
285
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const286 uint32_t PushOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
287 {
288 out << offset << ":\t"
289 << "push" << std::endl;
290 return offset + GetSize();
291 }
292
EmitOpCode(DynChunk * buf) const293 uint32_t PopOpCode::EmitOpCode(DynChunk *buf) const
294 {
295 buf->EmitChar(GetOpCode());
296 return GetDynChunkfSize(*buf);
297 }
298
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const299 uint32_t PopOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
300 {
301 out << offset << ":\t"
302 << "pop" << std::endl;
303 return offset + GetSize();
304 }
305
EmitOpCode(DynChunk * buf,uint32_t offset) const306 uint32_t CheckCharOpCode::EmitOpCode(DynChunk *buf, uint32_t offset) const
307 {
308 buf->EmitChar(GetOpCode());
309 buf->EmitU32(offset);
310 return GetDynChunkfSize(*buf);
311 }
312
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const313 uint32_t CheckCharOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
314 {
315 out << offset << ":\t"
316 << "check_char\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
317 return offset + GetSize();
318 }
319
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t start,uint32_t end) const320 uint32_t SaveResetOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const
321 {
322 auto captureStart = static_cast<uint8_t>(start & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
323 auto captureEnd = static_cast<uint8_t>(end & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
324 buf->Insert(offset, GetSize());
325 buf->PutU8(offset, GetOpCode());
326 buf->PutU8(offset + RegExpOpCode::OP_SIZE_ONE, captureStart);
327 buf->PutU8(offset + RegExpOpCode::OP_SIZE_TWO, captureEnd);
328 return GetDynChunkfSize(*buf);
329 }
330
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const331 uint32_t SaveResetOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
332 {
333 out << offset << ":\t"
334 << "save_reset\t" << buf.GetU8(offset + RegExpOpCode::OP_SIZE_ONE) << "\t"
335 << buf.GetU8(offset + RegExpOpCode::OP_SIZE_TWO) << std::endl;
336 return offset + GetSize();
337 }
338
EmitOpCode(DynChunk * buf,uint32_t para) const339 uint32_t MatchOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
340 {
341 buf->EmitChar(GetOpCode());
342 return GetDynChunkfSize(*buf);
343 }
344
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const345 uint32_t MatchOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
346 {
347 out << offset << ":\t"
348 << "match" << std::endl;
349 return offset + GetSize();
350 }
351
EmitOpCode(DynChunk * buf,uint32_t para) const352 uint32_t MatchEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
353 {
354 buf->EmitChar(GetOpCode());
355 return GetDynChunkfSize(*buf);
356 }
357
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const358 uint32_t MatchEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
359 {
360 out << offset << ":\t"
361 << "match_end" << std::endl;
362 return offset + GetSize();
363 }
364
EmitOpCode(DynChunk * buf,uint32_t para) const365 uint32_t LineStartOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
366 {
367 buf->EmitChar(GetOpCode());
368 return GetDynChunkfSize(*buf);
369 }
370
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const371 uint32_t LineStartOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
372 {
373 out << offset << ":\t"
374 << "line_start" << std::endl;
375 return offset + GetSize();
376 }
377
EmitOpCode(DynChunk * buf,uint32_t para) const378 uint32_t LineEndOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
379 {
380 buf->EmitChar(GetOpCode());
381 return GetDynChunkfSize(*buf);
382 }
383
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const384 uint32_t LineEndOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
385 {
386 out << offset << ":\t"
387 << "line_end" << std::endl;
388 return offset + GetSize();
389 }
390
EmitOpCode(DynChunk * buf,uint32_t para) const391 uint32_t WordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
392 {
393 buf->EmitChar(GetOpCode());
394 return GetDynChunkfSize(*buf);
395 }
396
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const397 uint32_t WordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
398 {
399 out << offset << ":\t"
400 << "word_boundary" << std::endl;
401 return offset + GetSize();
402 }
403
EmitOpCode(DynChunk * buf,uint32_t para) const404 uint32_t NotWordBoundaryOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
405 {
406 buf->EmitChar(GetOpCode());
407 return GetDynChunkfSize(*buf);
408 }
409
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const410 uint32_t NotWordBoundaryOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf,
411 uint32_t offset) const
412 {
413 out << offset << ":\t"
414 << "not_word_boundary" << std::endl;
415 return offset + GetSize();
416 }
417
EmitOpCode(DynChunk * buf,uint32_t para) const418 uint32_t AllOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
419 {
420 buf->EmitChar(GetOpCode());
421 return GetDynChunkfSize(*buf);
422 }
423
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const424 uint32_t AllOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
425 {
426 out << offset << ":\t"
427 << "all" << std::endl;
428 return offset + GetSize();
429 }
430
EmitOpCode(DynChunk * buf,uint32_t para) const431 uint32_t DotsOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
432 {
433 buf->EmitChar(GetOpCode());
434 return GetDynChunkfSize(*buf);
435 }
436
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const437 uint32_t DotsOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
438 {
439 out << offset << ":\t"
440 << "dots" << std::endl;
441 return offset + GetSize();
442 }
443
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const444 uint32_t MatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
445 {
446 out << offset << ":\t"
447 << "match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
448 return offset + GetSize();
449 }
450
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const451 uint32_t RangeOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
452 {
453 out << offset << ":\t"
454 << "range\t";
455 size_t size = buf.GetU16(offset + 1);
456 for (size_t i = 0; i < size; i++) {
457 out << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_FOUR)) << "\t"
458 << buf.GetU16(offset + RegExpOpCode::OP_SIZE_THREE +
459 (i * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_TWO))
460 << "\t";
461 }
462 out << std::endl;
463 return offset + size * RegExpOpCode::OP_SIZE_FOUR + RegExpOpCode::OP_SIZE_THREE;
464 }
465
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const466 uint32_t RangeOpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
467 {
468 buf->EmitChar(GetOpCode());
469 size_t size = rangeSet.rangeSet_.size();
470 buf->EmitU16(size);
471 for (auto range : rangeSet.rangeSet_) {
472 buf->EmitU16(range.first);
473 buf->EmitU16(range.second);
474 }
475 return GetDynChunkfSize(*buf);
476 }
477
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const478 uint32_t Range32OpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
479 {
480 out << offset << ":\t"
481 << "range32\t";
482 size_t size = buf.GetU16(offset + 1);
483 for (size_t i = 0; i < size; i++) {
484 out << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE + (i * RegExpOpCode::OP_SIZE_EIGHT)) << "\t"
485 << buf.GetU32(offset + RegExpOpCode::OP_SIZE_THREE +
486 (i * RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_FOUR))
487 << "\t";
488 }
489 out << std::endl;
490 return offset + size * +RegExpOpCode::OP_SIZE_EIGHT + RegExpOpCode::OP_SIZE_THREE;
491 }
492
InsertOpCode(DynChunk * buf,const RangeSet & rangeSet) const493 uint32_t Range32OpCode::InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const
494 {
495 buf->EmitChar(GetOpCode());
496 size_t size = rangeSet.rangeSet_.size();
497 buf->EmitU16(size);
498 for (auto range : rangeSet.rangeSet_) {
499 buf->EmitU32(range.first);
500 buf->EmitU32(range.second);
501 }
502 return GetDynChunkfSize(*buf);
503 }
504
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const505 uint32_t MatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
506 {
507 buf->Insert(offset, GetSize());
508 buf->PutU8(offset, GetOpCode());
509 buf->PutU32(offset + 1, para);
510 return GetDynChunkfSize(*buf);
511 }
512
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const513 uint32_t NegativeMatchAheadOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
514 {
515 out << offset << ":\t"
516 << "negative_match_ahead\t" << buf.GetU32(offset + 1) + offset + GetSize() << std::endl;
517 return offset + GetSize();
518 }
519
InsertOpCode(DynChunk * buf,uint32_t offset,uint32_t para) const520 uint32_t NegativeMatchAheadOpCode::InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const
521 {
522 buf->Insert(offset, GetSize());
523 buf->PutU8(offset, GetOpCode());
524 buf->PutU32(offset + 1, para);
525 return GetDynChunkfSize(*buf);
526 }
527
EmitOpCode(DynChunk * buf,uint32_t para) const528 uint32_t PrevOpCode::EmitOpCode(DynChunk *buf, [[maybe_unused]] uint32_t para) const
529 {
530 buf->EmitChar(GetOpCode());
531 return GetDynChunkfSize(*buf);
532 }
533
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const534 uint32_t PrevOpCode::DumpOpCode(std::ostream &out, [[maybe_unused]] const DynChunk &buf, uint32_t offset) const
535 {
536 out << offset << ":\t"
537 << "prev" << std::endl;
538 return offset + GetSize();
539 }
540
EmitOpCode(DynChunk * buf,uint32_t para) const541 uint32_t BackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
542 {
543 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
544 buf->EmitChar(GetOpCode());
545 buf->EmitChar(capture);
546 return GetDynChunkfSize(*buf);
547 }
548
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const549 uint32_t BackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
550 {
551 out << offset << ":\t"
552 << "backreference\t" << buf.GetU8(offset + 1) << std::endl;
553 return offset + GetSize();
554 }
555
EmitOpCode(DynChunk * buf,uint32_t para) const556 uint32_t BackwardBackReferenceOpCode::EmitOpCode(DynChunk *buf, uint32_t para) const
557 {
558 auto capture = static_cast<uint8_t>(para & 0xffU); // NOLINTNEXTLINE(readability-magic-numbers)
559 buf->EmitChar(GetOpCode());
560 buf->EmitChar(capture);
561 return GetDynChunkfSize(*buf);
562 }
563
DumpOpCode(std::ostream & out,const DynChunk & buf,uint32_t offset) const564 uint32_t BackwardBackReferenceOpCode::DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const
565 {
566 out << offset << ":\t"
567 << "backward_backreference\t" << buf.GetU8(offset + 1) << std::endl;
568 return offset + GetSize();
569 }
570
Insert(uint32_t start,uint32_t end)571 void RangeSet::Insert(uint32_t start, uint32_t end)
572 {
573 if (start > end) {
574 return;
575 }
576 std::pair<uint32_t, uint32_t> pairElement = std::make_pair(start, end);
577 if (rangeSet_.empty()) {
578 rangeSet_.emplace_back(pairElement);
579 } else {
580 for (auto iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
581 if (IsIntersect(start, end, iter->first, iter->second) ||
582 IsAdjacent(start, end, iter->first, iter->second)) {
583 iter->first = std::min(iter->first, start);
584 iter->second = std::max(iter->second, end);
585 Compress();
586 return;
587 }
588 if (iter->first > end) {
589 rangeSet_.insert(iter, pairElement);
590 return;
591 }
592 }
593 rangeSet_.emplace_back(pairElement);
594 }
595 }
596 // if RangeResult cross-intersects with [a, z] and [A, Z],
597 // we capitalize the intersection part and insert into RangeResult.
Inter(RangeSet & cr,const RangeSet & s1)598 void RangeSet::Inter(RangeSet &cr, const RangeSet &s1)
599 {
600 if (s1.rangeSet_.empty()) {
601 rangeSet_.clear();
602 return;
603 }
604 if (rangeSet_.empty()) {
605 return;
606 }
607 for (const auto &interItem : s1.rangeSet_) {
608 uint32_t firstMax = 0;
609 uint32_t secondMin = 0;
610 for (const auto &range : rangeSet_) {
611 if (range.first >= interItem.first) {
612 firstMax = range.first;
613 } else {
614 firstMax = interItem.first;
615 }
616 if (range.second >= interItem.second) {
617 secondMin = interItem.second;
618 } else {
619 secondMin = range.second;
620 }
621 if (secondMin < firstMax) {
622 continue;
623 }
624 if (firstMax >= 'a' && firstMax <= 'z') {
625 cr.Insert(firstMax + 'A' - 'a', secondMin + 'A' - 'a');
626 }
627 if (firstMax >= 'A' && firstMax <= 'Z') {
628 cr.Insert(firstMax - 'A' + 'a', secondMin - 'A' + 'a');
629 }
630 }
631 }
632 }
Insert(const RangeSet & s1)633 void RangeSet::Insert(const RangeSet &s1)
634 {
635 if (s1.rangeSet_.empty()) {
636 return;
637 }
638 if (rangeSet_.empty()) {
639 rangeSet_ = s1.rangeSet_;
640 } else {
641 for (auto range : s1.rangeSet_) {
642 Insert(range.first, range.second);
643 }
644 Compress();
645 }
646 }
647
Invert(bool isUtf16)648 void RangeSet::Invert(bool isUtf16)
649 {
650 uint32_t maxValue = isUtf16 ? UINT32_MAX : UINT16_MAX;
651 if (rangeSet_.empty()) {
652 rangeSet_.emplace_back(std::make_pair(0, maxValue));
653 return;
654 }
655
656 auto iter = rangeSet_.begin();
657 auto iter2 = rangeSet_.begin();
658 if (iter->first == 0 && iter->second == maxValue) {
659 rangeSet_.clear();
660 return;
661 }
662 iter2++;
663
664 uint32_t first = iter->first;
665
666 for (iter = rangeSet_.begin(); iter != rangeSet_.end(); iter++) {
667 if (iter->second == maxValue) {
668 rangeSet_.erase(iter);
669 break;
670 }
671 iter->first = iter->second + 1;
672 if (iter2 != rangeSet_.end()) {
673 iter->second = iter2->first - 1;
674 iter2++;
675 } else {
676 iter->second = maxValue;
677 }
678 }
679 if (first > 0) {
680 std::pair<uint32_t, uint32_t> pair1 = std::make_pair(0, first - 1);
681 rangeSet_.push_front(pair1);
682 }
683 Compress();
684 }
685
Compress()686 void RangeSet::Compress()
687 {
688 auto iter = rangeSet_.begin();
689 auto iter2 = rangeSet_.begin();
690 iter2++;
691 while (iter2 != rangeSet_.end()) {
692 if (IsIntersect(iter->first, iter->second, iter2->first, iter2->second) ||
693 IsAdjacent(iter->first, iter->second, iter2->first, iter2->second)) {
694 iter->first = std::min(iter->first, iter2->first);
695 iter->second = std::max(iter->second, iter2->second);
696 iter2 = rangeSet_.erase(iter2);
697 } else {
698 iter++;
699 iter2++;
700 }
701 }
702 }
703 } // namespace panda::ecmascript
704