1 /** 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef PANDA_RUNTIME_REGEXP_OPCODE_H 17 #define PANDA_RUNTIME_REGEXP_OPCODE_H 18 19 #include <list> 20 21 #include "runtime/regexp/ecmascript/mem/dyn_chunk.h" 22 23 namespace ark { 24 class RegExpOpCode { 25 public: 26 enum : uint8_t { 27 OP_SAVE_START = 0U, 28 OP_SAVE_END, 29 OP_CHAR, 30 OP_GOTO, 31 OP_SPLIT_FIRST, 32 OP_SPLIT_NEXT, 33 OP_MATCH_AHEAD, 34 OP_NEGATIVE_MATCH_AHEAD, 35 OP_MATCH, 36 OP_LOOP, 37 OP_LOOP_GREEDY, 38 OP_PUSH_CHAR, 39 OP_CHECK_CHAR, 40 OP_PUSH, 41 OP_POP, 42 OP_SAVE_RESET, 43 OP_LINE_START, 44 OP_LINE_END, 45 OP_WORD_BOUNDARY, 46 OP_NOT_WORD_BOUNDARY, 47 OP_ALL, 48 OP_DOTS, 49 OP_MATCH_END, 50 OP_PREV, 51 OP_RANGE, 52 OP_BACKREFERENCE, 53 OP_BACKWARD_BACKREFERENCE, 54 OP_CHAR32, 55 OP_RANGE32, 56 OP_INVALID, 57 }; 58 59 static constexpr size_t OP_SIZE_ONE = 1; 60 static constexpr size_t OP_SIZE_TWO = 2; 61 static constexpr size_t OP_SIZE_THREE = 3; 62 static constexpr size_t OP_SIZE_FOUR = 4; 63 static constexpr size_t OP_SIZE_FIVE = 5; 64 static constexpr size_t OP_SIZE_EIGHT = 8; 65 static constexpr size_t OP_SIZE_NINE = 9; 66 static constexpr size_t OP_SIZE_THIRTEEN = 13; 67 68 RegExpOpCode(uint8_t opCode, int size); 69 NO_COPY_SEMANTIC(RegExpOpCode); 70 NO_MOVE_SEMANTIC(RegExpOpCode); 71 72 virtual ~RegExpOpCode() = default; 73 static RegExpOpCode *GetRegExpOpCode(const DynChunk &buf, int pcOffset); 74 static RegExpOpCode *GetRegExpOpCode(uint8_t opCode); 75 static void DumpRegExpOpCode(std::ostream &out, const DynChunk &buf); GetSize()76 inline uint8_t GetSize() const 77 { 78 return size_; 79 } GetOpCode()80 inline uint8_t GetOpCode() const 81 { 82 return opCode_; 83 } GetDynChunkfSize(const DynChunk & buf)84 inline int GetDynChunkfSize(const DynChunk &buf) const 85 { 86 return buf.size_; 87 } 88 virtual uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const = 0; 89 90 private: 91 uint8_t opCode_ {0}; 92 uint8_t size_ {0}; 93 }; 94 95 class SaveStartOpCode : public RegExpOpCode { 96 public: SaveStartOpCode()97 SaveStartOpCode() : RegExpOpCode(OP_SAVE_START, RegExpOpCode::OP_SIZE_TWO) {} 98 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 99 ~SaveStartOpCode() override = default; 100 NO_COPY_SEMANTIC(SaveStartOpCode); 101 NO_MOVE_SEMANTIC(SaveStartOpCode); 102 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 103 }; 104 105 class SaveEndOpCode : public RegExpOpCode { 106 public: SaveEndOpCode()107 SaveEndOpCode() : RegExpOpCode(OP_SAVE_END, RegExpOpCode::OP_SIZE_TWO) {} 108 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 109 ~SaveEndOpCode() override = default; 110 NO_COPY_SEMANTIC(SaveEndOpCode); 111 NO_MOVE_SEMANTIC(SaveEndOpCode); 112 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 113 }; 114 115 class CharOpCode : public RegExpOpCode { 116 public: CharOpCode()117 CharOpCode() : RegExpOpCode(OP_CHAR, RegExpOpCode::OP_SIZE_THREE) {} 118 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 119 ~CharOpCode() override = default; 120 NO_COPY_SEMANTIC(CharOpCode); 121 NO_MOVE_SEMANTIC(CharOpCode); 122 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 123 }; 124 125 class GotoOpCode : public RegExpOpCode { 126 public: GotoOpCode()127 GotoOpCode() : RegExpOpCode(OP_GOTO, RegExpOpCode::OP_SIZE_FIVE) {} 128 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 129 void UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const; 130 ~GotoOpCode() override = default; 131 NO_COPY_SEMANTIC(GotoOpCode); 132 NO_MOVE_SEMANTIC(GotoOpCode); 133 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 134 }; 135 136 class SplitNextOpCode : public RegExpOpCode { 137 public: SplitNextOpCode()138 SplitNextOpCode() : RegExpOpCode(OP_SPLIT_NEXT, RegExpOpCode::OP_SIZE_FIVE) {} 139 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 140 ~SplitNextOpCode() override = default; 141 NO_COPY_SEMANTIC(SplitNextOpCode); 142 NO_MOVE_SEMANTIC(SplitNextOpCode); 143 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 144 }; 145 146 class SplitFirstOpCode : public RegExpOpCode { 147 public: SplitFirstOpCode()148 SplitFirstOpCode() : RegExpOpCode(OP_SPLIT_FIRST, RegExpOpCode::OP_SIZE_FIVE) {} 149 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 150 ~SplitFirstOpCode() override = default; 151 NO_COPY_SEMANTIC(SplitFirstOpCode); 152 NO_MOVE_SEMANTIC(SplitFirstOpCode); 153 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 154 }; 155 156 class PushOpCode : public RegExpOpCode { 157 public: PushOpCode()158 PushOpCode() : RegExpOpCode(OP_PUSH, RegExpOpCode::OP_SIZE_ONE) {} 159 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const; 160 ~PushOpCode() override = default; 161 NO_COPY_SEMANTIC(PushOpCode); 162 NO_MOVE_SEMANTIC(PushOpCode); 163 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 164 }; 165 166 class PopOpCode : public RegExpOpCode { 167 public: PopOpCode()168 PopOpCode() : RegExpOpCode(OP_POP, RegExpOpCode::OP_SIZE_ONE) {} 169 uint32_t EmitOpCode(DynChunk *buf) const; 170 ~PopOpCode() override = default; 171 NO_COPY_SEMANTIC(PopOpCode); 172 NO_MOVE_SEMANTIC(PopOpCode); 173 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 174 }; 175 176 class PushCharOpCode : public RegExpOpCode { 177 public: PushCharOpCode()178 PushCharOpCode() : RegExpOpCode(OP_PUSH_CHAR, RegExpOpCode::OP_SIZE_ONE) {} 179 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const; 180 ~PushCharOpCode() override = default; 181 NO_COPY_SEMANTIC(PushCharOpCode); 182 NO_MOVE_SEMANTIC(PushCharOpCode); 183 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 184 }; 185 186 class CheckCharOpCode : public RegExpOpCode { 187 public: CheckCharOpCode()188 CheckCharOpCode() : RegExpOpCode(OP_CHECK_CHAR, RegExpOpCode::OP_SIZE_FIVE) {} 189 uint32_t EmitOpCode(DynChunk *buf, uint32_t offset) const; 190 ~CheckCharOpCode() override = default; 191 NO_COPY_SEMANTIC(CheckCharOpCode); 192 NO_MOVE_SEMANTIC(CheckCharOpCode); 193 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 194 }; 195 196 class LoopOpCode : public RegExpOpCode { 197 public: LoopOpCode()198 LoopOpCode() : RegExpOpCode(OP_LOOP, RegExpOpCode::OP_SIZE_THIRTEEN) {} 199 uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const; 200 ~LoopOpCode() override = default; 201 NO_COPY_SEMANTIC(LoopOpCode); 202 NO_MOVE_SEMANTIC(LoopOpCode); 203 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 204 }; 205 206 class LoopGreedyOpCode : public RegExpOpCode { 207 public: LoopGreedyOpCode()208 LoopGreedyOpCode() : RegExpOpCode(OP_LOOP_GREEDY, RegExpOpCode::OP_SIZE_THIRTEEN) {} 209 uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const; 210 ~LoopGreedyOpCode() override = default; 211 NO_COPY_SEMANTIC(LoopGreedyOpCode); 212 NO_MOVE_SEMANTIC(LoopGreedyOpCode); 213 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 214 }; 215 216 class SaveResetOpCode : public RegExpOpCode { 217 public: SaveResetOpCode()218 SaveResetOpCode() : RegExpOpCode(OP_SAVE_RESET, RegExpOpCode::OP_SIZE_THREE) {} 219 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const; 220 ~SaveResetOpCode() override = default; 221 NO_COPY_SEMANTIC(SaveResetOpCode); 222 NO_MOVE_SEMANTIC(SaveResetOpCode); 223 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 224 }; 225 226 class MatchOpCode : public RegExpOpCode { 227 public: MatchOpCode()228 MatchOpCode() : RegExpOpCode(OP_MATCH, RegExpOpCode::OP_SIZE_ONE) {} 229 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 230 ~MatchOpCode() override = default; 231 NO_COPY_SEMANTIC(MatchOpCode); 232 NO_MOVE_SEMANTIC(MatchOpCode); 233 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 234 }; 235 236 class MatchEndOpCode : public RegExpOpCode { 237 public: MatchEndOpCode()238 MatchEndOpCode() : RegExpOpCode(OP_MATCH_END, RegExpOpCode::OP_SIZE_ONE) {} 239 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 240 ~MatchEndOpCode() override = default; 241 NO_COPY_SEMANTIC(MatchEndOpCode); 242 NO_MOVE_SEMANTIC(MatchEndOpCode); 243 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 244 }; 245 246 class LineStartOpCode : public RegExpOpCode { 247 public: LineStartOpCode()248 LineStartOpCode() : RegExpOpCode(OP_LINE_START, RegExpOpCode::OP_SIZE_ONE) {} 249 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 250 ~LineStartOpCode() override = default; 251 NO_COPY_SEMANTIC(LineStartOpCode); 252 NO_MOVE_SEMANTIC(LineStartOpCode); 253 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 254 }; 255 256 class LineEndOpCode : public RegExpOpCode { 257 public: LineEndOpCode()258 LineEndOpCode() : RegExpOpCode(OP_LINE_END, RegExpOpCode::OP_SIZE_ONE) {} 259 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 260 ~LineEndOpCode() override = default; 261 NO_COPY_SEMANTIC(LineEndOpCode); 262 NO_MOVE_SEMANTIC(LineEndOpCode); 263 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 264 }; 265 266 class WordBoundaryOpCode : public RegExpOpCode { 267 public: WordBoundaryOpCode()268 WordBoundaryOpCode() : RegExpOpCode(OP_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {} 269 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 270 ~WordBoundaryOpCode() override = default; 271 NO_COPY_SEMANTIC(WordBoundaryOpCode); 272 NO_MOVE_SEMANTIC(WordBoundaryOpCode); 273 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 274 }; 275 276 class NotWordBoundaryOpCode : public RegExpOpCode { 277 public: NotWordBoundaryOpCode()278 NotWordBoundaryOpCode() : RegExpOpCode(OP_NOT_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {} 279 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 280 ~NotWordBoundaryOpCode() override = default; 281 NO_COPY_SEMANTIC(NotWordBoundaryOpCode); 282 NO_MOVE_SEMANTIC(NotWordBoundaryOpCode); 283 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 284 }; 285 286 class AllOpCode : public RegExpOpCode { 287 public: AllOpCode()288 AllOpCode() : RegExpOpCode(OP_ALL, RegExpOpCode::OP_SIZE_ONE) {} 289 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 290 ~AllOpCode() override = default; 291 NO_COPY_SEMANTIC(AllOpCode); 292 NO_MOVE_SEMANTIC(AllOpCode); 293 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 294 }; 295 296 class DotsOpCode : public RegExpOpCode { 297 public: DotsOpCode()298 DotsOpCode() : RegExpOpCode(OP_DOTS, RegExpOpCode::OP_SIZE_ONE) {} 299 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 300 ~DotsOpCode() override = default; 301 NO_COPY_SEMANTIC(DotsOpCode); 302 NO_MOVE_SEMANTIC(DotsOpCode); 303 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 304 }; 305 306 class RangeSet { 307 public: 308 RangeSet() = default; RangeSet(uint32_t value)309 explicit RangeSet(uint32_t value) 310 { 311 Insert(value, value); 312 } RangeSet(uint32_t start,uint32_t end)313 explicit RangeSet(uint32_t start, uint32_t end) 314 { 315 Insert(start, end); 316 } RangeSet(const std::list<std::pair<uint32_t,uint32_t>> & rangeSet)317 explicit RangeSet(const std::list<std::pair<uint32_t, uint32_t>> &rangeSet) 318 { 319 rangeSet_ = rangeSet; 320 } 321 ~RangeSet() = default; 322 IsIntersect(uint64_t start,uint64_t end,uint64_t start1,uint64_t end1)323 inline bool IsIntersect(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const 324 { 325 return ((start1 > start) && (start1 < end)) || ((start > start1) && (start < end1)); 326 } IsAdjacent(uint64_t start,uint64_t end,uint64_t start1,uint64_t end1)327 inline bool IsAdjacent(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const 328 { 329 return ((end == start1 || (end + 1) == start1)) || ((end1 == start) || (end1 + 1 == start)); 330 } 331 332 inline bool operator==(const RangeSet &other) const 333 { 334 return rangeSet_ == other.rangeSet_; 335 } 336 IsContain(uint32_t value)337 inline bool IsContain(uint32_t value) const 338 { 339 for (auto range : rangeSet_) { 340 if (value >= range.first && value <= range.second) { 341 return true; 342 } 343 } 344 return false; 345 } HighestValue()346 inline uint32_t HighestValue() const 347 { 348 if (!rangeSet_.empty()) { 349 return rangeSet_.back().second; 350 } 351 return 0; 352 } 353 RangeSet(RangeSet const &) = default; 354 RangeSet &operator=(RangeSet const &) = default; 355 RangeSet(RangeSet &&) = default; 356 RangeSet &operator=(RangeSet &&) = default; 357 358 void Insert(uint32_t start, uint32_t end); 359 void Insert(const RangeSet &s1); 360 void Invert(bool isUtf16); 361 void Compress(); 362 363 private: 364 friend class RangeOpCode; 365 friend class Range32OpCode; 366 std::list<std::pair<uint32_t, uint32_t>> rangeSet_ {}; 367 }; 368 369 class RangeOpCode : public RegExpOpCode { 370 public: RangeOpCode()371 RangeOpCode() : RegExpOpCode(OP_RANGE, RegExpOpCode::OP_SIZE_ONE) {} 372 ~RangeOpCode() override = default; 373 NO_COPY_SEMANTIC(RangeOpCode); 374 NO_MOVE_SEMANTIC(RangeOpCode); 375 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 376 uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const; 377 }; 378 379 class MatchAheadOpCode : public RegExpOpCode { 380 public: MatchAheadOpCode()381 MatchAheadOpCode() : RegExpOpCode(OP_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {} 382 ~MatchAheadOpCode() override = default; 383 NO_COPY_SEMANTIC(MatchAheadOpCode); 384 NO_MOVE_SEMANTIC(MatchAheadOpCode); 385 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 386 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 387 }; 388 389 class NegativeMatchAheadOpCode : public RegExpOpCode { 390 public: NegativeMatchAheadOpCode()391 NegativeMatchAheadOpCode() : RegExpOpCode(OP_NEGATIVE_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {} 392 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 393 ~NegativeMatchAheadOpCode() override = default; 394 NO_COPY_SEMANTIC(NegativeMatchAheadOpCode); 395 NO_MOVE_SEMANTIC(NegativeMatchAheadOpCode); 396 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 397 }; 398 399 class PrevOpCode : public RegExpOpCode { 400 public: PrevOpCode()401 PrevOpCode() : RegExpOpCode(OP_PREV, RegExpOpCode::OP_SIZE_ONE) {} 402 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 403 ~PrevOpCode() override = default; 404 NO_COPY_SEMANTIC(PrevOpCode); 405 NO_MOVE_SEMANTIC(PrevOpCode); 406 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 407 }; 408 409 class BackReferenceOpCode : public RegExpOpCode { 410 public: BackReferenceOpCode()411 BackReferenceOpCode() : RegExpOpCode(OP_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {} 412 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 413 ~BackReferenceOpCode() override = default; 414 NO_COPY_SEMANTIC(BackReferenceOpCode); 415 NO_MOVE_SEMANTIC(BackReferenceOpCode); 416 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 417 }; 418 419 class BackwardBackReferenceOpCode : public RegExpOpCode { 420 public: BackwardBackReferenceOpCode()421 BackwardBackReferenceOpCode() : RegExpOpCode(OP_BACKWARD_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {} 422 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 423 ~BackwardBackReferenceOpCode() override = default; 424 NO_COPY_SEMANTIC(BackwardBackReferenceOpCode); 425 NO_MOVE_SEMANTIC(BackwardBackReferenceOpCode); 426 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 427 }; 428 429 class Char32OpCode : public RegExpOpCode { 430 public: Char32OpCode()431 Char32OpCode() : RegExpOpCode(OP_CHAR32, RegExpOpCode::OP_SIZE_FIVE) {} 432 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 433 ~Char32OpCode() override = default; 434 NO_COPY_SEMANTIC(Char32OpCode); 435 NO_MOVE_SEMANTIC(Char32OpCode); 436 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 437 }; 438 439 class Range32OpCode : public RegExpOpCode { 440 public: Range32OpCode()441 Range32OpCode() : RegExpOpCode(OP_RANGE32, RegExpOpCode::OP_SIZE_ONE) {} 442 ~Range32OpCode() override = default; 443 NO_COPY_SEMANTIC(Range32OpCode); 444 NO_MOVE_SEMANTIC(Range32OpCode); 445 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 446 uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const; 447 }; 448 } // namespace ark 449 #endif 450