1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_REGEXP_OPCODE_H 17 #define ECMASCRIPT_REGEXP_OPCODE_H 18 19 #include <list> 20 21 #include "ecmascript/mem/dyn_chunk.h" 22 23 namespace panda { 24 namespace ecmascript { 25 class RegExpOpCode { 26 public: 27 enum : uint8_t { 28 OP_SAVE_START = 0U, 29 OP_SAVE_END, 30 OP_CHAR, 31 OP_GOTO, 32 OP_SPLIT_FIRST, 33 OP_SPLIT_NEXT, 34 OP_MATCH_AHEAD, 35 OP_NEGATIVE_MATCH_AHEAD, 36 OP_MATCH, 37 OP_LOOP, 38 OP_LOOP_GREEDY, 39 OP_PUSH_CHAR, 40 OP_CHECK_CHAR, 41 OP_PUSH, 42 OP_POP, 43 OP_SAVE_RESET, 44 OP_LINE_START, 45 OP_LINE_END, 46 OP_WORD_BOUNDARY, 47 OP_NOT_WORD_BOUNDARY, 48 OP_ALL, 49 OP_DOTS, 50 OP_MATCH_END, 51 OP_PREV, 52 OP_RANGE, 53 OP_BACKREFERENCE, 54 OP_BACKWARD_BACKREFERENCE, 55 OP_CHAR32, 56 OP_RANGE32, 57 OP_INVALID, 58 }; 59 60 static constexpr size_t OP_SIZE_ONE = 1; 61 static constexpr size_t OP_SIZE_TWO = 2; 62 static constexpr size_t OP_SIZE_THREE = 3; 63 static constexpr size_t OP_SIZE_FOUR = 4; 64 static constexpr size_t OP_SIZE_FIVE = 5; 65 static constexpr size_t OP_SIZE_EIGHT = 8; 66 static constexpr size_t OP_SIZE_NINE = 9; 67 static constexpr size_t OP_SIZE_THIRTEEN = 13; 68 69 RegExpOpCode(uint8_t opCode, int size); 70 NO_COPY_SEMANTIC(RegExpOpCode); 71 NO_MOVE_SEMANTIC(RegExpOpCode); 72 73 virtual ~RegExpOpCode() = default; 74 static RegExpOpCode *GetRegExpOpCode(const DynChunk &buf, int pcOffset); 75 static RegExpOpCode *GetRegExpOpCode(uint8_t opCode); 76 static void DumpRegExpOpCode(std::ostream &out, const DynChunk &buf); GetSize()77 inline uint8_t GetSize() const 78 { 79 return size_; 80 } GetOpCode()81 inline uint8_t GetOpCode() const 82 { 83 return opCode_; 84 } GetDynChunkfSize(const DynChunk & buf)85 inline int GetDynChunkfSize(const DynChunk &buf) const 86 { 87 return buf.size_; 88 } 89 virtual uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const = 0; 90 91 private: 92 uint8_t opCode_ {0}; 93 uint8_t size_ {0}; 94 }; 95 96 class SaveStartOpCode : public RegExpOpCode { 97 public: SaveStartOpCode()98 SaveStartOpCode() : RegExpOpCode(OP_SAVE_START, RegExpOpCode::OP_SIZE_TWO) {} 99 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 100 ~SaveStartOpCode() override = default; 101 NO_COPY_SEMANTIC(SaveStartOpCode); 102 NO_MOVE_SEMANTIC(SaveStartOpCode); 103 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 104 }; 105 106 class SaveEndOpCode : public RegExpOpCode { 107 public: SaveEndOpCode()108 SaveEndOpCode() : RegExpOpCode(OP_SAVE_END, RegExpOpCode::OP_SIZE_TWO) {} 109 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 110 ~SaveEndOpCode() override = default; 111 NO_COPY_SEMANTIC(SaveEndOpCode); 112 NO_MOVE_SEMANTIC(SaveEndOpCode); 113 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 114 }; 115 116 class CharOpCode : public RegExpOpCode { 117 public: CharOpCode()118 CharOpCode() : RegExpOpCode(OP_CHAR, RegExpOpCode::OP_SIZE_THREE) {} 119 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 120 ~CharOpCode() override = default; 121 NO_COPY_SEMANTIC(CharOpCode); 122 NO_MOVE_SEMANTIC(CharOpCode); 123 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 124 }; 125 126 class GotoOpCode : public RegExpOpCode { 127 public: GotoOpCode()128 GotoOpCode() : RegExpOpCode(OP_GOTO, RegExpOpCode::OP_SIZE_FIVE) {} 129 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 130 void UpdateOpPara(DynChunk *buf, uint32_t offset, uint32_t para) const; 131 ~GotoOpCode() override = default; 132 NO_COPY_SEMANTIC(GotoOpCode); 133 NO_MOVE_SEMANTIC(GotoOpCode); 134 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 135 }; 136 137 class SplitNextOpCode : public RegExpOpCode { 138 public: SplitNextOpCode()139 SplitNextOpCode() : RegExpOpCode(OP_SPLIT_NEXT, RegExpOpCode::OP_SIZE_FIVE) {} 140 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 141 ~SplitNextOpCode() override = default; 142 NO_COPY_SEMANTIC(SplitNextOpCode); 143 NO_MOVE_SEMANTIC(SplitNextOpCode); 144 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 145 }; 146 147 class SplitFirstOpCode : public RegExpOpCode { 148 public: SplitFirstOpCode()149 SplitFirstOpCode() : RegExpOpCode(OP_SPLIT_FIRST, RegExpOpCode::OP_SIZE_FIVE) {} 150 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 151 ~SplitFirstOpCode() override = default; 152 NO_COPY_SEMANTIC(SplitFirstOpCode); 153 NO_MOVE_SEMANTIC(SplitFirstOpCode); 154 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 155 }; 156 157 class PushOpCode : public RegExpOpCode { 158 public: PushOpCode()159 PushOpCode() : RegExpOpCode(OP_PUSH, RegExpOpCode::OP_SIZE_ONE) {} 160 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const; 161 ~PushOpCode() override = default; 162 NO_COPY_SEMANTIC(PushOpCode); 163 NO_MOVE_SEMANTIC(PushOpCode); 164 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 165 }; 166 167 class PopOpCode : public RegExpOpCode { 168 public: PopOpCode()169 PopOpCode() : RegExpOpCode(OP_POP, RegExpOpCode::OP_SIZE_ONE) {} 170 uint32_t EmitOpCode(DynChunk *buf) const; 171 ~PopOpCode() override = default; 172 NO_COPY_SEMANTIC(PopOpCode); 173 NO_MOVE_SEMANTIC(PopOpCode); 174 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 175 }; 176 177 class PushCharOpCode : public RegExpOpCode { 178 public: PushCharOpCode()179 PushCharOpCode() : RegExpOpCode(OP_PUSH_CHAR, RegExpOpCode::OP_SIZE_ONE) {} 180 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset) const; 181 ~PushCharOpCode() override = default; 182 NO_COPY_SEMANTIC(PushCharOpCode); 183 NO_MOVE_SEMANTIC(PushCharOpCode); 184 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 185 }; 186 187 class CheckCharOpCode : public RegExpOpCode { 188 public: CheckCharOpCode()189 CheckCharOpCode() : RegExpOpCode(OP_CHECK_CHAR, RegExpOpCode::OP_SIZE_FIVE) {} 190 uint32_t EmitOpCode(DynChunk *buf, uint32_t offset) const; 191 ~CheckCharOpCode() override = default; 192 NO_COPY_SEMANTIC(CheckCharOpCode); 193 NO_MOVE_SEMANTIC(CheckCharOpCode); 194 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 195 }; 196 197 class LoopOpCode : public RegExpOpCode { 198 public: LoopOpCode()199 LoopOpCode() : RegExpOpCode(OP_LOOP, RegExpOpCode::OP_SIZE_THIRTEEN) {} 200 uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const; 201 ~LoopOpCode() override = default; 202 NO_COPY_SEMANTIC(LoopOpCode); 203 NO_MOVE_SEMANTIC(LoopOpCode); 204 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 205 }; 206 207 class LoopGreedyOpCode : public RegExpOpCode { 208 public: LoopGreedyOpCode()209 LoopGreedyOpCode() : RegExpOpCode(OP_LOOP_GREEDY, RegExpOpCode::OP_SIZE_THIRTEEN) {} 210 uint32_t EmitOpCode(DynChunk *buf, uint32_t start, uint32_t min, uint32_t max) const; 211 ~LoopGreedyOpCode() override = default; 212 NO_COPY_SEMANTIC(LoopGreedyOpCode); 213 NO_MOVE_SEMANTIC(LoopGreedyOpCode); 214 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 215 }; 216 217 class SaveResetOpCode : public RegExpOpCode { 218 public: SaveResetOpCode()219 SaveResetOpCode() : RegExpOpCode(OP_SAVE_RESET, RegExpOpCode::OP_SIZE_THREE) {} 220 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t start, uint32_t end) const; 221 ~SaveResetOpCode() override = default; 222 NO_COPY_SEMANTIC(SaveResetOpCode); 223 NO_MOVE_SEMANTIC(SaveResetOpCode); 224 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 225 }; 226 227 class MatchOpCode : public RegExpOpCode { 228 public: MatchOpCode()229 MatchOpCode() : RegExpOpCode(OP_MATCH, RegExpOpCode::OP_SIZE_ONE) {} 230 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 231 ~MatchOpCode() override = default; 232 NO_COPY_SEMANTIC(MatchOpCode); 233 NO_MOVE_SEMANTIC(MatchOpCode); 234 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 235 }; 236 237 class MatchEndOpCode : public RegExpOpCode { 238 public: MatchEndOpCode()239 MatchEndOpCode() : RegExpOpCode(OP_MATCH_END, RegExpOpCode::OP_SIZE_ONE) {} 240 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 241 ~MatchEndOpCode() override = default; 242 NO_COPY_SEMANTIC(MatchEndOpCode); 243 NO_MOVE_SEMANTIC(MatchEndOpCode); 244 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 245 }; 246 247 class LineStartOpCode : public RegExpOpCode { 248 public: LineStartOpCode()249 LineStartOpCode() : RegExpOpCode(OP_LINE_START, RegExpOpCode::OP_SIZE_ONE) {} 250 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 251 ~LineStartOpCode() override = default; 252 NO_COPY_SEMANTIC(LineStartOpCode); 253 NO_MOVE_SEMANTIC(LineStartOpCode); 254 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 255 }; 256 257 class LineEndOpCode : public RegExpOpCode { 258 public: LineEndOpCode()259 LineEndOpCode() : RegExpOpCode(OP_LINE_END, RegExpOpCode::OP_SIZE_ONE) {} 260 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 261 ~LineEndOpCode() override = default; 262 NO_COPY_SEMANTIC(LineEndOpCode); 263 NO_MOVE_SEMANTIC(LineEndOpCode); 264 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 265 }; 266 267 class WordBoundaryOpCode : public RegExpOpCode { 268 public: WordBoundaryOpCode()269 WordBoundaryOpCode() : RegExpOpCode(OP_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {} 270 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 271 ~WordBoundaryOpCode() override = default; 272 NO_COPY_SEMANTIC(WordBoundaryOpCode); 273 NO_MOVE_SEMANTIC(WordBoundaryOpCode); 274 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 275 }; 276 277 class NotWordBoundaryOpCode : public RegExpOpCode { 278 public: NotWordBoundaryOpCode()279 NotWordBoundaryOpCode() : RegExpOpCode(OP_NOT_WORD_BOUNDARY, RegExpOpCode::OP_SIZE_ONE) {} 280 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 281 ~NotWordBoundaryOpCode() override = default; 282 NO_COPY_SEMANTIC(NotWordBoundaryOpCode); 283 NO_MOVE_SEMANTIC(NotWordBoundaryOpCode); 284 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 285 }; 286 287 class AllOpCode : public RegExpOpCode { 288 public: AllOpCode()289 AllOpCode() : RegExpOpCode(OP_ALL, RegExpOpCode::OP_SIZE_ONE) {} 290 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 291 ~AllOpCode() override = default; 292 NO_COPY_SEMANTIC(AllOpCode); 293 NO_MOVE_SEMANTIC(AllOpCode); 294 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 295 }; 296 297 class DotsOpCode : public RegExpOpCode { 298 public: DotsOpCode()299 DotsOpCode() : RegExpOpCode(OP_DOTS, RegExpOpCode::OP_SIZE_ONE) {} 300 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 301 ~DotsOpCode() override = default; 302 NO_COPY_SEMANTIC(DotsOpCode); 303 NO_MOVE_SEMANTIC(DotsOpCode); 304 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 305 }; 306 307 class RangeSet { 308 public: 309 RangeSet() = default; RangeSet(uint32_t value)310 explicit RangeSet(uint32_t value) 311 { 312 Insert(value, value); 313 } RangeSet(uint32_t start,uint32_t end)314 RangeSet(uint32_t start, uint32_t end) 315 { 316 Insert(start, end); 317 } RangeSet(const std::list<std::pair<uint32_t,uint32_t>> & rangeSet)318 explicit RangeSet(const std::list<std::pair<uint32_t, uint32_t>> &rangeSet) 319 { 320 rangeSet_ = rangeSet; 321 } 322 ~RangeSet() = default; 323 IsIntersect(uint64_t start,uint64_t end,uint64_t start1,uint64_t end1)324 inline bool IsIntersect(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const 325 { 326 return ((start1 > start) && (start1 < end)) || ((start > start1) && (start < end1)); 327 } IsAdjacent(uint64_t start,uint64_t end,uint64_t start1,uint64_t end1)328 inline bool IsAdjacent(uint64_t start, uint64_t end, uint64_t start1, uint64_t end1) const 329 { 330 return ((end == start1 || (end + 1) == start1)) || ((end1 == start) || (end1 + 1 == start)); 331 } 332 333 inline bool operator==(const RangeSet &other) const 334 { 335 return rangeSet_ == other.rangeSet_; 336 } 337 IsContain(uint32_t value)338 inline bool IsContain(uint32_t value) const 339 { 340 for (auto range : rangeSet_) { 341 if (value >= range.first && value <= range.second) { 342 return true; 343 } 344 } 345 return false; 346 } HighestValue()347 inline uint32_t HighestValue() const 348 { 349 if (!rangeSet_.empty()) { 350 return rangeSet_.back().second; 351 } 352 return 0; 353 } 354 RangeSet(RangeSet const &) = default; 355 RangeSet &operator=(RangeSet const &) = default; 356 RangeSet(RangeSet &&) = default; 357 RangeSet &operator=(RangeSet &&) = default; 358 359 void Insert(uint32_t start, uint32_t end); 360 void Insert(const RangeSet &s1); 361 void Invert(bool isUtf16); 362 void Inter(RangeSet &cr, const RangeSet &s1); 363 void Compress(); 364 365 private: 366 friend class RangeOpCode; 367 friend class Range32OpCode; 368 std::list<std::pair<uint32_t, uint32_t>> rangeSet_ {}; 369 }; 370 371 class RangeOpCode : public RegExpOpCode { 372 public: RangeOpCode()373 RangeOpCode() : RegExpOpCode(OP_RANGE, RegExpOpCode::OP_SIZE_ONE) {} 374 ~RangeOpCode() override = default; 375 NO_COPY_SEMANTIC(RangeOpCode); 376 NO_MOVE_SEMANTIC(RangeOpCode); 377 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 378 uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const; 379 }; 380 381 class MatchAheadOpCode : public RegExpOpCode { 382 public: MatchAheadOpCode()383 MatchAheadOpCode() : RegExpOpCode(OP_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {} 384 ~MatchAheadOpCode() override = default; 385 NO_COPY_SEMANTIC(MatchAheadOpCode); 386 NO_MOVE_SEMANTIC(MatchAheadOpCode); 387 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 388 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 389 }; 390 391 class NegativeMatchAheadOpCode : public RegExpOpCode { 392 public: NegativeMatchAheadOpCode()393 NegativeMatchAheadOpCode() : RegExpOpCode(OP_NEGATIVE_MATCH_AHEAD, RegExpOpCode::OP_SIZE_FIVE) {} 394 uint32_t InsertOpCode(DynChunk *buf, uint32_t offset, uint32_t para) const; 395 ~NegativeMatchAheadOpCode() override = default; 396 NO_COPY_SEMANTIC(NegativeMatchAheadOpCode); 397 NO_MOVE_SEMANTIC(NegativeMatchAheadOpCode); 398 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 399 }; 400 401 class PrevOpCode : public RegExpOpCode { 402 public: PrevOpCode()403 PrevOpCode() : RegExpOpCode(OP_PREV, RegExpOpCode::OP_SIZE_ONE) {} 404 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 405 ~PrevOpCode() override = default; 406 NO_COPY_SEMANTIC(PrevOpCode); 407 NO_MOVE_SEMANTIC(PrevOpCode); 408 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 409 }; 410 411 class BackReferenceOpCode : public RegExpOpCode { 412 public: BackReferenceOpCode()413 BackReferenceOpCode() : RegExpOpCode(OP_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {} 414 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 415 ~BackReferenceOpCode() override = default; 416 NO_COPY_SEMANTIC(BackReferenceOpCode); 417 NO_MOVE_SEMANTIC(BackReferenceOpCode); 418 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 419 }; 420 421 class BackwardBackReferenceOpCode : public RegExpOpCode { 422 public: BackwardBackReferenceOpCode()423 BackwardBackReferenceOpCode() : RegExpOpCode(OP_BACKWARD_BACKREFERENCE, RegExpOpCode::OP_SIZE_TWO) {} 424 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 425 ~BackwardBackReferenceOpCode() override = default; 426 NO_COPY_SEMANTIC(BackwardBackReferenceOpCode); 427 NO_MOVE_SEMANTIC(BackwardBackReferenceOpCode); 428 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 429 }; 430 431 class Char32OpCode : public RegExpOpCode { 432 public: Char32OpCode()433 Char32OpCode() : RegExpOpCode(OP_CHAR32, RegExpOpCode::OP_SIZE_FIVE) {} 434 uint32_t EmitOpCode(DynChunk *buf, uint32_t para) const; 435 ~Char32OpCode() override = default; 436 NO_COPY_SEMANTIC(Char32OpCode); 437 NO_MOVE_SEMANTIC(Char32OpCode); 438 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 439 }; 440 441 class Range32OpCode : public RegExpOpCode { 442 public: Range32OpCode()443 Range32OpCode() : RegExpOpCode(OP_RANGE32, RegExpOpCode::OP_SIZE_ONE) {} 444 ~Range32OpCode() override = default; 445 NO_COPY_SEMANTIC(Range32OpCode); 446 NO_MOVE_SEMANTIC(Range32OpCode); 447 uint32_t DumpOpCode(std::ostream &out, const DynChunk &buf, uint32_t offset) const override; 448 uint32_t InsertOpCode(DynChunk *buf, const RangeSet &rangeSet) const; 449 }; 450 } // namespace ecmascript 451 } // namespace panda 452 #endif 453