1 /* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_COMPILER_BYTECODE_INFO_COLLECTOR_H 17 #define ECMASCRIPT_COMPILER_BYTECODE_INFO_COLLECTOR_H 18 19 #include "ecmascript/jspandafile/js_pandafile.h" 20 #include "ecmascript/pgo_profiler/pgo_profiler_loader.h" 21 #include "libpandafile/bytecode_instruction-inl.h" 22 23 namespace panda::ecmascript::kungfu { 24 /* ts source code 25 * let a:number = 1; 26 * function f() { 27 * let b:number = 1; 28 * function g() { 29 * return a + b; 30 * } 31 * return g(); 32 * } 33 * 34 * The structure of Lexical Environment 35 * 36 * Lexical Environment Lexical Environment 37 * Global Environment of function f of function g 38 * +-------------------+ <----+ +-------------------+ <----+ +-------------------+ 39 * null <----| Outer Reference | +----| Outer Reference | +----| Outer Reference | 40 * +-------------------+ +-------------------+ +-------------------+ 41 * |Environment Recoder| |Environment Recoder| |Environment Recoder| 42 * +-------------------+ +-------------------+ +-------------------+ 43 * 44 * We only record the type of the variable in Environment Recoder. 45 * In the design of the Ark bytecode, if a method does not have any 46 * lex-env variable in its Lexical Environment, then there will be 47 * no EcmaOpcode::NEWLEXENV in it which leads to ARK runtime will 48 * not create a Lexical Environment when the method is executed. 49 * In order to simulate the state of the runtime as much as possible, 50 * a field named 'status' will be added into the class LexEnv to 51 * measure this state. Take the above code as an example, although in 52 * static analysis, we will create LexEnv for each method, only Lexenvs 53 * of global and function f will be created when methods are executed. 54 */ 55 56 enum class LexicalEnvStatus : uint8_t { 57 VIRTUAL_LEXENV, 58 REALITY_LEXENV 59 }; 60 61 class LexEnv { 62 public: 63 LexEnv() = default; 64 ~LexEnv() = default; 65 66 static constexpr uint32_t DEFAULT_ROOT = std::numeric_limits<uint32_t>::max(); 67 Inilialize(uint32_t outMethodId,uint32_t numOfLexVars,LexicalEnvStatus status)68 inline void Inilialize(uint32_t outMethodId, uint32_t numOfLexVars, LexicalEnvStatus status) 69 { 70 outerMethodId_ = outMethodId; 71 lexVarTypes_.resize(numOfLexVars, GateType::AnyType()); 72 status_ = status; 73 } 74 GetOutMethodId()75 inline uint32_t GetOutMethodId() const 76 { 77 return outerMethodId_; 78 } 79 GetLexEnvStatus()80 inline LexicalEnvStatus GetLexEnvStatus() const 81 { 82 return status_; 83 } 84 GetLexVarType(uint32_t slot)85 inline GateType GetLexVarType(uint32_t slot) const 86 { 87 if (slot < lexVarTypes_.size()) { 88 return lexVarTypes_[slot]; 89 } 90 return GateType::AnyType(); 91 } 92 SetLexVarType(uint32_t slot,const GateType & type)93 inline void SetLexVarType(uint32_t slot, const GateType &type) 94 { 95 if (slot < lexVarTypes_.size()) { 96 lexVarTypes_[slot] = type; 97 } 98 } 99 100 private: 101 uint32_t outerMethodId_ { DEFAULT_ROOT }; 102 std::vector<GateType> lexVarTypes_ {}; 103 LexicalEnvStatus status_ { LexicalEnvStatus::VIRTUAL_LEXENV }; 104 }; 105 106 // each method in the abc file corresponds to one MethodInfo and 107 // methods with the same instructions share one common MethodPcInfo 108 struct MethodPcInfo { 109 std::vector<const uint8_t*> pcOffsets {}; 110 uint32_t methodsSize {0}; 111 }; 112 113 class MethodInfo { 114 public: 115 explicit MethodInfo(uint32_t methodInfoIndex, uint32_t methodPcInfoIndex, uint32_t outMethodIdx, 116 uint32_t outMethodOffset = MethodInfo::DEFAULT_OUTMETHOD_OFFSET, uint32_t num = 0, 117 LexicalEnvStatus lexEnvStatus = LexicalEnvStatus::VIRTUAL_LEXENV) methodInfoIndex_(methodInfoIndex)118 : methodInfoIndex_(methodInfoIndex), methodPcInfoIndex_(methodPcInfoIndex), outerMethodId_(outMethodIdx), 119 outerMethodOffset_(outMethodOffset), numOfLexVars_(num), status_(lexEnvStatus) 120 { 121 } 122 123 ~MethodInfo() = default; 124 125 static constexpr uint32_t DEFAULT_OUTMETHOD_OFFSET = 0; 126 GetOutMethodId()127 inline uint32_t GetOutMethodId() const 128 { 129 return outerMethodId_; 130 } 131 SetOutMethodId(uint32_t outMethodId)132 inline uint32_t SetOutMethodId(uint32_t outMethodId) 133 { 134 return outerMethodId_ = outMethodId; 135 } 136 GetOutMethodOffset()137 inline uint32_t GetOutMethodOffset() const 138 { 139 return outerMethodOffset_; 140 } 141 SetOutMethodOffset(uint32_t outMethodOffset)142 inline uint32_t SetOutMethodOffset(uint32_t outMethodOffset) 143 { 144 return outerMethodOffset_ = outMethodOffset; 145 } 146 GetNumOfLexVars()147 inline uint32_t GetNumOfLexVars() const 148 { 149 return numOfLexVars_; 150 } 151 SetNumOfLexVars(uint32_t numOfLexVars)152 inline uint32_t SetNumOfLexVars(uint32_t numOfLexVars) 153 { 154 return numOfLexVars_ = numOfLexVars; 155 } 156 GetLexEnvStatus()157 inline LexicalEnvStatus GetLexEnvStatus() const 158 { 159 return status_; 160 } 161 SetLexEnvStatus(LexicalEnvStatus status)162 inline LexicalEnvStatus SetLexEnvStatus(LexicalEnvStatus status) 163 { 164 return status_ = status; 165 } 166 GetMethodPcInfoIndex()167 inline uint32_t GetMethodPcInfoIndex() const 168 { 169 return methodPcInfoIndex_; 170 } 171 SetMethodPcInfoIndex(uint32_t methodPcInfoIndex)172 inline uint32_t SetMethodPcInfoIndex(uint32_t methodPcInfoIndex) 173 { 174 return methodPcInfoIndex_ = methodPcInfoIndex; 175 } 176 GetMethodInfoIndex()177 inline uint32_t GetMethodInfoIndex() const 178 { 179 return methodInfoIndex_; 180 } 181 SetMethodInfoIndex(uint32_t methodInfoIndex)182 inline uint32_t SetMethodInfoIndex(uint32_t methodInfoIndex) 183 { 184 return methodInfoIndex_ = methodInfoIndex; 185 } 186 AddInnerMethod(uint32_t offset)187 inline void AddInnerMethod(uint32_t offset) 188 { 189 innerMethods_.emplace_back(offset); 190 } 191 GetInnerMethods()192 inline const std::vector<uint32_t> &GetInnerMethods() const 193 { 194 return innerMethods_; 195 } 196 IsPGO()197 bool IsPGO() const 198 { 199 return isPgoMarked_; 200 } 201 SetIsPGO(bool pgoMark)202 void SetIsPGO(bool pgoMark) 203 { 204 isPgoMarked_ = pgoMark; 205 } 206 IsCompiled()207 bool IsCompiled() const 208 { 209 return isCompiled_; 210 } 211 SetIsCompiled(bool isCompiled)212 void SetIsCompiled(bool isCompiled) 213 { 214 isCompiled_ = isCompiled; 215 } 216 217 private: 218 // used to record the index of the current MethodInfo to speed up the lookup of lexEnv 219 uint32_t methodInfoIndex_ { 0 }; 220 // used to obtain MethodPcInfo from the vector methodPcInfos of struct BCInfo 221 uint32_t methodPcInfoIndex_ { 0 }; 222 std::vector<uint32_t> innerMethods_ {}; 223 uint32_t outerMethodId_ { LexEnv::DEFAULT_ROOT }; 224 uint32_t outerMethodOffset_ { MethodInfo::DEFAULT_OUTMETHOD_OFFSET }; 225 uint32_t numOfLexVars_ { 0 }; 226 LexicalEnvStatus status_ { LexicalEnvStatus::VIRTUAL_LEXENV }; 227 bool isPgoMarked_ {false}; 228 bool isCompiled_ {false}; 229 }; 230 231 232 class ConstantPoolInfo { 233 public: 234 enum ItemType { 235 STRING = 0, 236 METHOD, 237 CLASS_LITERAL, 238 OBJECT_LITERAL, 239 ARRAY_LITERAL, 240 241 ITEM_TYPE_NUM, 242 ITEM_TYPE_FIRST = STRING, 243 ITEM_TYPE_LAST = ARRAY_LITERAL, 244 }; 245 246 struct ItemData { 247 uint32_t index {0}; 248 uint32_t outerMethodOffset {0}; 249 CString *recordName {nullptr}; 250 }; 251 252 // key:constantpool index, value:ItemData 253 using Item = std::unordered_map<uint32_t, ItemData>; 254 ConstantPoolInfo()255 ConstantPoolInfo() : items_(ItemType::ITEM_TYPE_NUM, Item{}) {} 256 GetCPItem(ItemType type)257 Item& GetCPItem(ItemType type) 258 { 259 ASSERT(ItemType::ITEM_TYPE_FIRST <= type && type <= ItemType::ITEM_TYPE_LAST); 260 return items_[type]; 261 } 262 263 void AddIndexToCPItem(ItemType type, uint32_t index, uint32_t methodOffset); 264 private: 265 std::vector<Item> items_; 266 }; 267 268 class BCInfo { 269 public: BCInfo(size_t maxAotMethodSize)270 explicit BCInfo(size_t maxAotMethodSize) 271 : maxMethodSize_(maxAotMethodSize) 272 { 273 } 274 GetMainMethodIndexes()275 std::vector<uint32_t>& GetMainMethodIndexes() 276 { 277 return mainMethodIndexes_; 278 } 279 GetRecordNames()280 std::vector<CString>& GetRecordNames() 281 { 282 return recordNames_; 283 } 284 GetMethodPcInfos()285 std::vector<MethodPcInfo>& GetMethodPcInfos() 286 { 287 return methodPcInfos_; 288 } 289 GetMethodList()290 std::unordered_map<uint32_t, MethodInfo>& GetMethodList() 291 { 292 return methodList_; 293 } 294 GetMaxMethodSize()295 size_t GetMaxMethodSize() const 296 { 297 return maxMethodSize_; 298 } 299 IsSkippedMethod(uint32_t methodOffset)300 bool IsSkippedMethod(uint32_t methodOffset) const 301 { 302 if (skippedMethods_.find(methodOffset) == skippedMethods_.end()) { 303 return false; 304 } 305 return true; 306 } 307 AddSkippedMethod(uint32_t methodOffset)308 void AddSkippedMethod(uint32_t methodOffset) 309 { 310 skippedMethods_.insert(methodOffset); 311 } 312 EraseSkippedMethod(uint32_t methodOffset)313 void EraseSkippedMethod(uint32_t methodOffset) 314 { 315 if (skippedMethods_.find(methodOffset) != skippedMethods_.end()) { 316 skippedMethods_.erase(methodOffset); 317 } 318 } 319 AddRecordName(const CString & recordName)320 void AddRecordName(const CString &recordName) 321 { 322 recordNames_.emplace_back(recordName); 323 } 324 GetRecordName(uint32_t index)325 CString GetRecordName(uint32_t index) const 326 { 327 return recordNames_[index]; 328 } 329 AddMethodOffsetToRecordName(uint32_t methodOffset,CString recordName)330 void AddMethodOffsetToRecordName(uint32_t methodOffset, CString recordName) 331 { 332 methodOffsetToRecordName_.emplace(methodOffset, recordName); 333 } 334 GetSkippedMethodSize()335 size_t GetSkippedMethodSize() const 336 { 337 return skippedMethods_.size(); 338 } 339 AddIndexToCPInfo(ConstantPoolInfo::ItemType type,uint32_t index,uint32_t methodOffset)340 void AddIndexToCPInfo(ConstantPoolInfo::ItemType type, uint32_t index, uint32_t methodOffset) 341 { 342 cpInfo_.AddIndexToCPItem(type, index, methodOffset); 343 } 344 345 template <class Callback> IterateConstantPoolInfo(ConstantPoolInfo::ItemType type,const Callback & cb)346 void IterateConstantPoolInfo(ConstantPoolInfo::ItemType type, const Callback &cb) 347 { 348 auto &item = cpInfo_.GetCPItem(type); 349 for (auto &iter : item) { 350 ConstantPoolInfo::ItemData &data = iter.second; 351 data.recordName = &methodOffsetToRecordName_[data.outerMethodOffset]; 352 cb(data); 353 } 354 } 355 GetDefineMethod(const uint32_t classLiteralOffset)356 uint32_t GetDefineMethod(const uint32_t classLiteralOffset) const 357 { 358 return classTypeLOffsetToDefMethod_.at(classLiteralOffset); 359 } 360 HasClassDefMethod(const uint32_t classLiteralOffset)361 bool HasClassDefMethod(const uint32_t classLiteralOffset) const 362 { 363 return classTypeLOffsetToDefMethod_.find(classLiteralOffset) != classTypeLOffsetToDefMethod_.end(); 364 } 365 SetClassTypeOffsetAndDefMethod(uint32_t classLiteralOffset,uint32_t methodOffset)366 void SetClassTypeOffsetAndDefMethod(uint32_t classLiteralOffset, uint32_t methodOffset) 367 { 368 if (classTypeLOffsetToDefMethod_.find(classLiteralOffset) == classTypeLOffsetToDefMethod_.end()) { 369 classTypeLOffsetToDefMethod_.emplace(classLiteralOffset, methodOffset); 370 } 371 } 372 private: 373 std::vector<uint32_t> mainMethodIndexes_ {}; 374 std::vector<CString> recordNames_ {}; 375 std::vector<MethodPcInfo> methodPcInfos_ {}; 376 std::unordered_map<uint32_t, MethodInfo> methodList_ {}; 377 std::unordered_map<uint32_t, CString> methodOffsetToRecordName_ {}; 378 std::set<uint32_t> skippedMethods_ {}; 379 ConstantPoolInfo cpInfo_; 380 size_t maxMethodSize_; 381 std::unordered_map<uint32_t, uint32_t> classTypeLOffsetToDefMethod_ {}; 382 }; 383 384 class LexEnvManager { 385 public: 386 explicit LexEnvManager(BCInfo &bcInfo); 387 ~LexEnvManager() = default; 388 NO_COPY_SEMANTIC(LexEnvManager); 389 NO_MOVE_SEMANTIC(LexEnvManager); 390 391 void SetLexEnvElementType(uint32_t methodId, uint32_t level, uint32_t slot, const GateType &type); 392 GateType GetLexEnvElementType(uint32_t methodId, uint32_t level, uint32_t slot) const; 393 394 private: 395 uint32_t GetTargetLexEnv(uint32_t methodId, uint32_t level) const; 396 GetOutMethodId(uint32_t methodId)397 inline uint32_t GetOutMethodId(uint32_t methodId) const 398 { 399 return lexEnvs_[methodId].GetOutMethodId(); 400 } 401 GetLexEnvStatus(uint32_t methodId)402 inline LexicalEnvStatus GetLexEnvStatus(uint32_t methodId) const 403 { 404 return lexEnvs_[methodId].GetLexEnvStatus(); 405 } 406 HasDefaultRoot(uint32_t methodId)407 inline bool HasDefaultRoot(uint32_t methodId) const 408 { 409 return GetOutMethodId(methodId) == LexEnv::DEFAULT_ROOT; 410 } 411 412 std::vector<LexEnv> lexEnvs_ {}; 413 }; 414 415 class BytecodeInfoCollector { 416 public: BytecodeInfoCollector(EcmaVM * vm,JSPandaFile * jsPandaFile,size_t maxAotMethodSize,bool enableCollectLiteralInfo)417 explicit BytecodeInfoCollector(EcmaVM *vm, JSPandaFile *jsPandaFile, 418 size_t maxAotMethodSize, bool enableCollectLiteralInfo) 419 : vm_(vm), jsPandaFile_(jsPandaFile), bytecodeInfo_(maxAotMethodSize), 420 enableCollectLiteralInfo_(enableCollectLiteralInfo) 421 { 422 ProcessClasses(); 423 } 424 ~BytecodeInfoCollector() = default; 425 NO_COPY_SEMANTIC(BytecodeInfoCollector); 426 NO_MOVE_SEMANTIC(BytecodeInfoCollector); 427 EnableCollectLiteralInfo()428 bool EnableCollectLiteralInfo() const 429 { 430 return enableCollectLiteralInfo_; 431 } 432 GetBytecodeInfo()433 BCInfo& GetBytecodeInfo() 434 { 435 return bytecodeInfo_; 436 } 437 IsSkippedMethod(uint32_t methodOffset)438 bool IsSkippedMethod(uint32_t methodOffset) const 439 { 440 return bytecodeInfo_.IsSkippedMethod(methodOffset); 441 } 442 GetJSPandaFile()443 const JSPandaFile* GetJSPandaFile() 444 { 445 return jsPandaFile_; 446 } 447 448 template <class Callback> IterateConstantPoolInfo(ConstantPoolInfo::ItemType type,const Callback & cb)449 void IterateConstantPoolInfo(ConstantPoolInfo::ItemType type, const Callback &cb) 450 { 451 bytecodeInfo_.IterateConstantPoolInfo(type, cb); 452 } 453 454 private: GetMethodInfoID()455 inline size_t GetMethodInfoID() 456 { 457 return methodInfoIndex_++; 458 } 459 AddConstantPoolIndexToBCInfo(ConstantPoolInfo::ItemType type,uint32_t index,uint32_t methodOffset)460 void AddConstantPoolIndexToBCInfo(ConstantPoolInfo::ItemType type, 461 uint32_t index, uint32_t methodOffset) 462 { 463 bytecodeInfo_.AddIndexToCPInfo(type, index, methodOffset); 464 } 465 GetClassName(const EntityId entityId)466 inline std::string GetClassName(const EntityId entityId) 467 { 468 std::string className(MethodLiteral::GetMethodName(jsPandaFile_, entityId)); 469 if (LIKELY(className.find('#') != std::string::npos)) { 470 size_t poiIndex = className.find_last_of('#'); 471 className = className.substr(poiIndex + 1); 472 } 473 return className; 474 } 475 476 const CString GetEntryFunName(const std::string_view &entryPoint) const; 477 void ProcessClasses(); 478 void CollectMethodPcsFromBC(const uint32_t insSz, const uint8_t *insArr, 479 const MethodLiteral *method, std::vector<std::string> &classNameVec); 480 void SetMethodPcInfoIndex(uint32_t methodOffset, const std::pair<size_t, uint32_t> &processedMethodInfo); 481 void CollectInnerMethods(const MethodLiteral *method, uint32_t innerMethodOffset); 482 void CollectInnerMethods(uint32_t methodId, uint32_t innerMethodOffset); 483 void CollectInnerMethodsFromLiteral(const MethodLiteral *method, uint64_t index); 484 void NewLexEnvWithSize(const MethodLiteral *method, uint64_t numOfLexVars); 485 void CollectInnerMethodsFromNewLiteral(const MethodLiteral *method, panda_file::File::EntityId literalId); 486 void CollectMethodInfoFromBC(const BytecodeInstruction &bcIns, const MethodLiteral *method, 487 std::vector<std::string> &classNameVec, int32_t bcIndex); 488 void CollectConstantPoolIndexInfoFromBC(const BytecodeInstruction &bcIns, const MethodLiteral *method); 489 void IterateLiteral(const MethodLiteral *method, std::vector<uint32_t> &classOffsetVector); 490 void CollectClassLiteralInfo(const MethodLiteral *method, const std::vector<std::string> &classNameVec); 491 492 EcmaVM *vm_; 493 JSPandaFile *jsPandaFile_ {nullptr}; 494 BCInfo bytecodeInfo_; 495 size_t methodInfoIndex_ {0}; 496 bool enableCollectLiteralInfo_ {false}; 497 std::set<int32_t> classDefBCIndexes_ {}; 498 }; 499 } // namespace panda::ecmascript::kungfu 500 #endif // ECMASCRIPT_COMPILER_BYTECODE_INFO_COLLECTOR_H 501