• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_COMPILER_BYTECODE_INFO_COLLECTOR_H
17 #define ECMASCRIPT_COMPILER_BYTECODE_INFO_COLLECTOR_H
18 
#include <utility>

#include "ecmascript/jspandafile/js_pandafile.h"
#include "ecmascript/pgo_profiler/pgo_profiler_loader.h"
#include "libpandafile/bytecode_instruction-inl.h"
22 
23 namespace panda::ecmascript::kungfu {
/*    ts source code
 *    let a:number = 1;
 *    function f() {
 *        let b:number = 1;
 *        function g() {
 *            return a + b;
 *        }
 *        return g();
 *    }
 *
 *                                     The structure of Lexical Environment
 *
 *                                               Lexical Environment             Lexical Environment
 *               Global Environment                 of function f                   of function g
 *              +-------------------+ <----+    +-------------------+ <----+    +-------------------+
 *    null <----|  Outer Reference  |      +----|  Outer Reference  |      +----|  Outer Reference  |
 *              +-------------------+           +-------------------+           +-------------------+
 *              |Environment Record |           |Environment Record |           |Environment Record |
 *              +-------------------+           +-------------------+           +-------------------+
 *
 *    We only record the types of the variables in the Environment Record.
 *    In the design of the Ark bytecode, if a method does not have any
 *    lex-env variable in its Lexical Environment, there is no
 *    EcmaOpcode::NEWLEXENV in it, so the ARK runtime will not create a
 *    Lexical Environment when the method is executed.
 *    In order to simulate the state of the runtime as closely as possible,
 *    a field named 'status' is added to the class LexEnv to record this
 *    state. Taking the above code as an example: although static analysis
 *    creates a LexEnv for each method, only the LexEnvs of global and
 *    function f are created when the methods are executed.
 */
55 
// Status recorded for a LexEnv / MethodInfo. Entries start out as
// VIRTUAL_LEXENV (the default used throughout this header); REALITY_LEXENV is
// the other state a lexical environment can be marked with.
enum class LexicalEnvStatus : uint8_t {
    VIRTUAL_LEXENV = 0,
    REALITY_LEXENV = 1,
};
60 
61 class LexEnv {
62 public:
63     LexEnv() = default;
64     ~LexEnv() = default;
65 
66     static constexpr uint32_t DEFAULT_ROOT = std::numeric_limits<uint32_t>::max();
67 
Inilialize(uint32_t outMethodId,uint32_t numOfLexVars,LexicalEnvStatus status)68     inline void Inilialize(uint32_t outMethodId, uint32_t numOfLexVars, LexicalEnvStatus status)
69     {
70         outerMethodId_ = outMethodId;
71         lexVarTypes_.resize(numOfLexVars, GateType::AnyType());
72         status_ = status;
73     }
74 
GetOutMethodId()75     inline uint32_t GetOutMethodId() const
76     {
77         return outerMethodId_;
78     }
79 
GetLexEnvStatus()80     inline LexicalEnvStatus GetLexEnvStatus() const
81     {
82         return status_;
83     }
84 
GetLexVarType(uint32_t slot)85     inline GateType GetLexVarType(uint32_t slot) const
86     {
87         if (slot < lexVarTypes_.size()) {
88             return lexVarTypes_[slot];
89         }
90         return GateType::AnyType();
91     }
92 
SetLexVarType(uint32_t slot,const GateType & type)93     inline void SetLexVarType(uint32_t slot, const GateType &type)
94     {
95         if (slot < lexVarTypes_.size()) {
96             lexVarTypes_[slot] = type;
97         }
98     }
99 
100 private:
101     uint32_t outerMethodId_ { DEFAULT_ROOT };
102     std::vector<GateType> lexVarTypes_ {};
103     LexicalEnvStatus status_ { LexicalEnvStatus::VIRTUAL_LEXENV };
104 };
105 
106 // each method in the abc file corresponds to one MethodInfo and
107 // methods with the same instructions share one common MethodPcInfo
// Pc-related data that may be shared by several MethodInfo entries; a
// MethodInfo refers to its entry through its methodPcInfoIndex.
struct MethodPcInfo {
    // Byte pointers recorded for the method; filled in by code outside this header.
    std::vector<const uint8_t*> pcOffsets;
    // NOTE(review): presumably the size of the method's instructions — confirm against the collector.
    uint32_t methodsSize = 0;
};
112 
113 class MethodInfo {
114 public:
115     explicit MethodInfo(uint32_t methodInfoIndex, uint32_t methodPcInfoIndex, uint32_t outMethodIdx,
116                         uint32_t outMethodOffset = MethodInfo::DEFAULT_OUTMETHOD_OFFSET, uint32_t num = 0,
117                         LexicalEnvStatus lexEnvStatus = LexicalEnvStatus::VIRTUAL_LEXENV)
methodInfoIndex_(methodInfoIndex)118         : methodInfoIndex_(methodInfoIndex), methodPcInfoIndex_(methodPcInfoIndex), outerMethodId_(outMethodIdx),
119           outerMethodOffset_(outMethodOffset), numOfLexVars_(num), status_(lexEnvStatus)
120     {
121     }
122 
123     ~MethodInfo() = default;
124 
125     static constexpr uint32_t DEFAULT_OUTMETHOD_OFFSET = 0;
126 
GetOutMethodId()127     inline uint32_t GetOutMethodId() const
128     {
129         return outerMethodId_;
130     }
131 
SetOutMethodId(uint32_t outMethodId)132     inline uint32_t SetOutMethodId(uint32_t outMethodId)
133     {
134         return outerMethodId_ = outMethodId;
135     }
136 
GetOutMethodOffset()137     inline uint32_t GetOutMethodOffset() const
138     {
139         return outerMethodOffset_;
140     }
141 
SetOutMethodOffset(uint32_t outMethodOffset)142     inline uint32_t SetOutMethodOffset(uint32_t outMethodOffset)
143     {
144         return outerMethodOffset_ = outMethodOffset;
145     }
146 
GetNumOfLexVars()147     inline uint32_t GetNumOfLexVars() const
148     {
149         return numOfLexVars_;
150     }
151 
SetNumOfLexVars(uint32_t numOfLexVars)152     inline uint32_t SetNumOfLexVars(uint32_t numOfLexVars)
153     {
154         return numOfLexVars_ = numOfLexVars;
155     }
156 
GetLexEnvStatus()157     inline LexicalEnvStatus GetLexEnvStatus() const
158     {
159         return status_;
160     }
161 
SetLexEnvStatus(LexicalEnvStatus status)162     inline LexicalEnvStatus SetLexEnvStatus(LexicalEnvStatus status)
163     {
164         return status_ = status;
165     }
166 
GetMethodPcInfoIndex()167     inline uint32_t GetMethodPcInfoIndex() const
168     {
169         return methodPcInfoIndex_;
170     }
171 
SetMethodPcInfoIndex(uint32_t methodPcInfoIndex)172     inline uint32_t SetMethodPcInfoIndex(uint32_t methodPcInfoIndex)
173     {
174         return methodPcInfoIndex_ = methodPcInfoIndex;
175     }
176 
GetMethodInfoIndex()177     inline uint32_t GetMethodInfoIndex() const
178     {
179         return methodInfoIndex_;
180     }
181 
SetMethodInfoIndex(uint32_t methodInfoIndex)182     inline uint32_t SetMethodInfoIndex(uint32_t methodInfoIndex)
183     {
184         return methodInfoIndex_ = methodInfoIndex;
185     }
186 
AddInnerMethod(uint32_t offset)187     inline void AddInnerMethod(uint32_t offset)
188     {
189         innerMethods_.emplace_back(offset);
190     }
191 
GetInnerMethods()192     inline const std::vector<uint32_t> &GetInnerMethods() const
193     {
194         return innerMethods_;
195     }
196 
IsPGO()197     bool IsPGO() const
198     {
199         return isPgoMarked_;
200     }
201 
SetIsPGO(bool pgoMark)202     void SetIsPGO(bool pgoMark)
203     {
204         isPgoMarked_ = pgoMark;
205     }
206 
IsCompiled()207     bool IsCompiled() const
208     {
209         return isCompiled_;
210     }
211 
SetIsCompiled(bool isCompiled)212     void SetIsCompiled(bool isCompiled)
213     {
214         isCompiled_ = isCompiled;
215     }
216 
217 private:
218     // used to record the index of the current MethodInfo to speed up the lookup of lexEnv
219     uint32_t methodInfoIndex_ { 0 };
220     // used to obtain MethodPcInfo from the vector methodPcInfos of struct BCInfo
221     uint32_t methodPcInfoIndex_ { 0 };
222     std::vector<uint32_t> innerMethods_ {};
223     uint32_t outerMethodId_ { LexEnv::DEFAULT_ROOT };
224     uint32_t outerMethodOffset_ { MethodInfo::DEFAULT_OUTMETHOD_OFFSET };
225     uint32_t numOfLexVars_ { 0 };
226     LexicalEnvStatus status_ { LexicalEnvStatus::VIRTUAL_LEXENV };
227     bool isPgoMarked_ {false};
228     bool isCompiled_ {false};
229 };
230 
231 
232 class ConstantPoolInfo {
233 public:
234     enum ItemType {
235         STRING = 0,
236         METHOD,
237         CLASS_LITERAL,
238         OBJECT_LITERAL,
239         ARRAY_LITERAL,
240 
241         ITEM_TYPE_NUM,
242         ITEM_TYPE_FIRST = STRING,
243         ITEM_TYPE_LAST = ARRAY_LITERAL,
244     };
245 
246     struct ItemData {
247         uint32_t index {0};
248         uint32_t outerMethodOffset {0};
249         CString *recordName {nullptr};
250     };
251 
252     // key:constantpool index, value:ItemData
253     using Item = std::unordered_map<uint32_t, ItemData>;
254 
ConstantPoolInfo()255     ConstantPoolInfo() : items_(ItemType::ITEM_TYPE_NUM, Item{}) {}
256 
GetCPItem(ItemType type)257     Item& GetCPItem(ItemType type)
258     {
259         ASSERT(ItemType::ITEM_TYPE_FIRST <= type && type <= ItemType::ITEM_TYPE_LAST);
260         return items_[type];
261     }
262 
263     void AddIndexToCPItem(ItemType type, uint32_t index, uint32_t methodOffset);
264 private:
265     std::vector<Item> items_;
266 };
267 
268 class BCInfo {
269 public:
BCInfo(size_t maxAotMethodSize)270     explicit BCInfo(size_t maxAotMethodSize)
271         : maxMethodSize_(maxAotMethodSize)
272     {
273     }
274 
GetMainMethodIndexes()275     std::vector<uint32_t>& GetMainMethodIndexes()
276     {
277         return mainMethodIndexes_;
278     }
279 
GetRecordNames()280     std::vector<CString>& GetRecordNames()
281     {
282         return recordNames_;
283     }
284 
GetMethodPcInfos()285     std::vector<MethodPcInfo>& GetMethodPcInfos()
286     {
287         return methodPcInfos_;
288     }
289 
GetMethodList()290     std::unordered_map<uint32_t, MethodInfo>& GetMethodList()
291     {
292         return methodList_;
293     }
294 
GetMaxMethodSize()295     size_t GetMaxMethodSize() const
296     {
297         return maxMethodSize_;
298     }
299 
IsSkippedMethod(uint32_t methodOffset)300     bool IsSkippedMethod(uint32_t methodOffset) const
301     {
302         if (skippedMethods_.find(methodOffset) == skippedMethods_.end()) {
303             return false;
304         }
305         return true;
306     }
307 
AddSkippedMethod(uint32_t methodOffset)308     void AddSkippedMethod(uint32_t methodOffset)
309     {
310         skippedMethods_.insert(methodOffset);
311     }
312 
EraseSkippedMethod(uint32_t methodOffset)313     void EraseSkippedMethod(uint32_t methodOffset)
314     {
315         if (skippedMethods_.find(methodOffset) != skippedMethods_.end()) {
316             skippedMethods_.erase(methodOffset);
317         }
318     }
319 
AddRecordName(const CString & recordName)320     void AddRecordName(const CString &recordName)
321     {
322         recordNames_.emplace_back(recordName);
323     }
324 
GetRecordName(uint32_t index)325     CString GetRecordName(uint32_t index) const
326     {
327         return recordNames_[index];
328     }
329 
AddMethodOffsetToRecordName(uint32_t methodOffset,CString recordName)330     void AddMethodOffsetToRecordName(uint32_t methodOffset, CString recordName)
331     {
332         methodOffsetToRecordName_.emplace(methodOffset, recordName);
333     }
334 
GetSkippedMethodSize()335     size_t GetSkippedMethodSize() const
336     {
337         return skippedMethods_.size();
338     }
339 
AddIndexToCPInfo(ConstantPoolInfo::ItemType type,uint32_t index,uint32_t methodOffset)340     void AddIndexToCPInfo(ConstantPoolInfo::ItemType type, uint32_t index, uint32_t methodOffset)
341     {
342         cpInfo_.AddIndexToCPItem(type, index, methodOffset);
343     }
344 
345     template <class Callback>
IterateConstantPoolInfo(ConstantPoolInfo::ItemType type,const Callback & cb)346     void IterateConstantPoolInfo(ConstantPoolInfo::ItemType type, const Callback &cb)
347     {
348         auto &item = cpInfo_.GetCPItem(type);
349         for (auto &iter : item) {
350             ConstantPoolInfo::ItemData &data = iter.second;
351             data.recordName = &methodOffsetToRecordName_[data.outerMethodOffset];
352             cb(data);
353         }
354     }
355 
GetDefineMethod(const uint32_t classLiteralOffset)356     uint32_t GetDefineMethod(const uint32_t classLiteralOffset) const
357     {
358         return classTypeLOffsetToDefMethod_.at(classLiteralOffset);
359     }
360 
HasClassDefMethod(const uint32_t classLiteralOffset)361     bool HasClassDefMethod(const uint32_t classLiteralOffset) const
362     {
363         return classTypeLOffsetToDefMethod_.find(classLiteralOffset) != classTypeLOffsetToDefMethod_.end();
364     }
365 
SetClassTypeOffsetAndDefMethod(uint32_t classLiteralOffset,uint32_t methodOffset)366     void SetClassTypeOffsetAndDefMethod(uint32_t classLiteralOffset, uint32_t methodOffset)
367     {
368         if (classTypeLOffsetToDefMethod_.find(classLiteralOffset) == classTypeLOffsetToDefMethod_.end()) {
369             classTypeLOffsetToDefMethod_.emplace(classLiteralOffset, methodOffset);
370         }
371     }
372 private:
373     std::vector<uint32_t> mainMethodIndexes_ {};
374     std::vector<CString> recordNames_ {};
375     std::vector<MethodPcInfo> methodPcInfos_ {};
376     std::unordered_map<uint32_t, MethodInfo> methodList_ {};
377     std::unordered_map<uint32_t, CString> methodOffsetToRecordName_ {};
378     std::set<uint32_t> skippedMethods_ {};
379     ConstantPoolInfo cpInfo_;
380     size_t maxMethodSize_;
381     std::unordered_map<uint32_t, uint32_t> classTypeLOffsetToDefMethod_ {};
382 };
383 
384 class LexEnvManager {
385 public:
386     explicit LexEnvManager(BCInfo &bcInfo);
387     ~LexEnvManager() = default;
388     NO_COPY_SEMANTIC(LexEnvManager);
389     NO_MOVE_SEMANTIC(LexEnvManager);
390 
391     void SetLexEnvElementType(uint32_t methodId, uint32_t level, uint32_t slot, const GateType &type);
392     GateType GetLexEnvElementType(uint32_t methodId, uint32_t level, uint32_t slot) const;
393 
394 private:
395     uint32_t GetTargetLexEnv(uint32_t methodId, uint32_t level) const;
396 
GetOutMethodId(uint32_t methodId)397     inline uint32_t GetOutMethodId(uint32_t methodId) const
398     {
399         return lexEnvs_[methodId].GetOutMethodId();
400     }
401 
GetLexEnvStatus(uint32_t methodId)402     inline LexicalEnvStatus GetLexEnvStatus(uint32_t methodId) const
403     {
404         return lexEnvs_[methodId].GetLexEnvStatus();
405     }
406 
HasDefaultRoot(uint32_t methodId)407     inline bool HasDefaultRoot(uint32_t methodId) const
408     {
409         return GetOutMethodId(methodId) == LexEnv::DEFAULT_ROOT;
410     }
411 
412     std::vector<LexEnv> lexEnvs_ {};
413 };
414 
415 class BytecodeInfoCollector {
416 public:
BytecodeInfoCollector(EcmaVM * vm,JSPandaFile * jsPandaFile,size_t maxAotMethodSize,bool enableCollectLiteralInfo)417     explicit BytecodeInfoCollector(EcmaVM *vm, JSPandaFile *jsPandaFile,
418                                    size_t maxAotMethodSize, bool enableCollectLiteralInfo)
419         : vm_(vm), jsPandaFile_(jsPandaFile), bytecodeInfo_(maxAotMethodSize),
420           enableCollectLiteralInfo_(enableCollectLiteralInfo)
421     {
422         ProcessClasses();
423     }
424     ~BytecodeInfoCollector() = default;
425     NO_COPY_SEMANTIC(BytecodeInfoCollector);
426     NO_MOVE_SEMANTIC(BytecodeInfoCollector);
427 
EnableCollectLiteralInfo()428     bool EnableCollectLiteralInfo() const
429     {
430         return enableCollectLiteralInfo_;
431     }
432 
GetBytecodeInfo()433     BCInfo& GetBytecodeInfo()
434     {
435         return bytecodeInfo_;
436     }
437 
IsSkippedMethod(uint32_t methodOffset)438     bool IsSkippedMethod(uint32_t methodOffset) const
439     {
440         return bytecodeInfo_.IsSkippedMethod(methodOffset);
441     }
442 
GetJSPandaFile()443     const JSPandaFile* GetJSPandaFile()
444     {
445         return jsPandaFile_;
446     }
447 
448     template <class Callback>
IterateConstantPoolInfo(ConstantPoolInfo::ItemType type,const Callback & cb)449     void IterateConstantPoolInfo(ConstantPoolInfo::ItemType type, const Callback &cb)
450     {
451         bytecodeInfo_.IterateConstantPoolInfo(type, cb);
452     }
453 
454 private:
GetMethodInfoID()455     inline size_t GetMethodInfoID()
456     {
457         return methodInfoIndex_++;
458     }
459 
AddConstantPoolIndexToBCInfo(ConstantPoolInfo::ItemType type,uint32_t index,uint32_t methodOffset)460     void AddConstantPoolIndexToBCInfo(ConstantPoolInfo::ItemType type,
461                                       uint32_t index, uint32_t methodOffset)
462     {
463         bytecodeInfo_.AddIndexToCPInfo(type, index, methodOffset);
464     }
465 
GetClassName(const EntityId entityId)466     inline std::string GetClassName(const EntityId entityId)
467     {
468         std::string className(MethodLiteral::GetMethodName(jsPandaFile_, entityId));
469         if (LIKELY(className.find('#') != std::string::npos)) {
470             size_t poiIndex = className.find_last_of('#');
471             className = className.substr(poiIndex + 1);
472         }
473         return className;
474     }
475 
476     const CString GetEntryFunName(const std::string_view &entryPoint) const;
477     void ProcessClasses();
478     void CollectMethodPcsFromBC(const uint32_t insSz, const uint8_t *insArr,
479         const MethodLiteral *method, std::vector<std::string> &classNameVec);
480     void SetMethodPcInfoIndex(uint32_t methodOffset, const std::pair<size_t, uint32_t> &processedMethodInfo);
481     void CollectInnerMethods(const MethodLiteral *method, uint32_t innerMethodOffset);
482     void CollectInnerMethods(uint32_t methodId, uint32_t innerMethodOffset);
483     void CollectInnerMethodsFromLiteral(const MethodLiteral *method, uint64_t index);
484     void NewLexEnvWithSize(const MethodLiteral *method, uint64_t numOfLexVars);
485     void CollectInnerMethodsFromNewLiteral(const MethodLiteral *method, panda_file::File::EntityId literalId);
486     void CollectMethodInfoFromBC(const BytecodeInstruction &bcIns, const MethodLiteral *method,
487                                  std::vector<std::string> &classNameVec, int32_t bcIndex);
488     void CollectConstantPoolIndexInfoFromBC(const BytecodeInstruction &bcIns, const MethodLiteral *method);
489     void IterateLiteral(const MethodLiteral *method, std::vector<uint32_t> &classOffsetVector);
490     void CollectClassLiteralInfo(const MethodLiteral *method, const std::vector<std::string> &classNameVec);
491 
492     EcmaVM *vm_;
493     JSPandaFile *jsPandaFile_ {nullptr};
494     BCInfo bytecodeInfo_;
495     size_t methodInfoIndex_ {0};
496     bool enableCollectLiteralInfo_ {false};
497     std::set<int32_t> classDefBCIndexes_ {};
498 };
499 }  // namespace panda::ecmascript::kungfu
500 #endif  // ECMASCRIPT_COMPILER_BYTECODE_INFO_COLLECTOR_H
501