• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "mangling.h"
21 #include "utils/logger.h"
22 
23 #include <iomanip>
24 
25 #include "get_language_specific_metadata.inc"
26 
27 namespace ark::disasm {
28 
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)29 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
30 {
31     auto file = panda_file::File::Open(filenameIn);
32     if (file == nullptr) {
33         LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
34     }
35 
36     Disassemble(file, quiet, skipStrings);
37 }
38 
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)39 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
40 {
41     SetFile(file);
42     DisassembleImpl(quiet, skipStrings);
43 }
44 
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)45 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
46 {
47     SetFile(file);
48     DisassembleImpl(quiet, skipStrings);
49 }
50 
DisassembleImpl(const bool quiet,const bool skipStrings)51 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
52 {
53     prog_ = pandasm::Program {};
54 
55     recordNameToId_.clear();
56     methodNameToId_.clear();
57 
58     skipStrings_ = skipStrings;
59     quiet_ = quiet;
60 
61     progInfo_ = ProgInfo {};
62 
63     progAnn_ = ProgAnnotations {};
64 
65     GetLiteralArrays();
66     GetRecords();
67 
68     AddExternalFieldsToRecords();
69     GetLanguageSpecificMetadata();
70 }
71 
SetFile(std::unique_ptr<const panda_file::File> & file)72 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
73 {
74     fileHolder_.swap(file);
75     file_ = fileHolder_.get();
76 }
77 
SetFile(const panda_file::File & file)78 void Disassembler::SetFile(const panda_file::File &file)
79 {
80     fileHolder_.reset();
81     file_ = &file;
82 }
83 
SetProfile(std::string_view fname)84 void Disassembler::SetProfile(std::string_view fname)
85 {
86     std::ifstream stm(fname.data(), std::ios::binary);
87     if (!stm.is_open()) {
88         LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
89     }
90 
91     auto res = profiling::ReadProfile(stm, fileLanguage_);
92     if (!res) {
93         LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
94     }
95     profile_ = res.Value();
96 }
97 
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const98 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
99                               MethodInfo *methodInfo /* out */) const
100 {
101     const static size_t FORMAT_WIDTH = 20;
102     const static size_t INSTRUCTION_WIDTH = 2;
103 
104     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
105 
106     std::string methodName = mda.GetFullName();
107     auto prof = profiling::INVALID_PROFILE;
108     if (profile_ != profiling::INVALID_PROFILE) {
109         prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
110     }
111 
112     auto insSz = codeAccessor.GetCodeSize();
113     auto insArr = codeAccessor.GetInstructions();
114 
115     auto bcIns = BytecodeInstruction(insArr);
116     auto bcInsLast = bcIns.JumpTo(insSz);
117 
118     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
119         std::stringstream ss;
120 
121         uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
122         ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
123         ss << ", " << std::setfill('.');
124 
125         BytecodeInstruction::Format format = bcIns.GetFormat();
126 
127         auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
128         ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
129 
130         ss << "[";
131 
132         const uint8_t *pc = bcIns.GetAddress();
133         const size_t sz = bcIns.GetSize();
134 
135         for (size_t i = 0; i < sz; i++) {
136             ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
137                << static_cast<int>(pc[i]);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138 
139             if (i != sz - 1) {
140                 ss << " ";
141             }
142         }
143 
144         ss << "]";
145 
146         if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
147             auto profId = bcIns.GetProfileId();
148             if (profId != -1) {
149                 ss << ", Profile: ";
150                 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
151             }
152         }
153 
154         methodInfo->instructionsInfo.push_back(ss.str());
155 
156         bcIns = bcIns.GetNext();
157     }
158 }
159 
CollectInfo()160 void Disassembler::CollectInfo()
161 {
162     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
163 
164     debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
165 
166     for (const auto &pair : recordNameToId_) {
167         GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
168     }
169 
170     for (const auto &pair : methodNameToId_) {
171         GetMethodInfo(pair.second, &progInfo_.methodsInfo[pair.first]);
172     }
173 
174     AddExternalFieldsInfoToRecords();
175 }
176 
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const177 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
178 {
179     if (os.bad()) {
180         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
181 
182         return;
183     }
184 
185     SerializeFilename(os);
186     SerializeLanguage(os);
187     SerializeLitArrays(os, addSeparators);
188     SerializeRecords(os, addSeparators, printInformation);
189     SerializeMethods(os, addSeparators, printInformation);
190 }
191 
SerializePrintStartInfo(const pandasm::Function & method,std::ostringstream & headerSs) const192 void Disassembler::SerializePrintStartInfo(const pandasm::Function &method, std::ostringstream &headerSs) const
193 {
194     headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
195 
196     if (!method.params.empty()) {
197         headerSs << method.params[0].type.GetPandasmName() << " a0";
198 
199         for (size_t i = 1; i < method.params.size(); i++) {
200             headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
201         }
202     }
203     headerSs << ")";
204 }
205 
SerializeCheckEnd(const pandasm::Function & method,std::ostream & os,bool printMethodInfo,const MethodInfo * & methodInfo) const206 void Disassembler::SerializeCheckEnd(const pandasm::Function &method, std::ostream &os, bool printMethodInfo,
207                                      const MethodInfo *&methodInfo) const
208 {
209     if (!method.catchBlocks.empty()) {
210         os << "\n";
211 
212         for (const auto &catchBlock : method.catchBlocks) {
213             Serialize(catchBlock, os);
214             os << "\n";
215         }
216     }
217 
218     if (printMethodInfo) {
219         ASSERT(methodInfo != nullptr);
220         SerializeLineNumberTable(methodInfo->lineNumberTable, os);
221         SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
222     }
223 
224     os << "}\n\n";
225 }
226 
SerializeIfPrintMethodInfo(const pandasm::Function & method,bool printMethodInfo,std::ostringstream & headerSs,const MethodInfo * & methodInfo,std::map<std::string,ark::disasm::MethodInfo>::const_iterator & methodInfoIt) const227 size_t Disassembler::SerializeIfPrintMethodInfo(
228     const pandasm::Function &method, bool printMethodInfo, std::ostringstream &headerSs, const MethodInfo *&methodInfo,
229     std::map<std::string, ark::disasm::MethodInfo>::const_iterator &methodInfoIt) const
230 {
231     size_t width = 0;
232     if (printMethodInfo) {
233         methodInfo = &methodInfoIt->second;
234 
235         for (const auto &i : method.ins) {
236             if (i.ToString().size() > width) {
237                 width = i.ToString().size();
238             }
239         }
240 
241         headerSs << " # " << methodInfo->methodInfo << "\n#   CODE:";
242     }
243 
244     headerSs << "\n";
245     return width;
246 }
247 
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const248 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
249                              panda_file::LineNumberTable *lineTable) const
250 {
251     std::ostringstream headerSs;
252     SerializePrintStartInfo(method, headerSs);
253     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
254     const auto methodIter = progAnn_.methodAnnotations.find(signature);
255     if (methodIter != progAnn_.methodAnnotations.end()) {
256         Serialize(*method.metadata, methodIter->second, headerSs);
257     } else {
258         Serialize(*method.metadata, {}, headerSs);
259     }
260 
261     if (!method.HasImplementation()) {
262         headerSs << "\n\n";
263         os << headerSs.str();
264         return;
265     }
266 
267     headerSs << " {";
268 
269     const MethodInfo *methodInfo = nullptr;
270     auto methodInfoIt = progInfo_.methodsInfo.find(signature);
271     bool printMethodInfo = printInformation && methodInfoIt != progInfo_.methodsInfo.end();
272     size_t width = SerializeIfPrintMethodInfo(method, printMethodInfo, headerSs, methodInfo, methodInfoIt);
273 
274     auto headerSsStr = headerSs.str();
275     size_t lineNumber = std::count(headerSsStr.begin(), headerSsStr.end(), '\n') + 1;
276 
277     os << headerSsStr;
278 
279     for (size_t i = 0; i < method.ins.size(); i++) {
280         std::ostringstream insSs;
281 
282         std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
283         if (method.ins[i].setLabel) {
284             insSs << ins.substr(0, ins.find(": ")) << ":\n";
285             ins.erase(0, ins.find(": ") + std::string(": ").length());
286         }
287 
288         insSs << "\t";
289         if (printMethodInfo) {
290             insSs << std::setw(width) << std::left;
291         }
292         insSs << ins;
293         if (printMethodInfo) {
294             ASSERT(methodInfo != nullptr);
295             insSs << " # " << methodInfo->instructionsInfo[i];
296         }
297         insSs << "\n";
298 
299         auto insSsStr = insSs.str();
300         lineNumber += std::count(insSsStr.begin(), insSsStr.end(), '\n');
301 
302         if (lineTable != nullptr) {
303             lineTable->emplace_back(
304                 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
305         }
306 
307         os << insSsStr;
308     }
309 
310     SerializeCheckEnd(method, os, printMethodInfo, methodInfo);
311 }
312 
IsSystemType(const std::string & typeName)313 inline bool Disassembler::IsSystemType(const std::string &typeName)
314 {
315     bool isArrayType = typeName.back() == ']';
316     bool isGlobal = typeName == "_GLOBAL";
317 
318     return isArrayType || isGlobal;
319 }
320 
GetRecord(pandasm::Record & record,const panda_file::File::EntityId & recordId)321 void Disassembler::GetRecord(pandasm::Record &record, const panda_file::File::EntityId &recordId)
322 {
323     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
324 
325     record.name = GetFullRecordName(recordId);
326 
327     LOG(DEBUG, DISASSEMBLER) << "name: " << record.name;
328 
329     GetMetaData(&record, recordId);
330 
331     if (!file_->IsExternal(recordId)) {
332         GetMethods(recordId);
333         GetFields(record, recordId);
334     }
335 }
336 
AddMethodToTables(const panda_file::File::EntityId & methodId)337 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
338 {
339     pandasm::Function newMethod("", fileLanguage_);
340     GetMethod(&newMethod, methodId);
341 
342     const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
343     if (prog_.functionTable.find(signature) != prog_.functionTable.end()) {
344         return;
345     }
346 
347     methodNameToId_.emplace(signature, methodId);
348     prog_.functionSynonyms[newMethod.name].push_back(signature);
349     prog_.functionTable.emplace(signature, std::move(newMethod));
350 }
351 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)352 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
353 {
354     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
355 
356     if (method == nullptr) {
357         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
358 
359         return;
360     }
361 
362     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
363 
364     method->name = GetFullMethodName(methodId);
365 
366     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
367 
368     GetParams(method, methodAccessor.GetProtoId());
369     GetMetaData(method, methodId);
370 
371     if (!method->HasImplementation()) {
372         return;
373     }
374 
375     if (methodAccessor.GetCodeId().has_value()) {
376         const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
377 
378         for (const auto &id : idList) {
379             AddMethodToTables(id);
380         }
381     } else {
382         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
383                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
384 
385         return;
386     }
387 }
388 
389 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const390 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
391                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
392 {
393     panda_file::File::EntityId id(std::get<uint32_t>(value));
394     auto sp = file_->GetSpanFromId(id);
395     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
396     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
397         for (size_t i = 0; i < len; i++) {
398             pandasm::LiteralArray::Literal lit;
399             lit.tag = tag;
400             lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
401             litArray->literals.push_back(lit);
402         }
403     } else {
404         for (size_t i = 0; i < len; i++) {
405             auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
406             pandasm::LiteralArray::Literal lit;
407             lit.tag = tag;
408             lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
409             litArray->literals.push_back(lit);
410         }
411     }
412 }
413 
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const414 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
415                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
416                                    const panda_file::LiteralTag &tag) const
417 {
418     pandasm::LiteralArray::Literal lit;
419     lit.tag = tag;
420     switch (tag) {
421         case panda_file::LiteralTag::BOOL: {
422             lit.value = std::get<bool>(value);
423             break;
424         }
425         case panda_file::LiteralTag::ACCESSOR:
426         case panda_file::LiteralTag::NULLVALUE: {
427             lit.value = std::get<uint8_t>(value);
428             break;
429         }
430         case panda_file::LiteralTag::METHODAFFILIATE: {
431             lit.value = std::get<uint16_t>(value);
432             break;
433         }
434         case panda_file::LiteralTag::INTEGER: {
435             lit.value = std::get<uint32_t>(value);
436             break;
437         }
438         case panda_file::LiteralTag::BIGINT: {
439             lit.value = std::get<uint64_t>(value);
440             break;
441         }
442         case panda_file::LiteralTag::FLOAT: {
443             lit.value = std::get<float>(value);
444             break;
445         }
446         case panda_file::LiteralTag::DOUBLE: {
447             lit.value = std::get<double>(value);
448             break;
449         }
450         case panda_file::LiteralTag::STRING:
451         case panda_file::LiteralTag::METHOD:
452         case panda_file::LiteralTag::GENERATORMETHOD: {
453             auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
454             lit.value = StringDataToString(strData);
455             break;
456         }
457         case panda_file::LiteralTag::TAGVALUE: {
458             return;
459         }
460         default: {
461             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
462             UNREACHABLE();
463         }
464     }
465     litArray->literals.push_back(lit);
466 }
467 
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)468 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
469 {
470     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
471 
472     panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
473 
474     // clang-format off
475     litArrayAccessor.EnumerateLiteralVals(index,
476                                           [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
477                                                            const panda_file::LiteralTag &tag) {
478                                             switch (tag) {
479                                                 case panda_file::LiteralTag::ARRAY_U1: {
480                                                     FillLiteralArrayData<bool>(litArray, tag, value);
481                                                     break;
482                                                 }
483                                                 case panda_file::LiteralTag::ARRAY_I8:
484                                                 case panda_file::LiteralTag::ARRAY_U8: {
485                                                     FillLiteralArrayData<uint8_t>(litArray, tag, value);
486                                                     break;
487                                                 }
488                                                 case panda_file::LiteralTag::ARRAY_I16:
489                                                 case panda_file::LiteralTag::ARRAY_U16: {
490                                                     FillLiteralArrayData<uint16_t>(litArray, tag, value);
491                                                     break;
492                                                 }
493                                                 case panda_file::LiteralTag::ARRAY_I32:
494                                                 case panda_file::LiteralTag::ARRAY_U32: {
495                                                     FillLiteralArrayData<uint32_t>(litArray, tag, value);
496                                                     break;
497                                                 }
498                                                 case panda_file::LiteralTag::ARRAY_I64:
499                                                 case panda_file::LiteralTag::ARRAY_U64: {
500                                                     FillLiteralArrayData<uint64_t>(litArray, tag, value);
501                                                     break;
502                                                 }
503                                                 case panda_file::LiteralTag::ARRAY_F32: {
504                                                     FillLiteralArrayData<float>(litArray, tag, value);
505                                                     break;
506                                                 }
507                                                 case panda_file::LiteralTag::ARRAY_F64: {
508                                                     FillLiteralArrayData<double>(litArray, tag, value);
509                                                     break;
510                                                 }
511                                                 case panda_file::LiteralTag::ARRAY_STRING: {
512                                                     FillLiteralArrayData<uint32_t>(litArray, tag, value);
513                                                     break;
514                                                 }
515                                                 default: {
516                                                     FillLiteralData(litArray, value, tag);
517                                                 }
518                                             }
519                                         });
520     // clang-format on
521 }
522 
GetLiteralArrays()523 void Disassembler::GetLiteralArrays()
524 {
525     const auto litArraysId = file_->GetLiteralArraysId();
526 
527     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
528                              << ")";
529 
530     panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
531     size_t numLitarrays = litArrayAccessor.GetLiteralNum();
532     for (size_t index = 0; index < numLitarrays; index++) {
533         ark::pandasm::LiteralArray litAr;
534         GetLiteralArray(&litAr, index);
535         prog_.literalarrayTable.emplace(std::to_string(index), litAr);
536     }
537 }
538 
GetRecords()539 void Disassembler::GetRecords()
540 {
541     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
542 
543     const auto classIdx = file_->GetClasses();
544 
545     for (size_t i = 0; i < classIdx.size(); i++) {
546         uint32_t classId = classIdx[i];
547         auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
548 
549         if (classId > file_->GetHeader()->fileSize) {
550             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
551                                      << classOff << "). binary file corrupted. record offset (0x" << classId
552                                      << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
553             break;
554         }
555 
556         const panda_file::File::EntityId recordId {classId};
557         auto language = GetRecordLanguage(recordId);
558         if (language != fileLanguage_) {
559             if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
560                 fileLanguage_ = language;
561             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
562                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
563                                          << std::hex << classOff << "). record's language  ("
564                                          << panda_file::LanguageToString(language)
565                                          << ")  differs from file's language ("
566                                          << panda_file::LanguageToString(fileLanguage_) << ")!";
567             }
568         }
569 
570         pandasm::Record record("", fileLanguage_);
571         GetRecord(record, recordId);
572 
573         if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
574             recordNameToId_.emplace(record.name, recordId);
575             prog_.recordTable.emplace(record.name, std::move(record));
576         }
577     }
578 }
579 
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)580 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
581 {
582     panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
583     field.name = StringDataToString(file_->GetStringData(fieldNameId));
584 
585     uint32_t fieldType = fieldAccessor.GetType();
586     field.type = FieldTypeToPandasmType(fieldType);
587 
588     GetMetaData(&field, fieldAccessor.GetFieldId());
589 }
590 
GetFields(pandasm::Record & record,const panda_file::File::EntityId & recordId)591 void Disassembler::GetFields(pandasm::Record &record, const panda_file::File::EntityId &recordId)
592 {
593     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
594 
595     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
596         pandasm::Field field(fileLanguage_);
597 
598         GetField(field, fieldAccessor);
599 
600         record.fieldList.push_back(std::move(field));
601     });
602 }
603 
AddExternalFieldsToRecords()604 void Disassembler::AddExternalFieldsToRecords()
605 {
606     for (auto &[recordName, record] : prog_.recordTable) {
607         auto iter = externalFieldTable_.find(recordName);
608         if (iter == externalFieldTable_.end() || iter->second.empty()) {
609             continue;
610         }
611         for (auto &fieldIter : iter->second) {
612             record.fieldList.push_back(std::move(fieldIter));
613         }
614         externalFieldTable_.erase(recordName);
615     }
616 }
617 
AddExternalFieldsInfoToRecords()618 void Disassembler::AddExternalFieldsInfoToRecords()
619 {
620     for (auto &[recordName, recordInfo] : progInfo_.recordsInfo) {
621         auto iter = externalFieldsInfoTable_.find(recordName);
622         if (iter == externalFieldsInfoTable_.end() || iter->second.empty()) {
623             continue;
624         }
625         for (auto &info : iter->second) {
626             recordInfo.fieldsInfo.push_back(std::move(info));
627         }
628         externalFieldsInfoTable_.erase(recordName);
629     }
630 }
631 
GetMethods(const panda_file::File::EntityId & recordId)632 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
633 {
634     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
635 
636     classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
637         AddMethodToTables(methodAccessor.GetMethodId());
638     });
639 }
640 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const641 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
642 {
643     /// frame size - 2^16 - 1
644     static const uint32_t MAX_ARG_NUM = 0xFFFF;
645 
646     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
647 
648     if (method == nullptr) {
649         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
650 
651         return;
652     }
653 
654     panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
655 
656     if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
657         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
658                                  << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
659                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
660 
661         return;
662     }
663 
664     size_t refIdx = 0;
665     method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
666 
667     for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
668         auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
669         method->params.emplace_back(argType, fileLanguage_);
670     }
671 }
672 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const673 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
674                                        panda_file::File::EntityId codeId) const
675 {
676     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
677 
678     if (method == nullptr) {
679         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
680         return LabelTable {};
681     }
682 
683     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
684 
685     const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
686     const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
687 
688     size_t tryIdx = 0;
689     LabelTable labelTable {};
690     codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
691         pandasm::Function::CatchBlock catchBlockPa {};
692         if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
693             return false;
694         }
695         size_t catchIdx = 0;
696         tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
697             auto classIdx = catchBlock.GetTypeIdx();
698             if (classIdx == panda_file::INVALID_INDEX) {
699                 catchBlockPa.exceptionRecord = "";
700             } else {
701                 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
702                 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
703             }
704             if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
705                 return false;
706             }
707 
708             method->catchBlocks.push_back(catchBlockPa);
709             catchBlockPa.catchBeginLabel = "";
710             catchBlockPa.catchEndLabel = "";
711             catchIdx++;
712 
713             return true;
714         });
715         tryIdx++;
716 
717         return true;
718     });
719 
720     return labelTable;
721 }
722 
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)723 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
724 {
725     size_t count = 0;
726 
727     while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
728         count++;
729         bcInsFirst = bcInsFirst.GetNext();
730         if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
731             return std::numeric_limits<size_t>::max();
732         }
733     }
734 
735     return count;
736 }
737 
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const738 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
739                                   const panda_file::CodeDataAccessor::TryBlock &tryBlock,
740                                   pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
741                                   size_t tryIdx) const
742 {
743     const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
744     const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
745 
746     const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
747     const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
748 
749     const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
750     const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
751     const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
752     const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
753 
754     if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
755         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
756                                  << tryBeginBcIns.GetAddress();
757         return false;
758     }
759 
760     auto itBegin = labelTable->find(tryBeginIdx);
761     if (itBegin == labelTable->end()) {
762         std::stringstream ss {};
763         ss << "try_begin_label_" << tryIdx;
764         catchBlockPa->tryBeginLabel = ss.str();
765         labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
766     } else {
767         catchBlockPa->tryBeginLabel = itBegin->second;
768     }
769 
770     if (!tryEndOffsetInRange || !tryEndOffsetValid) {
771         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
772                                  << tryEndBcIns.GetAddress();
773         return false;
774     }
775 
776     auto itEnd = labelTable->find(tryEndIdx);
777     if (itEnd == labelTable->end()) {
778         std::stringstream ss {};
779         ss << "try_end_label_" << tryIdx;
780         catchBlockPa->tryEndLabel = ss.str();
781         labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
782     } else {
783         catchBlockPa->tryEndLabel = itEnd->second;
784     }
785 
786     return true;
787 }
788 
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const789 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
790                                     const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
791                                     pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
792                                     size_t catchIdx) const
793 {
794     const auto handlerBeginOffset = catchBlock.GetHandlerPc();
795     const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
796 
797     const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
798     const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
799 
800     const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
801     const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
802 
803     const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
804     const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
805     const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
806     const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
807     const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
808 
809     if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
810         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
811                                  << handlerBeginBcIns.GetAddress();
812         return false;
813     }
814 
815     auto itBegin = labelTable->find(handlerBeginIdx);
816     if (itBegin == labelTable->end()) {
817         std::stringstream ss {};
818         ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
819         catchBlockPa->catchBeginLabel = ss.str();
820         labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
821     } else {
822         catchBlockPa->catchBeginLabel = itBegin->second;
823     }
824 
825     if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
826         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
827                                  << handlerEndBcIns.GetAddress();
828         return false;
829     }
830 
831     if (handlerEndPresent) {
832         auto itEnd = labelTable->find(handlerEndIdx);
833         if (itEnd == labelTable->end()) {
834             std::stringstream ss {};
835             ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
836             catchBlockPa->catchEndLabel = ss.str();
837             labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
838         } else {
839             catchBlockPa->catchEndLabel = itEnd->second;
840         }
841     }
842 
843     return true;
844 }
845 
846 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)847 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
848 {
849     if (shouldSet()) {
850         auto err = entity->metadata->SetAttribute(attribute);
851         if (err.has_value()) {
852             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
853         }
854     }
855 }
856 
857 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)858 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
859                                     const char *value)
860 {
861     if (shouldSet()) {
862         auto err = entity->metadata->SetAttributeValue(attribute, value);
863         if (err.has_value()) {
864             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
865         }
866     }
867 }
868 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const869 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
870 {
871     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
872 
873     if (method == nullptr) {
874         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
875 
876         return;
877     }
878 
879     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
880 
881     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
882 
883     if (!methodAccessor.IsStatic()) {
884         const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
885         auto thisType = pandasm::Type::FromDescriptor(className);
886 
887         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
888                                  << "\') is not static. emplacing self-argument of type " << thisType.GetName();
889 
890         method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
891     }
892     SetEntityAttribute(
893         method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
894 
895     SetEntityAttribute(
896         method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
897 
898     SetEntityAttribute(
899         method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
900 
901     SetEntityAttribute(
902         method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
903 
904     SetEntityAttribute(
905         method, [&methodAccessor]() { return methodAccessor.IsVarArgs(); }, "varargs");
906 
907     SetEntityAttributeValue(
908         method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
909 
910     SetEntityAttributeValue(
911         method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
912 
913     SetEntityAttributeValue(
914         method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
915 
916     SetEntityAttribute(
917         method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
918 
919     std::string ctorName = ark::panda_file::GetCtorName(fileLanguage_);
920     std::string cctorName = ark::panda_file::GetCctorName(fileLanguage_);
921 
922     const bool isCtor = (methodNameRaw == ctorName);
923     const bool isCctor = (methodNameRaw == cctorName);
924 
925     if (isCtor) {
926         method->metadata->SetAttribute("ctor");
927         method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
928     } else if (isCctor) {
929         method->metadata->SetAttribute("cctor");
930         method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
931     }
932 }
933 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const934 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
935 {
936     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
937 
938     if (record == nullptr) {
939         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
940 
941         return;
942     }
943 
944     SetEntityAttribute(
945         record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
946 
947     auto external = file_->IsExternal(recordId);
948     if (!external) {
949         auto cda = panda_file::ClassDataAccessor {*file_, recordId};
950         SetEntityAttributeValue(
951             record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
952 
953         SetEntityAttributeValue(
954             record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
955 
956         SetEntityAttributeValue(
957             record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
958 
959         SetEntityAttribute(
960             record, [&cda]() { return cda.IsFinal(); }, "final");
961     }
962 }
963 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const964 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
965 {
966     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
967 
968     if (field == nullptr) {
969         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
970 
971         return;
972     }
973 
974     panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
975 
976     SetEntityAttribute(
977         field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
978 
979     SetEntityAttribute(
980         field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
981 
982     SetEntityAttributeValue(
983         field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
984 
985     SetEntityAttributeValue(
986         field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
987 
988     SetEntityAttributeValue(
989         field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
990 
991     SetEntityAttribute(
992         field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
993 }
994 
AnnotationTagToString(const char tag) const995 std::string Disassembler::AnnotationTagToString(const char tag) const
996 {
997     static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
998                                                                         {'2', "i8"},
999                                                                         {'3', "u8"},
1000                                                                         {'4', "i16"},
1001                                                                         {'5', "u16"},
1002                                                                         {'6', "i32"},
1003                                                                         {'7', "u32"},
1004                                                                         {'8', "i64"},
1005                                                                         {'9', "u64"},
1006                                                                         {'A', "f32"},
1007                                                                         {'B', "f64"},
1008                                                                         {'C', "string"},
1009                                                                         {'D', "record"},
1010                                                                         {'E', "method"},
1011                                                                         {'F', "enum"},
1012                                                                         {'G', "annotation"},
1013                                                                         {'J', "method_handle"},
1014                                                                         {'H', "array"},
1015                                                                         {'K', "u1[]"},
1016                                                                         {'L', "i8[]"},
1017                                                                         {'M', "u8[]"},
1018                                                                         {'N', "i16[]"},
1019                                                                         {'O', "u16[]"},
1020                                                                         {'P', "i32[]"},
1021                                                                         {'Q', "u32[]"},
1022                                                                         {'R', "i64[]"},
1023                                                                         {'S', "u64[]"},
1024                                                                         {'T', "f32[]"},
1025                                                                         {'U', "f64[]"},
1026                                                                         {'V', "string[]"},
1027                                                                         {'W', "record[]"},
1028                                                                         {'X', "method[]"},
1029                                                                         {'Y', "enum[]"},
1030                                                                         {'Z', "annotation[]"},
1031                                                                         {'@', "method_handle[]"},
1032                                                                         {'*', "nullptr_string"}};
1033 
1034     return TAG_TO_STRING.at(tag);
1035 }
1036 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1037 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1038 {
1039     std::stringstream ss;
1040 
1041     if (type == "i8") {
1042         auto res = value.Get<int8_t>();
1043         ss << static_cast<int>(res);
1044     } else if (type == "u1" || type == "u8") {
1045         auto res = value.Get<uint8_t>();
1046         ss << static_cast<unsigned int>(res);
1047     } else if (type == "i16") {
1048         ss << value.Get<int16_t>();
1049     } else if (type == "u16") {
1050         ss << value.Get<uint16_t>();
1051     } else if (type == "i32") {
1052         ss << value.Get<int32_t>();
1053     } else if (type == "u32") {
1054         ss << value.Get<uint32_t>();
1055     } else if (type == "i64") {
1056         ss << value.Get<int64_t>();
1057     } else if (type == "u64") {
1058         ss << value.Get<uint64_t>();
1059     } else if (type == "f32") {
1060         ss << value.Get<float>();
1061     } else if (type == "f64") {
1062         ss << value.Get<double>();
1063     } else if (type == "string") {
1064         const auto id = value.Get<panda_file::File::EntityId>();
1065         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1066     } else if (type == "record") {
1067         const auto id = value.Get<panda_file::File::EntityId>();
1068         ss << GetFullRecordName(id);
1069     } else if (type == "method") {
1070         const auto id = value.Get<panda_file::File::EntityId>();
1071         AddMethodToTables(id);
1072         ss << GetMethodSignature(id);
1073     } else if (type == "enum") {
1074         const auto id = value.Get<panda_file::File::EntityId>();
1075         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1076         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1077            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1078     } else if (type == "annotation") {
1079         const auto id = value.Get<panda_file::File::EntityId>();
1080         ss << "id_" << id;
1081     } else if (type == "void") {
1082         return std::string();
1083     } else if (type == "method_handle") {
1084     } else if (type == "nullptr_string") {
1085         ss << static_cast<uint32_t>(0);
1086     }
1087 
1088     return ss.str();
1089 }
1090 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1091 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1092                                              const size_t idx)
1093 {
1094     std::stringstream ss;
1095 
1096     if (type == "i8") {
1097         auto res = value.Get<int8_t>(idx);
1098         ss << static_cast<int>(res);
1099     } else if (type == "u1" || type == "u8") {
1100         auto res = value.Get<uint8_t>(idx);
1101         ss << static_cast<unsigned int>(res);
1102     } else if (type == "i16") {
1103         ss << value.Get<int16_t>(idx);
1104     } else if (type == "u16") {
1105         ss << value.Get<uint16_t>(idx);
1106     } else if (type == "i32") {
1107         ss << value.Get<int32_t>(idx);
1108     } else if (type == "u32") {
1109         ss << value.Get<uint32_t>(idx);
1110     } else if (type == "i64") {
1111         ss << value.Get<int64_t>(idx);
1112     } else if (type == "u64") {
1113         ss << value.Get<uint64_t>(idx);
1114     } else if (type == "f32") {
1115         ss << value.Get<float>(idx);
1116     } else if (type == "f64") {
1117         ss << value.Get<double>(idx);
1118     } else if (type == "string") {
1119         const auto id = value.Get<panda_file::File::EntityId>(idx);
1120         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1121     } else if (type == "record") {
1122         const auto id = value.Get<panda_file::File::EntityId>(idx);
1123         ss << GetFullRecordName(id);
1124     } else if (type == "method") {
1125         const auto id = value.Get<panda_file::File::EntityId>(idx);
1126         AddMethodToTables(id);
1127         ss << GetMethodSignature(id);
1128     } else if (type == "enum") {
1129         const auto id = value.Get<panda_file::File::EntityId>(idx);
1130         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1131         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1132            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1133     } else if (type == "annotation") {
1134         const auto id = value.Get<panda_file::File::EntityId>(idx);
1135         ss << "id_" << id;
1136     } else if (type == "method_handle") {
1137     } else if (type == "nullptr_string") {
1138     }
1139 
1140     return ss.str();
1141 }
1142 
GetFullMethodName(const panda_file::File::EntityId & methodId) const1143 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1144 {
1145     ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1146 
1147     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1148 
1149     std::string className = GetFullRecordName(methodAccessor.GetClassId());
1150     if (IsSystemType(className)) {
1151         className = "";
1152     } else {
1153         className += ".";
1154     }
1155 
1156     return className + methodNameRaw;
1157 }
1158 
GetMethodSignature(const panda_file::File::EntityId & methodId) const1159 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1160 {
1161     ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1162 
1163     pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1164     GetParams(&method, methodAccessor.GetProtoId());
1165     GetMetaData(&method, methodId);
1166 
1167     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1168 }
1169 
GetFullRecordName(const panda_file::File::EntityId & classId) const1170 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1171 {
1172     std::string name = StringDataToString(file_->GetStringData(classId));
1173 
1174     auto type = pandasm::Type::FromDescriptor(name);
1175     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1176 
1177     return type.GetPandasmName();
1178 }
1179 
1180 static constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1181 
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor,std::stringstream & ss)1182 static void GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor, std::stringstream &ss)
1183 {
1184     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1185        << ", type: 0x" << fieldAccessor.GetType();
1186 }
1187 
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor)1188 static std::string GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor)
1189 {
1190     std::stringstream ss;
1191     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1192        << ", type: 0x" << fieldAccessor.GetType();
1193     return ss.str();
1194 }
1195 
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1196 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1197 {
1198     if (file_->IsExternal(recordId)) {
1199         return;
1200     }
1201 
1202     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1203     std::stringstream ss;
1204 
1205     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1206        << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1207        << std::dec << classAccessor.GetSize() << ")";
1208 
1209     recordInfo->recordInfo = ss.str();
1210     ss.str(std::string());
1211 
1212     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1213         GetFieldInfo(fieldAccessor, ss);
1214 
1215         recordInfo->fieldsInfo.push_back(ss.str());
1216 
1217         ss.str(std::string());
1218     });
1219 }
1220 
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1221 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1222 {
1223     panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1224     std::stringstream ss;
1225 
1226     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1227        << methodAccessor.GetMethodId();
1228 
1229     if (methodAccessor.GetCodeId().has_value()) {
1230         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1231            << methodAccessor.GetCodeId().value();
1232 
1233         GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1234     } else {
1235         ss << ", <no code>";
1236     }
1237 
1238     auto profileSize = methodAccessor.GetProfileSize();
1239     if (profileSize) {
1240         ss << ", profile size: " << profileSize.value();
1241     }
1242 
1243     methodInfo->methodInfo = ss.str();
1244 
1245     if (methodAccessor.GetCodeId()) {
1246         ASSERT(debugInfoExtractor_ != nullptr);
1247         methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1248         methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1249 
1250         // Add information about parameters into the table
1251         panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1252         auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1253         uint32_t codeSize = codeda.GetCodeSize();
1254         for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1255             panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1256             methodInfo->localVariableTable.emplace_back(argInfo);
1257         }
1258     }
1259 }
1260 
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1261 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1262 {
1263     if (litArray.literals.empty()) {
1264         return;
1265     }
1266 
1267     bool isConst = litArray.literals[0].IsArray();
1268 
1269     std::stringstream specifiers {};
1270 
1271     if (isConst) {
1272         specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1273     }
1274 
1275     os << ".array array_" << name << " " << specifiers.str() << "{";
1276 
1277     SerializeValues(litArray, isConst, os);
1278 
1279     os << "}\n";
1280 }
1281 
LiteralTagToString(const panda_file::LiteralTag & tag) const1282 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1283 {
1284     switch (tag) {
1285         case panda_file::LiteralTag::BOOL:
1286         case panda_file::LiteralTag::ARRAY_U1:
1287             return "u1";
1288         case panda_file::LiteralTag::ARRAY_U8:
1289             return "u8";
1290         case panda_file::LiteralTag::ARRAY_I8:
1291             return "i8";
1292         case panda_file::LiteralTag::ARRAY_U16:
1293             return "u16";
1294         case panda_file::LiteralTag::ARRAY_I16:
1295             return "i16";
1296         case panda_file::LiteralTag::ARRAY_U32:
1297             return "u32";
1298         case panda_file::LiteralTag::INTEGER:
1299         case panda_file::LiteralTag::ARRAY_I32:
1300             return "i32";
1301         case panda_file::LiteralTag::ARRAY_U64:
1302             return "u64";
1303         case panda_file::LiteralTag::BIGINT:
1304         case panda_file::LiteralTag::ARRAY_I64:
1305             return "i64";
1306         case panda_file::LiteralTag::FLOAT:
1307         case panda_file::LiteralTag::ARRAY_F32:
1308             return "f32";
1309         case panda_file::LiteralTag::DOUBLE:
1310         case panda_file::LiteralTag::ARRAY_F64:
1311             return "f64";
1312         case panda_file::LiteralTag::STRING:
1313         case panda_file::LiteralTag::ARRAY_STRING:
1314             return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1315         case panda_file::LiteralTag::ACCESSOR:
1316             return "accessor";
1317         case panda_file::LiteralTag::NULLVALUE:
1318             return "nullvalue";
1319         case panda_file::LiteralTag::METHODAFFILIATE:
1320             return "method_affiliate";
1321         case panda_file::LiteralTag::METHOD:
1322             return "method";
1323         case panda_file::LiteralTag::GENERATORMETHOD:
1324             return "generator_method";
1325         default:
1326             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1327             UNREACHABLE();
1328     }
1329 }
1330 
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1331 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1332 {
1333     if (lit.IsBoolValue()) {
1334         std::stringstream res {};
1335         res << (std::get<bool>(lit.value));
1336         return res.str();
1337     }
1338 
1339     if (lit.IsByteValue()) {
1340         return LiteralIntegralValueToString<uint8_t>(lit);
1341     }
1342 
1343     if (lit.IsShortValue()) {
1344         return LiteralIntegralValueToString<uint16_t>(lit);
1345     }
1346 
1347     if (lit.IsIntegerValue()) {
1348         return LiteralIntegralValueToString<uint32_t>(lit);
1349     }
1350 
1351     if (lit.IsLongValue()) {
1352         return LiteralIntegralValueToString<uint64_t>(lit);
1353     }
1354 
1355     if (lit.IsDoubleValue()) {
1356         std::stringstream res {};
1357         res << std::get<double>(lit.value);
1358         return res.str();
1359     }
1360 
1361     if (lit.IsFloatValue()) {
1362         std::stringstream res {};
1363         res << std::get<float>(lit.value);
1364         return res.str();
1365     }
1366 
1367     if (lit.IsStringValue()) {
1368         std::stringstream res {};
1369         res << "\"" << std::get<std::string>(lit.value) << "\"";
1370         return res.str();
1371     }
1372 
1373     UNREACHABLE();
1374 }
1375 
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1376 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1377 {
1378     std::string separator = (isConst) ? (" ") : ("\n");
1379 
1380     os << separator;
1381 
1382     if (isConst) {
1383         for (const auto &l : litArray.literals) {
1384             os << LiteralValueToString(l) << separator;
1385         }
1386     } else {
1387         for (const auto &l : litArray.literals) {
1388             os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1389         }
1390     }
1391 }
1392 
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1393 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1394 {
1395     if (IsSystemType(record.name)) {
1396         return;
1397     }
1398 
1399     os << ".record " << record.name;
1400 
1401     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1402     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1403     if (recordInTable) {
1404         Serialize(*record.metadata, recordIter->second.annList, os);
1405     } else {
1406         Serialize(*record.metadata, {}, os);
1407     }
1408 
1409     if (record.metadata->IsForeign() && record.fieldList.empty()) {
1410         os << "\n\n";
1411         return;
1412     }
1413 
1414     os << " {";
1415 
1416     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1417         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1418         SerializeFields(record, os, true);
1419     } else {
1420         os << "\n";
1421         SerializeFields(record, os, false);
1422     }
1423 
1424     os << "}\n\n";
1425 }
1426 
SerializeUnionFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1427 void Disassembler::SerializeUnionFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1428 {
1429     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1430         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1431         SerializeFields(record, os, true, true);
1432     } else {
1433         SerializeFields(record, os, false, true);
1434     }
1435     os << "\n";
1436 }
1437 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation,bool isUnion) const1438 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation,
1439                                    bool isUnion) const
1440 {
1441     constexpr size_t INFO_OFFSET = 80;
1442 
1443     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1444     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1445 
1446     const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1447 
1448     size_t fieldIdx = 0;
1449 
1450     std::stringstream ss;
1451     for (const auto &f : record.fieldList) {
1452         if (isUnion) {
1453             ss << ".union_field ";
1454         } else {
1455             ss << "\t";
1456         }
1457         ss << f.type.GetPandasmName() << " " << f.name;
1458         if (!isUnion && recordInTable) {
1459             const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1460             if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1461                 Serialize(*f.metadata, fieldIter->second, ss);
1462             } else {
1463                 Serialize(*f.metadata, {}, ss);
1464             }
1465         } else if (!isUnion && !recordInTable) {
1466             Serialize(*f.metadata, {}, ss);
1467         }
1468 
1469         if (printInformation) {
1470             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1471         } else {
1472             os << ss.str() << "\n";
1473         }
1474 
1475         ss.str(std::string());
1476         ss.clear();
1477 
1478         fieldIdx++;
1479     }
1480 }
1481 
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1482 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1483 {
1484     if (catchBlock.exceptionRecord.empty()) {
1485         os << ".catchall ";
1486     } else {
1487         os << ".catch " << catchBlock.exceptionRecord << ", ";
1488     }
1489 
1490     os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1491 
1492     if (!catchBlock.catchEndLabel.empty()) {
1493         os << ", " << catchBlock.catchEndLabel;
1494     }
1495 }
1496 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1497 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1498 {
1499     auto boolAttributes = meta.GetBoolAttributes();
1500     auto attributes = meta.GetAttributes();
1501     if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1502         return;
1503     }
1504 
1505     os << " <";
1506 
1507     size_t size = boolAttributes.size();
1508     size_t idx = 0;
1509     for (const auto &attr : boolAttributes) {
1510         os << attr;
1511         ++idx;
1512 
1513         if (!attributes.empty() || !annList.empty() || idx < size) {
1514             os << ", ";
1515         }
1516     }
1517 
1518     size = attributes.size();
1519     idx = 0;
1520     for (const auto &[key, values] : attributes) {
1521         for (size_t i = 0; i < values.size(); i++) {
1522             os << key << "=" << values[i];
1523 
1524             if (i < values.size() - 1) {
1525                 os << ", ";
1526             }
1527         }
1528 
1529         ++idx;
1530 
1531         if (!annList.empty() || idx < size) {
1532             os << ", ";
1533         }
1534     }
1535 
1536     size = annList.size();
1537     idx = 0;
1538     for (const auto &[key, value] : annList) {
1539         os << key << "=" << value;
1540 
1541         ++idx;
1542 
1543         if (idx < size) {
1544             os << ", ";
1545         }
1546     }
1547 
1548     os << ">";
1549 }
1550 
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1551 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1552 {
1553     if (lineNumberTable.empty()) {
1554         return;
1555     }
1556 
1557     os << "\n#   LINE_NUMBER_TABLE:\n";
1558     for (const auto &lineInfo : lineNumberTable) {
1559         os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1560     }
1561 }
1562 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1563 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1564                                                const pandasm::Function &method, std::ostream &os) const
1565 {
1566     if (localVariableTable.empty()) {
1567         return;
1568     }
1569 
1570     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1571     os << "#\t Start   End  Register           Name   Signature\n";
1572     const int startWidth = 5;
1573     const int endWidth = 4;
1574     const int regWidth = 8;
1575     const int nameWidth = 14;
1576     for (const auto &variableInfo : localVariableTable) {
1577         std::ostringstream regStream;
1578         regStream << variableInfo.regNumber << '(';
1579         if (variableInfo.regNumber < 0) {
1580             regStream << "acc";
1581         } else {
1582             uint32_t vreg = variableInfo.regNumber;
1583             uint32_t firstArgReg = method.GetTotalRegs();
1584             if (vreg < firstArgReg) {
1585                 regStream << 'v' << vreg;
1586             } else {
1587                 regStream << 'a' << vreg - firstArgReg;
1588             }
1589         }
1590         regStream << ')';
1591 
1592         os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << "  ";
1593         os << std::setw(endWidth) << std::right << variableInfo.endOffset << "  ";
1594         os << std::setw(regWidth) << std::right << regStream.str() << " ";
1595         os << std::setw(nameWidth) << std::right << variableInfo.name << "   " << variableInfo.type;
1596         if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1597             os << " (" << variableInfo.typeSignature << ")";
1598         }
1599         os << "\n";
1600     }
1601 }
1602 
SerializeLanguage(std::ostream & os) const1603 void Disassembler::SerializeLanguage(std::ostream &os) const
1604 {
1605     os << ".language " << ark::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1606 }
1607 
SerializeFilename(std::ostream & os) const1608 void Disassembler::SerializeFilename(std::ostream &os) const
1609 {
1610     if (file_ == nullptr || file_->GetFilename().empty()) {
1611         return;
1612     }
1613 
1614     os << "# source binary: " << file_->GetFilename() << "\n\n";
1615 }
1616 
SerializeLitArrays(std::ostream & os,bool addSeparators) const1617 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1618 {
1619     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1620 
1621     if (prog_.literalarrayTable.empty()) {
1622         return;
1623     }
1624 
1625     if (addSeparators) {
1626         os << "# ====================\n"
1627               "# LITERALS\n\n";
1628     }
1629 
1630     for (const auto &pair : prog_.literalarrayTable) {
1631         Serialize(pair.first, pair.second, os);
1632     }
1633 
1634     os << "\n";
1635 }
1636 
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1637 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1638 {
1639     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1640 
1641     if (prog_.recordTable.empty()) {
1642         return;
1643     }
1644 
1645     if (addSeparators) {
1646         os << "# ====================\n"
1647               "# RECORDS\n\n";
1648     }
1649 
1650     for (const auto &r : prog_.recordTable) {
1651         if (!panda_file::IsDummyClassName(r.first)) {
1652             Serialize(r.second, os, printInformation);
1653         } else {
1654             SerializeUnionFields(r.second, os, printInformation);
1655         }
1656     }
1657 }
1658 
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1659 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1660 {
1661     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1662 
1663     if (prog_.functionTable.empty()) {
1664         return;
1665     }
1666 
1667     if (addSeparators) {
1668         os << "# ====================\n"
1669               "# METHODS\n\n";
1670     }
1671 
1672     for (const auto &m : prog_.functionTable) {
1673         Serialize(m.second, os, printInformation);
1674     }
1675 }
1676 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1677 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1678 {
1679     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1680 }
1681 
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1682 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1683 {
1684     std::stringstream name;
1685 
1686     if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1687         auto idx = bcIns.GetId().AsIndex();
1688         auto id = file_->ResolveClassIndex(methodId, idx);
1689         auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1690 
1691         name.str("");
1692         name << type.GetPandasmName();
1693     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1694         auto idx = bcIns.GetId().AsIndex();
1695         auto id = file_->ResolveMethodIndex(methodId, idx);
1696 
1697         name << GetMethodSignature(id);
1698     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1699         name << '\"';
1700 
1701         if (skipStrings_ || quiet_) {
1702             name << std::hex << "0x" << bcIns.GetId().AsFileId();
1703         } else {
1704             name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1705         }
1706 
1707         name << '\"';
1708     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1709         auto idx = bcIns.GetId().AsIndex();
1710         auto id = file_->ResolveFieldIndex(methodId, idx);
1711         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1712 
1713         auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1714         if (!panda_file::IsDummyClassName(recordName)) {
1715             name << recordName;
1716             name << '.';
1717         }
1718         name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1719     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1720         auto index = bcIns.GetId().AsIndex();
1721         name << "array_" << index;
1722     }
1723 
1724     return name.str();
1725 }
1726 
GetRecordLanguage(panda_file::File::EntityId classId) const1727 ark::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1728 {
1729     if (file_->IsExternal(classId)) {
1730         return ark::panda_file::SourceLang::PANDA_ASSEMBLY;
1731     }
1732 
1733     panda_file::ClassDataAccessor cda(*file_, classId);
1734     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1735 }
1736 
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1737 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1738                                 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1739                                 panda_file::File::EntityId codeId)
1740 {
1741     const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1742     const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1743     if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1744         size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1745         if (idx != std::numeric_limits<size_t>::max()) {
1746             if (labelTable->find(idx) == labelTable->end()) {
1747                 std::stringstream ss {};
1748                 ss << "jump_label_" << labelTable->size();
1749                 (*labelTable)[idx] = ss.str();
1750             }
1751 
1752             paIns->imms.clear();
1753             paIns->ids.push_back(labelTable->at(idx));
1754         } else {
1755             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1756                                      << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1757                                      << ": invalid jump offset 0x" << jmpOffset
1758                                      << " - jumping in the middle of another instruction!";
1759         }
1760     } else {
1761         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1762                                  << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1763                                  << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1764     }
1765 }
1766 
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1767 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1768 {
1769     auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1770 
1771     pandasm::Field field(fileLanguage_);
1772     GetField(field, fieldAccessor);
1773     if (field.name.empty()) {
1774         return;
1775     }
1776 
1777     auto &fieldList = externalFieldTable_[recordName];
1778     auto retField = std::find_if(fieldList.begin(), fieldList.end(),
1779                                  [&field](pandasm::Field &fieldFromList) { return field.name == fieldFromList.name; });
1780     if (retField == fieldList.end()) {
1781         fieldList.emplace_back(std::move(field));
1782 
1783         externalFieldsInfoTable_[recordName].emplace_back(GetFieldInfo(fieldAccessor));
1784     }
1785 }
1786 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1787 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1788                                      panda_file::File::EntityId codeId)
1789 {
1790     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1791 
1792     const auto insSz = codeAccessor.GetCodeSize();
1793     const auto insArr = codeAccessor.GetInstructions();
1794 
1795     method->regsNum = codeAccessor.GetNumVregs();
1796 
1797     auto bcIns = BytecodeInstruction(insArr);
1798     auto from = bcIns.GetAddress();
1799     const auto bcInsLast = bcIns.JumpTo(insSz);
1800 
1801     LabelTable labelTable = GetExceptions(method, methodId, codeId);
1802 
1803     IdList unknownExternalMethods {};
1804 
1805     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
1806         if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
1807             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1808                                      << "). bytecode instructions sequence corrupted for method " << method->name
1809                                      << "! went out of bounds";
1810 
1811             break;
1812         }
1813 
1814         if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1815             auto idx = bcIns.GetId().AsIndex();
1816             auto id = file_->ResolveFieldIndex(methodId, idx);
1817             panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1818 
1819             if (fieldAccessor.IsExternal()) {
1820                 CollectExternalFields(fieldAccessor);
1821             }
1822         }
1823 
1824         auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
1825         paIns.insDebug.boundLeft =
1826             bcIns.GetAddress() - from;  // It is used to produce a line table during method serialization
1827         if (paIns.IsJump()) {
1828             TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
1829         }
1830 
1831         // check if method id is unknown external method. if so, emplace it in table
1832         if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1833             const auto argMethodIdx = bcIns.GetId().AsIndex();
1834             const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
1835 
1836             const auto argMethodSignature = GetMethodSignature(argMethodId);
1837 
1838             const bool isPresent = prog_.functionTable.find(argMethodSignature) != prog_.functionTable.cend();
1839             const bool isExternal = file_->IsExternal(argMethodId);
1840             if (isExternal && !isPresent) {
1841                 unknownExternalMethods.push_back(argMethodId);
1842             }
1843         }
1844 
1845         method->ins.push_back(paIns);
1846         bcIns = bcIns.GetNext();
1847     }
1848 
1849     for (const auto &pair : labelTable) {
1850         method->ins[pair.first].label = pair.second;
1851         method->ins[pair.first].setLabel = true;
1852     }
1853 
1854     return unknownExternalMethods;
1855 }
1856 
1857 }  // namespace ark::disasm
1858