• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "literal_data_accessor.h"
21 #include "mangling.h"
22 #include "utils/logger.h"
23 
24 #include <cstdint>
25 #include <iomanip>
26 
27 #include "get_language_specific_metadata.inc"
28 
29 namespace ark::disasm {
30 
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)31 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
32 {
33     auto file = panda_file::File::Open(filenameIn);
34     if (file == nullptr) {
35         LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
36     }
37 
38     Disassemble(file, quiet, skipStrings);
39 }
40 
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)41 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
42 {
43     SetFile(file);
44     DisassembleImpl(quiet, skipStrings);
45 }
46 
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)47 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
48 {
49     SetFile(file);
50     DisassembleImpl(quiet, skipStrings);
51 }
52 
DisassembleImpl(const bool quiet,const bool skipStrings)53 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
54 {
55     prog_ = pandasm::Program {};
56 
57     recordNameToId_.clear();
58     methodNameToId_.clear();
59 
60     skipStrings_ = skipStrings;
61     quiet_ = quiet;
62 
63     progInfo_ = ProgInfo {};
64 
65     progAnn_ = ProgAnnotations {};
66 
67     GetLiteralArrays();
68     GetRecords();
69 
70     AddExternalFieldsToRecords();
71     GetLanguageSpecificMetadata();
72 }
73 
SetFile(std::unique_ptr<const panda_file::File> & file)74 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
75 {
76     fileHolder_.swap(file);
77     file_ = fileHolder_.get();
78 }
79 
SetFile(const panda_file::File & file)80 void Disassembler::SetFile(const panda_file::File &file)
81 {
82     fileHolder_.reset();
83     file_ = &file;
84 }
85 
SetProfile(std::string_view fname)86 void Disassembler::SetProfile(std::string_view fname)
87 {
88     std::ifstream stm(fname.data(), std::ios::binary);
89     if (!stm.is_open()) {
90         LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
91     }
92 
93     auto res = profiling::ReadProfile(stm, fileLanguage_);
94     if (!res) {
95         LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
96     }
97     profile_ = res.Value();
98 }
99 
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const100 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
101                               MethodInfo *methodInfo /* out */) const
102 {
103     const static size_t FORMAT_WIDTH = 20;
104     const static size_t INSTRUCTION_WIDTH = 2;
105 
106     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
107 
108     std::string methodName = mda.GetFullName();
109     auto prof = profiling::INVALID_PROFILE;
110     if (profile_ != profiling::INVALID_PROFILE) {
111         prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
112     }
113 
114     auto insSz = codeAccessor.GetCodeSize();
115     auto insArr = codeAccessor.GetInstructions();
116 
117     auto bcIns = BytecodeInstruction(insArr);
118     auto bcInsLast = bcIns.JumpTo(insSz);
119 
120     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
121         std::stringstream ss;
122 
123         uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
124         ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
125         ss << ", " << std::setfill('.');
126 
127         BytecodeInstruction::Format format = bcIns.GetFormat();
128 
129         auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
130         ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
131 
132         ss << "[";
133 
134         const uint8_t *pc = bcIns.GetAddress();
135         const size_t sz = bcIns.GetSize();
136 
137         for (size_t i = 0; i < sz; i++) {
138             ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
139                << static_cast<int>(pc[i]);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
140 
141             if (i != sz - 1) {
142                 ss << " ";
143             }
144         }
145 
146         ss << "]";
147 
148         if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
149             auto profId = bcIns.GetProfileId();
150             if (profId != -1) {
151                 ss << ", Profile: ";
152                 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
153             }
154         }
155 
156         methodInfo->instructionsInfo.push_back(ss.str());
157 
158         bcIns = bcIns.GetNext();
159     }
160 }
161 
CollectInfo()162 void Disassembler::CollectInfo()
163 {
164     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
165 
166     debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
167 
168     for (const auto &pair : recordNameToId_) {
169         GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
170     }
171 
172     for (const auto &pair : methodNameToId_) {
173         GetMethodInfo(pair.second, &progInfo_.methodsInfo[pair.first]);
174     }
175 
176     AddExternalFieldsInfoToRecords();
177 }
178 
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const179 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
180 {
181     if (os.bad()) {
182         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
183 
184         return;
185     }
186 
187     SerializeFilename(os);
188     SerializeLanguage(os);
189     SerializeLitArrays(os, addSeparators);
190     SerializeRecords(os, addSeparators, printInformation);
191     SerializeMethods(os, addSeparators, printInformation);
192 }
193 
SerializePrintStartInfo(const pandasm::Function & method,std::ostringstream & headerSs) const194 void Disassembler::SerializePrintStartInfo(const pandasm::Function &method, std::ostringstream &headerSs) const
195 {
196     headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
197 
198     if (!method.params.empty()) {
199         headerSs << method.params[0].type.GetPandasmName() << " a0";
200 
201         for (size_t i = 1; i < method.params.size(); i++) {
202             headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
203         }
204     }
205     headerSs << ")";
206 }
207 
SerializeCheckEnd(const pandasm::Function & method,std::ostream & os,bool printMethodInfo,const MethodInfo * & methodInfo) const208 void Disassembler::SerializeCheckEnd(const pandasm::Function &method, std::ostream &os, bool printMethodInfo,
209                                      const MethodInfo *&methodInfo) const
210 {
211     if (!method.catchBlocks.empty()) {
212         os << "\n";
213 
214         for (const auto &catchBlock : method.catchBlocks) {
215             Serialize(catchBlock, os);
216             os << "\n";
217         }
218     }
219 
220     if (printMethodInfo) {
221         ASSERT(methodInfo != nullptr);
222         SerializeLineNumberTable(methodInfo->lineNumberTable, os);
223         SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
224     }
225 
226     os << "}\n\n";
227 }
228 
SerializeIfPrintMethodInfo(const pandasm::Function & method,bool printMethodInfo,std::ostringstream & headerSs,const MethodInfo * & methodInfo,std::map<std::string,ark::disasm::MethodInfo>::const_iterator & methodInfoIt) const229 size_t Disassembler::SerializeIfPrintMethodInfo(
230     const pandasm::Function &method, bool printMethodInfo, std::ostringstream &headerSs, const MethodInfo *&methodInfo,
231     std::map<std::string, ark::disasm::MethodInfo>::const_iterator &methodInfoIt) const
232 {
233     size_t width = 0;
234     if (printMethodInfo) {
235         methodInfo = &methodInfoIt->second;
236 
237         for (const auto &i : method.ins) {
238             if (i.ToString().size() > width) {
239                 width = i.ToString().size();
240             }
241         }
242 
243         headerSs << " # " << methodInfo->methodInfo << "\n#   CODE:";
244     }
245 
246     headerSs << "\n";
247     return width;
248 }
249 
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const250 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
251                              panda_file::LineNumberTable *lineTable) const
252 {
253     std::ostringstream headerSs;
254     SerializePrintStartInfo(method, headerSs);
255     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
256     const auto methodIter = progAnn_.methodAnnotations.find(signature);
257     if (methodIter != progAnn_.methodAnnotations.end()) {
258         Serialize(*method.metadata, methodIter->second, headerSs);
259     } else {
260         Serialize(*method.metadata, {}, headerSs);
261     }
262 
263     if (!method.HasImplementation()) {
264         headerSs << "\n\n";
265         os << headerSs.str();
266         return;
267     }
268 
269     headerSs << " {";
270 
271     const MethodInfo *methodInfo = nullptr;
272     auto methodInfoIt = progInfo_.methodsInfo.find(signature);
273     bool printMethodInfo = printInformation && methodInfoIt != progInfo_.methodsInfo.end();
274     size_t width = SerializeIfPrintMethodInfo(method, printMethodInfo, headerSs, methodInfo, methodInfoIt);
275 
276     auto headerSsStr = headerSs.str();
277     size_t lineNumber = std::count(headerSsStr.begin(), headerSsStr.end(), '\n') + 1;
278 
279     os << headerSsStr;
280 
281     for (size_t i = 0; i < method.ins.size(); i++) {
282         std::ostringstream insSs;
283 
284         std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
285         if (method.ins[i].setLabel) {
286             insSs << ins.substr(0, ins.find(": ")) << ":\n";
287             ins.erase(0, ins.find(": ") + std::string(": ").length());
288         }
289 
290         insSs << "\t";
291         if (printMethodInfo) {
292             insSs << std::setw(width) << std::left;
293         }
294         insSs << ins;
295         if (printMethodInfo) {
296             ASSERT(methodInfo != nullptr);
297             insSs << " # " << methodInfo->instructionsInfo[i];
298         }
299         insSs << "\n";
300 
301         auto insSsStr = insSs.str();
302         lineNumber += std::count(insSsStr.begin(), insSsStr.end(), '\n');
303 
304         if (lineTable != nullptr) {
305             lineTable->emplace_back(
306                 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
307         }
308 
309         os << insSsStr;
310     }
311 
312     SerializeCheckEnd(method, os, printMethodInfo, methodInfo);
313 }
314 
IsSystemType(const std::string & typeName)315 inline bool Disassembler::IsSystemType(const std::string &typeName)
316 {
317     bool isArrayType = typeName.back() == ']';
318     bool isGlobal = typeName == "_GLOBAL";
319 
320     return isArrayType || isGlobal;
321 }
322 
GetRecord(pandasm::Record & record,const panda_file::File::EntityId & recordId)323 void Disassembler::GetRecord(pandasm::Record &record, const panda_file::File::EntityId &recordId)
324 {
325     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
326 
327     record.name = GetFullRecordName(recordId);
328 
329     LOG(DEBUG, DISASSEMBLER) << "name: " << record.name;
330 
331     GetMetaData(&record, recordId);
332 
333     if (!file_->IsExternal(recordId)) {
334         GetMethods(recordId);
335         GetFields(record, recordId);
336     }
337 }
338 
AddMethodToTables(const panda_file::File::EntityId & methodId)339 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
340 {
341     pandasm::Function newMethod("", fileLanguage_);
342     GetMethod(&newMethod, methodId);
343 
344     const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
345     if (prog_.functionTable.find(signature) != prog_.functionTable.end()) {
346         return;
347     }
348 
349     methodNameToId_.emplace(signature, methodId);
350     prog_.functionSynonyms[newMethod.name].push_back(signature);
351     prog_.functionTable.emplace(signature, std::move(newMethod));
352 }
353 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)354 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
355 {
356     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
357 
358     if (method == nullptr) {
359         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
360 
361         return;
362     }
363 
364     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
365 
366     method->name = GetFullMethodName(methodId);
367 
368     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
369 
370     GetParams(method, methodAccessor.GetProtoId());
371     GetMetaData(method, methodId);
372 
373     if (!method->HasImplementation()) {
374         return;
375     }
376 
377     if (methodAccessor.GetCodeId().has_value()) {
378         const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
379 
380         for (const auto &id : idList) {
381             AddMethodToTables(id);
382         }
383     } else {
384         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
385                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
386 
387         return;
388     }
389 }
390 
391 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const392 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
393                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
394 {
395     panda_file::File::EntityId id(std::get<uint32_t>(value));
396     auto sp = file_->GetSpanFromId(id);
397     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
398     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
399         for (size_t i = 0; i < len; i++) {
400             pandasm::LiteralArray::Literal lit;
401             lit.tag = tag;
402             lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
403             litArray->literals.push_back(lit);
404         }
405     } else {
406         for (size_t i = 0; i < len; i++) {
407             auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
408             pandasm::LiteralArray::Literal lit;
409             lit.tag = tag;
410             lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
411             litArray->literals.push_back(lit);
412         }
413     }
414 }
415 
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const416 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
417                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
418                                    const panda_file::LiteralTag &tag) const
419 {
420     pandasm::LiteralArray::Literal lit;
421     if (tag == panda_file::LiteralTag::TAGVALUE) {
422         return;
423     }
424     lit.tag = tag;
425     lit.value = ParseLiteralValue(value, tag);
426     litArray->literals.push_back(lit);
427 }
428 
ParseLiteralValue(const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const429 std::variant<bool, uint8_t, uint16_t, uint32_t, uint64_t, float, double, std::string> Disassembler::ParseLiteralValue(
430     const panda_file::LiteralDataAccessor::LiteralValue &value, const panda_file::LiteralTag &tag) const
431 {
432     switch (tag) {
433         case panda_file::LiteralTag::BOOL:
434             return std::get<bool>(value);
435         case panda_file::LiteralTag::ACCESSOR:
436         case panda_file::LiteralTag::NULLVALUE:
437             return std::get<uint8_t>(value);
438         case panda_file::LiteralTag::METHODAFFILIATE:
439             return std::get<uint16_t>(value);
440         case panda_file::LiteralTag::INTEGER:
441             return std::get<uint32_t>(value);
442         case panda_file::LiteralTag::BIGINT:
443             return std::get<uint64_t>(value);
444         case panda_file::LiteralTag::FLOAT:
445             return std::get<float>(value);
446         case panda_file::LiteralTag::DOUBLE:
447             return std::get<double>(value);
448         case panda_file::LiteralTag::STRING:
449         case panda_file::LiteralTag::METHOD:
450         case panda_file::LiteralTag::GENERATORMETHOD:
451             return ParseStringData(value);
452         case panda_file::LiteralTag::LITERALARRAY:
453             return ParseLiteralArrayData(value);
454         default:
455             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
456             UNREACHABLE();
457     }
458 }
459 
ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue & value) const460 std::string Disassembler::ParseStringData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
461 {
462     auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
463     return StringDataToString(strData);
464 }
465 
ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue & value) const466 std::string Disassembler::ParseLiteralArrayData(const panda_file::LiteralDataAccessor::LiteralValue &value) const
467 {
468     std::stringstream ss;
469     ss << "0x" << std::hex << std::get<uint32_t>(value);
470     return ss.str();
471 }
472 
GetLiteralArrayByOffset(pandasm::LiteralArray * litArray,panda_file::File::EntityId offset) const473 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *litArray, panda_file::File::EntityId offset) const
474 {
475     panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
476     auto processLiteralValue = [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
477                                                 const panda_file::LiteralTag &tag) {
478         switch (tag) {
479             case panda_file::LiteralTag::ARRAY_U1: {
480                 FillLiteralArrayData<bool>(litArray, tag, value);
481                 break;
482             }
483             case panda_file::LiteralTag::ARRAY_I8:
484             case panda_file::LiteralTag::ARRAY_U8: {
485                 FillLiteralArrayData<uint8_t>(litArray, tag, value);
486                 break;
487             }
488             case panda_file::LiteralTag::ARRAY_I16:
489             case panda_file::LiteralTag::ARRAY_U16: {
490                 FillLiteralArrayData<uint16_t>(litArray, tag, value);
491                 break;
492             }
493             case panda_file::LiteralTag::ARRAY_I32:
494             case panda_file::LiteralTag::ARRAY_U32: {
495                 FillLiteralArrayData<uint32_t>(litArray, tag, value);
496                 break;
497             }
498             case panda_file::LiteralTag::ARRAY_I64:
499             case panda_file::LiteralTag::ARRAY_U64: {
500                 FillLiteralArrayData<uint64_t>(litArray, tag, value);
501                 break;
502             }
503             case panda_file::LiteralTag::ARRAY_F32: {
504                 FillLiteralArrayData<float>(litArray, tag, value);
505                 break;
506             }
507             case panda_file::LiteralTag::ARRAY_F64: {
508                 FillLiteralArrayData<double>(litArray, tag, value);
509                 break;
510             }
511             case panda_file::LiteralTag::ARRAY_STRING: {
512                 FillLiteralArrayData<uint32_t>(litArray, tag, value);
513                 break;
514             }
515             default: {
516                 FillLiteralData(litArray, value, tag);
517                 break;
518             }
519         }
520     };
521 
522     litArrayAccessor.EnumerateLiteralVals(offset, processLiteralValue);
523 }
524 
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)525 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
526 {
527     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
528 
529     panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
530     GetLiteralArrayByOffset(litArray, litArrayAccessor.GetLiteralArrayId(index));
531 }
532 
GetLiteralArrays()533 void Disassembler::GetLiteralArrays()
534 {
535     const auto litArraysId = file_->GetLiteralArraysId();
536 
537     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
538                              << ")";
539 
540     panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
541     size_t numLitarrays = litArrayAccessor.GetLiteralNum();
542     for (size_t index = 0; index < numLitarrays; index++) {
543         ark::pandasm::LiteralArray litAr;
544         GetLiteralArray(&litAr, index);
545         prog_.literalarrayTable.emplace(std::to_string(index), litAr);
546     }
547 }
548 
GetRecords()549 void Disassembler::GetRecords()
550 {
551     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
552 
553     const auto classIdx = file_->GetClasses();
554 
555     for (size_t i = 0; i < classIdx.size(); i++) {
556         uint32_t classId = classIdx[i];
557         auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
558 
559         if (classId > file_->GetHeader()->fileSize) {
560             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
561                                      << classOff << "). binary file corrupted. record offset (0x" << classId
562                                      << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
563             break;
564         }
565 
566         const panda_file::File::EntityId recordId {classId};
567         auto language = GetRecordLanguage(recordId);
568         if (language != fileLanguage_) {
569             if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
570                 fileLanguage_ = language;
571             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
572                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
573                                          << std::hex << classOff << "). record's language  ("
574                                          << panda_file::LanguageToString(language)
575                                          << ")  differs from file's language ("
576                                          << panda_file::LanguageToString(fileLanguage_) << ")!";
577             }
578         }
579 
580         pandasm::Record record("", fileLanguage_);
581         GetRecord(record, recordId);
582 
583         if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
584             recordNameToId_.emplace(record.name, recordId);
585             prog_.recordTable.emplace(record.name, std::move(record));
586         }
587     }
588 }
589 
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)590 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
591 {
592     panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
593     field.name = StringDataToString(file_->GetStringData(fieldNameId));
594 
595     uint32_t fieldType = fieldAccessor.GetType();
596     field.type = FieldTypeToPandasmType(fieldType);
597 
598     GetMetaData(&field, fieldAccessor.GetFieldId());
599 }
600 
GetFields(pandasm::Record & record,const panda_file::File::EntityId & recordId)601 void Disassembler::GetFields(pandasm::Record &record, const panda_file::File::EntityId &recordId)
602 {
603     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
604 
605     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
606         pandasm::Field field(fileLanguage_);
607 
608         GetField(field, fieldAccessor);
609 
610         record.fieldList.push_back(std::move(field));
611     });
612 }
613 
AddExternalFieldsToRecords()614 void Disassembler::AddExternalFieldsToRecords()
615 {
616     for (auto &[recordName, record] : prog_.recordTable) {
617         auto iter = externalFieldTable_.find(recordName);
618         if (iter == externalFieldTable_.end() || iter->second.empty()) {
619             continue;
620         }
621         for (auto &fieldIter : iter->second) {
622             record.fieldList.push_back(std::move(fieldIter));
623         }
624         externalFieldTable_.erase(recordName);
625     }
626 }
627 
AddExternalFieldsInfoToRecords()628 void Disassembler::AddExternalFieldsInfoToRecords()
629 {
630     for (auto &[recordName, recordInfo] : progInfo_.recordsInfo) {
631         auto iter = externalFieldsInfoTable_.find(recordName);
632         if (iter == externalFieldsInfoTable_.end() || iter->second.empty()) {
633             continue;
634         }
635         for (auto &info : iter->second) {
636             recordInfo.fieldsInfo.push_back(std::move(info));
637         }
638         externalFieldsInfoTable_.erase(recordName);
639     }
640 }
641 
GetMethods(const panda_file::File::EntityId & recordId)642 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
643 {
644     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
645 
646     classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
647         AddMethodToTables(methodAccessor.GetMethodId());
648     });
649 }
650 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const651 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
652 {
653     /// frame size - 2^16 - 1
654     static const uint32_t MAX_ARG_NUM = 0xFFFF;
655 
656     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
657 
658     if (method == nullptr) {
659         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
660 
661         return;
662     }
663 
664     panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
665 
666     if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
667         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
668                                  << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
669                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
670 
671         return;
672     }
673 
674     size_t refIdx = 0;
675     method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
676 
677     for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
678         auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
679         method->params.emplace_back(argType, fileLanguage_);
680     }
681 }
682 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const683 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
684                                        panda_file::File::EntityId codeId) const
685 {
686     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
687 
688     if (method == nullptr) {
689         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
690         return LabelTable {};
691     }
692 
693     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
694 
695     const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
696     const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
697 
698     size_t tryIdx = 0;
699     LabelTable labelTable {};
700     codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
701         pandasm::Function::CatchBlock catchBlockPa {};
702         if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
703             return false;
704         }
705         size_t catchIdx = 0;
706         tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
707             auto classIdx = catchBlock.GetTypeIdx();
708             if (classIdx == panda_file::INVALID_INDEX) {
709                 catchBlockPa.exceptionRecord = "";
710             } else {
711                 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
712                 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
713             }
714             if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
715                 return false;
716             }
717 
718             method->catchBlocks.push_back(catchBlockPa);
719             catchBlockPa.catchBeginLabel = "";
720             catchBlockPa.catchEndLabel = "";
721             catchIdx++;
722 
723             return true;
724         });
725         tryIdx++;
726 
727         return true;
728     });
729 
730     return labelTable;
731 }
732 
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)733 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
734 {
735     size_t count = 0;
736 
737     while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
738         count++;
739         bcInsFirst = bcInsFirst.GetNext();
740         if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
741             return std::numeric_limits<size_t>::max();
742         }
743     }
744 
745     return count;
746 }
747 
748 // CC-OFFNXT(G.FUN.01) solid logic
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const749 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
750                                   const panda_file::CodeDataAccessor::TryBlock &tryBlock,
751                                   pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
752                                   size_t tryIdx) const
753 {
754     const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
755     const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
756 
757     const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
758     const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
759 
760     const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
761     const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
762     const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
763     const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
764 
765     if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
766         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
767                                  << tryBeginBcIns.GetAddress();
768         return false;
769     }
770 
771     auto itBegin = labelTable->find(tryBeginIdx);
772     if (itBegin == labelTable->end()) {
773         std::stringstream ss {};
774         ss << "try_begin_label_" << tryIdx;
775         catchBlockPa->tryBeginLabel = ss.str();
776         labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
777     } else {
778         catchBlockPa->tryBeginLabel = itBegin->second;
779     }
780 
781     if (!tryEndOffsetInRange || !tryEndOffsetValid) {
782         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
783                                  << tryEndBcIns.GetAddress();
784         return false;
785     }
786 
787     auto itEnd = labelTable->find(tryEndIdx);
788     if (itEnd == labelTable->end()) {
789         std::stringstream ss {};
790         ss << "try_end_label_" << tryIdx;
791         catchBlockPa->tryEndLabel = ss.str();
792         labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
793     } else {
794         catchBlockPa->tryEndLabel = itEnd->second;
795     }
796 
797     return true;
798 }
799 
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const800 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
801                                     const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
802                                     pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
803                                     size_t catchIdx) const
804 {
805     const auto handlerBeginOffset = catchBlock.GetHandlerPc();
806     const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
807 
808     const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
809     const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
810 
811     const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
812     const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
813 
814     const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
815     const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
816     const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
817     const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
818     const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
819 
820     if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
821         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
822                                  << handlerBeginBcIns.GetAddress();
823         return false;
824     }
825 
826     auto itBegin = labelTable->find(handlerBeginIdx);
827     if (itBegin == labelTable->end()) {
828         std::stringstream ss {};
829         ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
830         catchBlockPa->catchBeginLabel = ss.str();
831         labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
832     } else {
833         catchBlockPa->catchBeginLabel = itBegin->second;
834     }
835 
836     if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
837         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
838                                  << handlerEndBcIns.GetAddress();
839         return false;
840     }
841 
842     if (handlerEndPresent) {
843         auto itEnd = labelTable->find(handlerEndIdx);
844         if (itEnd == labelTable->end()) {
845             std::stringstream ss {};
846             ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
847             catchBlockPa->catchEndLabel = ss.str();
848             labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
849         } else {
850             catchBlockPa->catchEndLabel = itEnd->second;
851         }
852     }
853 
854     return true;
855 }
856 
857 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)858 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
859 {
860     if (shouldSet()) {
861         auto err = entity->metadata->SetAttribute(attribute);
862         if (err.has_value()) {
863             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
864         }
865     }
866 }
867 
868 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)869 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
870                                     const char *value)
871 {
872     if (shouldSet()) {
873         auto err = entity->metadata->SetAttributeValue(attribute, value);
874         if (err.has_value()) {
875             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
876         }
877     }
878 }
879 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const880 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
881 {
882     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
883 
884     if (method == nullptr) {
885         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
886 
887         return;
888     }
889 
890     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
891 
892     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
893 
894     if (!methodAccessor.IsStatic()) {
895         const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
896         auto thisType = pandasm::Type::FromDescriptor(className);
897 
898         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
899                                  << "\') is not static. emplacing self-argument of type " << thisType.GetName();
900 
901         method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
902     }
903     SetEntityAttribute(
904         method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
905 
906     SetEntityAttribute(
907         method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
908 
909     SetEntityAttribute(
910         method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
911 
912     SetEntityAttribute(
913         method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
914 
915     SetEntityAttribute(
916         method, [&methodAccessor]() { return methodAccessor.IsVarArgs(); }, "varargs");
917 
918     SetEntityAttributeValue(
919         method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
920 
921     SetEntityAttributeValue(
922         method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
923 
924     SetEntityAttributeValue(
925         method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
926 
927     SetEntityAttribute(
928         method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
929 
930     std::string ctorName = ark::panda_file::GetCtorName(fileLanguage_);
931     std::string cctorName = ark::panda_file::GetCctorName(fileLanguage_);
932 
933     const bool isCtor = (methodNameRaw == ctorName);
934     const bool isCctor = (methodNameRaw == cctorName);
935 
936     if (isCtor) {
937         method->metadata->SetAttribute("ctor");
938         method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
939     } else if (isCctor) {
940         method->metadata->SetAttribute("cctor");
941         method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
942     }
943 }
944 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const945 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
946 {
947     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
948 
949     if (record == nullptr) {
950         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
951 
952         return;
953     }
954 
955     SetEntityAttribute(
956         record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
957 
958     auto external = file_->IsExternal(recordId);
959     if (!external) {
960         auto cda = panda_file::ClassDataAccessor {*file_, recordId};
961         SetEntityAttributeValue(
962             record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
963 
964         SetEntityAttributeValue(
965             record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
966 
967         SetEntityAttributeValue(
968             record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
969 
970         SetEntityAttribute(
971             record, [&cda]() { return cda.IsFinal(); }, "final");
972     }
973 }
974 
975 template <typename T, pandasm::Value::Type VALUE_TYPE>
SetMetadata(panda_file::FieldDataAccessor & accessor,pandasm::Field * field) const976 void Disassembler::SetMetadata(panda_file::FieldDataAccessor &accessor, pandasm::Field *field) const
977 {
978     std::optional<T> val = accessor.GetValue<T>();
979     if (val.has_value()) {
980         field->metadata->SetValue(pandasm::ScalarValue::Create<VALUE_TYPE>(val.value()));
981     }
982 }
983 
GetMetadataFieldValue(panda_file::FieldDataAccessor & fieldAccessor,pandasm::Field * field) const984 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &fieldAccessor, pandasm::Field *field) const
985 {
986     static const std::unordered_map<panda_file::Type::TypeId,
987                                     std::function<void(panda_file::FieldDataAccessor &, pandasm::Field *)>>
988         HANDLERS = {
989             {panda_file::Type::TypeId::U1,
990              [this](auto &accessor, auto *f) { SetMetadata<bool, pandasm::Value::Type::U1>(accessor, f); }},
991             {panda_file::Type::TypeId::U8,
992              [this](auto &accessor, auto *f) { SetMetadata<uint8_t, pandasm::Value::Type::U8>(accessor, f); }},
993             {panda_file::Type::TypeId::U16,
994              [this](auto &accessor, auto *f) { SetMetadata<uint16_t, pandasm::Value::Type::U16>(accessor, f); }},
995             {panda_file::Type::TypeId::U32,
996              [this](auto &accessor, auto *f) { SetMetadata<uint32_t, pandasm::Value::Type::U32>(accessor, f); }},
997             {panda_file::Type::TypeId::F64,
998              [this](auto &accessor, auto *f) { SetMetadata<double, pandasm::Value::Type::F64>(accessor, f); }},
999             {panda_file::Type::TypeId::I8,
1000              [this](auto &accessor, auto *f) { SetMetadata<int8_t, pandasm::Value::Type::I8>(accessor, f); }},
1001             {panda_file::Type::TypeId::I16,
1002              [this](auto &accessor, auto *f) { SetMetadata<int16_t, pandasm::Value::Type::I16>(accessor, f); }},
1003             {panda_file::Type::TypeId::I32,
1004              [this](auto &accessor, auto *f) { SetMetadata<int32_t, pandasm::Value::Type::I32>(accessor, f); }},
1005             {panda_file::Type::TypeId::I64,
1006              [this](auto &accessor, auto *f) { SetMetadata<int64_t, pandasm::Value::Type::I64>(accessor, f); }},
1007         };
1008 
1009     auto it = HANDLERS.find(field->type.GetId());
1010     if (it != HANDLERS.end()) {
1011         it->second(fieldAccessor, field);
1012     } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE &&
1013                field->type.GetName() == "std/core/String") {
1014         std::optional<uint32_t> stringOffsetVal = fieldAccessor.GetValue<uint32_t>();
1015         if (stringOffsetVal.has_value()) {
1016             std::string_view val {reinterpret_cast<const char *>(
1017                 file_->GetStringData(panda_file::File::EntityId(stringOffsetVal.value())).data)};
1018             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1019         }
1020     } else if (field->type.GetRank() > 0) {
1021         std::optional<uint32_t> litarrayOffsetVal = fieldAccessor.GetValue<uint32_t>();
1022         if (litarrayOffsetVal.has_value()) {
1023             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1024                 std::string_view {std::to_string(litarrayOffsetVal.value())}));
1025         }
1026     }
1027 }
1028 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const1029 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
1030 {
1031     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
1032 
1033     if (field == nullptr) {
1034         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1035 
1036         return;
1037     }
1038 
1039     panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
1040 
1041     SetEntityAttribute(
1042         field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
1043 
1044     SetEntityAttribute(
1045         field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
1046 
1047     SetEntityAttributeValue(
1048         field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
1049 
1050     SetEntityAttributeValue(
1051         field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
1052 
1053     SetEntityAttributeValue(
1054         field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
1055 
1056     SetEntityAttribute(
1057         field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
1058     GetMetadataFieldValue(fieldAccessor, field);
1059 }
1060 
AnnotationTagToString(const char tag) const1061 std::string Disassembler::AnnotationTagToString(const char tag) const
1062 {
1063     static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
1064                                                                         {'2', "i8"},
1065                                                                         {'3', "u8"},
1066                                                                         {'4', "i16"},
1067                                                                         {'5', "u16"},
1068                                                                         {'6', "i32"},
1069                                                                         {'7', "u32"},
1070                                                                         {'8', "i64"},
1071                                                                         {'9', "u64"},
1072                                                                         {'A', "f32"},
1073                                                                         {'B', "f64"},
1074                                                                         {'C', "string"},
1075                                                                         {'D', "record"},
1076                                                                         {'E', "method"},
1077                                                                         {'F', "enum"},
1078                                                                         {'G', "annotation"},
1079                                                                         {'J', "method_handle"},
1080                                                                         {'H', "array"},
1081                                                                         {'K', "u1[]"},
1082                                                                         {'L', "i8[]"},
1083                                                                         {'M', "u8[]"},
1084                                                                         {'N', "i16[]"},
1085                                                                         {'O', "u16[]"},
1086                                                                         {'P', "i32[]"},
1087                                                                         {'Q', "u32[]"},
1088                                                                         {'R', "i64[]"},
1089                                                                         {'S', "u64[]"},
1090                                                                         {'T', "f32[]"},
1091                                                                         {'U', "f64[]"},
1092                                                                         {'V', "string[]"},
1093                                                                         {'W', "record[]"},
1094                                                                         {'X', "method[]"},
1095                                                                         {'Y', "enum[]"},
1096                                                                         {'Z', "annotation[]"},
1097                                                                         {'@', "method_handle[]"},
1098                                                                         {'*', "nullptr_string"}};
1099 
1100     return TAG_TO_STRING.at(tag);
1101 }
1102 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1103 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1104 {
1105     std::stringstream ss;
1106 
1107     if (type == "i8") {
1108         auto res = value.Get<int8_t>();
1109         ss << static_cast<int>(res);
1110     } else if (type == "u1" || type == "u8") {
1111         auto res = value.Get<uint8_t>();
1112         ss << static_cast<unsigned int>(res);
1113     } else if (type == "i16") {
1114         ss << value.Get<int16_t>();
1115     } else if (type == "u16") {
1116         ss << value.Get<uint16_t>();
1117     } else if (type == "i32") {
1118         ss << value.Get<int32_t>();
1119     } else if (type == "u32") {
1120         ss << value.Get<uint32_t>();
1121     } else if (type == "i64") {
1122         ss << value.Get<int64_t>();
1123     } else if (type == "u64") {
1124         ss << value.Get<uint64_t>();
1125     } else if (type == "f32") {
1126         ss << value.Get<float>();
1127     } else if (type == "f64") {
1128         ss << value.Get<double>();
1129     } else if (type == "string") {
1130         const auto id = value.Get<panda_file::File::EntityId>();
1131         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1132     } else if (type == "record") {
1133         const auto id = value.Get<panda_file::File::EntityId>();
1134         ss << GetFullRecordName(id);
1135     } else if (type == "method") {
1136         const auto id = value.Get<panda_file::File::EntityId>();
1137         AddMethodToTables(id);
1138         ss << GetMethodSignature(id);
1139     } else if (type == "enum") {
1140         const auto id = value.Get<panda_file::File::EntityId>();
1141         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1142         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1143            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1144     } else if (type == "annotation") {
1145         const auto id = value.Get<panda_file::File::EntityId>();
1146         ss << "id_" << id;
1147     } else if (type == "void") {
1148         return std::string();
1149     } else if (type == "method_handle") {
1150     } else if (type == "nullptr_string") {
1151         ss << static_cast<uint32_t>(0);
1152     }
1153 
1154     return ss.str();
1155 }
1156 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1157 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1158                                              const size_t idx)
1159 {
1160     std::stringstream ss;
1161 
1162     if (type == "i8") {
1163         auto res = value.Get<int8_t>(idx);
1164         ss << static_cast<int>(res);
1165     } else if (type == "u1" || type == "u8") {
1166         auto res = value.Get<uint8_t>(idx);
1167         ss << static_cast<unsigned int>(res);
1168     } else if (type == "i16") {
1169         ss << (value.Get<int16_t>(idx));
1170     } else if (type == "u16") {
1171         ss << (value.Get<uint16_t>(idx));
1172     } else if (type == "i32") {
1173         ss << (value.Get<int32_t>(idx));
1174     } else if (type == "u32") {
1175         ss << (value.Get<uint32_t>(idx));
1176     } else if (type == "i64") {
1177         ss << (value.Get<int64_t>(idx));
1178     } else if (type == "u64") {
1179         ss << (value.Get<uint64_t>(idx));
1180     } else if (type == "f32") {
1181         ss << value.Get<float>(idx);
1182     } else if (type == "f64") {
1183         ss << value.Get<double>(idx);
1184     } else if (type == "string") {
1185         const auto id = value.Get<panda_file::File::EntityId>(idx);
1186         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1187     } else if (type == "record") {
1188         const auto id = value.Get<panda_file::File::EntityId>(idx);
1189         ss << GetFullRecordName(id);
1190     } else if (type == "method") {
1191         const auto id = value.Get<panda_file::File::EntityId>(idx);
1192         AddMethodToTables(id);
1193         ss << GetMethodSignature(id);
1194     } else if (type == "enum") {
1195         const auto id = value.Get<panda_file::File::EntityId>(idx);
1196         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1197         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1198            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1199     } else if (type == "annotation") {
1200         const auto id = value.Get<panda_file::File::EntityId>(idx);
1201         ss << "id_" << id;
1202     } else if (type == "method_handle") {
1203     } else if (type == "nullptr_string") {
1204     }
1205 
1206     return ss.str();
1207 }
1208 
GetFullMethodName(const panda_file::File::EntityId & methodId) const1209 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1210 {
1211     ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1212 
1213     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1214 
1215     std::string className = GetFullRecordName(methodAccessor.GetClassId());
1216     if (IsSystemType(className)) {
1217         className = "";
1218     } else {
1219         className += ".";
1220     }
1221 
1222     return className + methodNameRaw;
1223 }
1224 
GetMethodSignature(const panda_file::File::EntityId & methodId) const1225 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1226 {
1227     ark::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1228 
1229     pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1230     GetParams(&method, methodAccessor.GetProtoId());
1231     GetMetaData(&method, methodId);
1232 
1233     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1234 }
1235 
GetFullRecordName(const panda_file::File::EntityId & classId) const1236 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1237 {
1238     std::string name = StringDataToString(file_->GetStringData(classId));
1239 
1240     auto type = pandasm::Type::FromDescriptor(name);
1241     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1242 
1243     return type.GetPandasmName();
1244 }
1245 
1246 static constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1247 
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor,std::stringstream & ss)1248 static void GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor, std::stringstream &ss)
1249 {
1250     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1251        << ", type: 0x" << fieldAccessor.GetType();
1252 }
1253 
GetFieldInfo(const panda_file::FieldDataAccessor & fieldAccessor)1254 static std::string GetFieldInfo(const panda_file::FieldDataAccessor &fieldAccessor)
1255 {
1256     std::stringstream ss;
1257     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << fieldAccessor.GetFieldId()
1258        << ", type: 0x" << fieldAccessor.GetType();
1259     return ss.str();
1260 }
1261 
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1262 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1263 {
1264     if (file_->IsExternal(recordId)) {
1265         return;
1266     }
1267 
1268     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1269     std::stringstream ss;
1270 
1271     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1272        << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1273        << std::dec << classAccessor.GetSize() << ")";
1274 
1275     recordInfo->recordInfo = ss.str();
1276     ss.str(std::string());
1277 
1278     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1279         GetFieldInfo(fieldAccessor, ss);
1280 
1281         recordInfo->fieldsInfo.push_back(ss.str());
1282 
1283         ss.str(std::string());
1284     });
1285 }
1286 
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1287 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1288 {
1289     panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1290     std::stringstream ss;
1291 
1292     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1293        << methodAccessor.GetMethodId();
1294 
1295     if (methodAccessor.GetCodeId().has_value()) {
1296         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1297            << methodAccessor.GetCodeId().value();
1298 
1299         GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1300     } else {
1301         ss << ", <no code>";
1302     }
1303 
1304     auto profileSize = methodAccessor.GetProfileSize();
1305     if (profileSize) {
1306         ss << ", profile size: " << profileSize.value();
1307     }
1308 
1309     methodInfo->methodInfo = ss.str();
1310 
1311     if (methodAccessor.GetCodeId()) {
1312         ASSERT(debugInfoExtractor_ != nullptr);
1313         methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1314         methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1315 
1316         // Add information about parameters into the table
1317         panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1318         auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1319         uint32_t codeSize = codeda.GetCodeSize();
1320         for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1321             panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1322             methodInfo->localVariableTable.emplace_back(argInfo);
1323         }
1324     }
1325 }
1326 
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1327 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1328 {
1329     if (litArray.literals.empty()) {
1330         return;
1331     }
1332 
1333     bool isConst = litArray.literals[0].IsArray();
1334 
1335     std::stringstream specifiers {};
1336 
1337     if (isConst) {
1338         specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1339     }
1340 
1341     os << ".array array_" << name << " " << specifiers.str() << "{";
1342 
1343     SerializeValues(litArray, isConst, os);
1344 
1345     os << "}\n";
1346 }
1347 
LiteralTagToString(const panda_file::LiteralTag & tag) const1348 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1349 {
1350     switch (tag) {
1351         case panda_file::LiteralTag::BOOL:
1352         case panda_file::LiteralTag::ARRAY_U1:
1353             return "u1";
1354         case panda_file::LiteralTag::ARRAY_U8:
1355             return "u8";
1356         case panda_file::LiteralTag::ARRAY_I8:
1357             return "i8";
1358         case panda_file::LiteralTag::ARRAY_U16:
1359             return "u16";
1360         case panda_file::LiteralTag::ARRAY_I16:
1361             return "i16";
1362         case panda_file::LiteralTag::ARRAY_U32:
1363             return "u32";
1364         case panda_file::LiteralTag::INTEGER:
1365         case panda_file::LiteralTag::ARRAY_I32:
1366             return "i32";
1367         case panda_file::LiteralTag::ARRAY_U64:
1368             return "u64";
1369         case panda_file::LiteralTag::BIGINT:
1370         case panda_file::LiteralTag::ARRAY_I64:
1371             return "i64";
1372         case panda_file::LiteralTag::FLOAT:
1373         case panda_file::LiteralTag::ARRAY_F32:
1374             return "f32";
1375         case panda_file::LiteralTag::DOUBLE:
1376         case panda_file::LiteralTag::ARRAY_F64:
1377             return "f64";
1378         case panda_file::LiteralTag::STRING:
1379         case panda_file::LiteralTag::ARRAY_STRING:
1380             return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1381         case panda_file::LiteralTag::ACCESSOR:
1382             return "accessor";
1383         case panda_file::LiteralTag::NULLVALUE:
1384             return "nullvalue";
1385         case panda_file::LiteralTag::METHODAFFILIATE:
1386             return "method_affiliate";
1387         case panda_file::LiteralTag::METHOD:
1388             return "method";
1389         case panda_file::LiteralTag::GENERATORMETHOD:
1390             return "generator_method";
1391         case panda_file::LiteralTag::LITERALARRAY:
1392             return "lit_offset";
1393         default:
1394             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1395             UNREACHABLE();
1396     }
1397 }
1398 
SerializeLiterals(const pandasm::LiteralArray::Literal & lit) const1399 std::string Disassembler::SerializeLiterals(const pandasm::LiteralArray::Literal &lit) const
1400 {
1401     std::stringstream res {};
1402     const auto &val = lit.value;
1403     switch (lit.tag) {
1404         case panda_file::LiteralTag::BOOL: {
1405             res << (std::get<bool>(val));
1406             break;
1407         }
1408         case panda_file::LiteralTag::INTEGER: {
1409             res << (bit_cast<int32_t>(std::get<uint32_t>(val)));
1410             break;
1411         }
1412         case panda_file::LiteralTag::DOUBLE: {
1413             res << (std::get<double>(val));
1414             break;
1415         }
1416         case panda_file::LiteralTag::STRING: {
1417             res << "\"" << (std::get<std::string>(val)) << "\"";
1418             break;
1419         }
1420         case panda_file::LiteralTag::METHOD:
1421         case panda_file::LiteralTag::GENERATORMETHOD: {
1422             res << (std::get<std::string>(val));
1423             break;
1424         }
1425         case panda_file::LiteralTag::NULLVALUE:
1426         case panda_file::LiteralTag::ACCESSOR: {
1427             res << (static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val))));
1428             break;
1429         }
1430         case panda_file::LiteralTag::METHODAFFILIATE: {
1431             res << (std::get<uint16_t>(val));
1432             break;
1433         }
1434         case panda_file::LiteralTag::LITERALARRAY: {
1435             res << (std::get<std::string>(val));
1436             break;
1437         }
1438         default:
1439             UNREACHABLE();
1440     }
1441     res << ", ";
1442     return res.str();
1443 }
1444 
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1445 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1446 {
1447     if (lit.IsBoolValue()) {
1448         std::stringstream res {};
1449         res << (std::get<bool>(lit.value));
1450         return res.str();
1451     }
1452 
1453     if (lit.IsByteValue()) {
1454         return LiteralIntegralValueToString<uint8_t>(lit);
1455     }
1456 
1457     if (lit.IsShortValue()) {
1458         return LiteralIntegralValueToString<uint16_t>(lit);
1459     }
1460 
1461     if (lit.IsIntegerValue()) {
1462         return LiteralIntegralValueToString<uint32_t>(lit);
1463     }
1464 
1465     if (lit.IsLongValue()) {
1466         return LiteralIntegralValueToString<uint64_t>(lit);
1467     }
1468 
1469     if (lit.IsDoubleValue()) {
1470         std::stringstream res {};
1471         res << std::get<double>(lit.value);
1472         return res.str();
1473     }
1474 
1475     if (lit.IsFloatValue()) {
1476         std::stringstream res {};
1477         res << std::get<float>(lit.value);
1478         return res.str();
1479     }
1480 
1481     if (lit.IsStringValue()) {
1482         std::stringstream res {};
1483         res << "\"" << std::get<std::string>(lit.value) << "\"";
1484         return res.str();
1485     }
1486 
1487     if (lit.IsLiteralArrayValue()) {
1488         return SerializeLiterals(lit);
1489     }
1490 
1491     UNREACHABLE();
1492 }
1493 
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1494 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1495 {
1496     std::string separator = (isConst) ? (" ") : ("\n");
1497 
1498     os << separator;
1499 
1500     if (isConst) {
1501         for (const auto &l : litArray.literals) {
1502             os << LiteralValueToString(l) << separator;
1503         }
1504     } else {
1505         for (const auto &l : litArray.literals) {
1506             os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1507         }
1508     }
1509 }
1510 
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1511 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1512 {
1513     if (IsSystemType(record.name)) {
1514         return;
1515     }
1516 
1517     os << ".record " << record.name;
1518 
1519     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1520     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1521     if (recordInTable) {
1522         Serialize(*record.metadata, recordIter->second.annList, os);
1523     } else {
1524         Serialize(*record.metadata, {}, os);
1525     }
1526 
1527     if (record.metadata->IsForeign() && record.fieldList.empty()) {
1528         os << "\n\n";
1529         return;
1530     }
1531 
1532     os << " {";
1533 
1534     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1535         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1536         SerializeFields(record, os, true);
1537     } else {
1538         os << "\n";
1539         SerializeFields(record, os, false);
1540     }
1541 
1542     os << "}\n\n";
1543 }
1544 
SerializeUnionFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1545 void Disassembler::SerializeUnionFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1546 {
1547     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1548         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1549         SerializeFields(record, os, true, true);
1550     } else {
1551         SerializeFields(record, os, false, true);
1552     }
1553     os << "\n";
1554 }
1555 
DumpLiteralArray(const pandasm::LiteralArray & literalArray,std::stringstream & ss) const1556 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literalArray, std::stringstream &ss) const
1557 {
1558     ss << "[";
1559     bool firstItem = true;
1560     for (const auto &item : literalArray.literals) {
1561         if (!firstItem) {
1562             ss << ", ";
1563         } else {
1564             firstItem = false;
1565         }
1566 
1567         switch (item.tag) {
1568             case panda_file::LiteralTag::INTEGER: {
1569                 ss << std::get<uint32_t>(item.value);  // CC-OFF(G.EXP.30-CPP) false positive
1570                 break;
1571             }
1572             case panda_file::LiteralTag::DOUBLE: {
1573                 ss << std::get<double>(item.value);
1574                 break;
1575             }
1576             case panda_file::LiteralTag::BOOL: {
1577                 ss << std::get<bool>(item.value);
1578                 break;
1579             }
1580             case panda_file::LiteralTag::STRING: {
1581                 ss << "\"" << std::get<std::string>(item.value) << "\"";
1582                 break;
1583             }
1584             case panda_file::LiteralTag::LITERALARRAY: {
1585                 std::string offsetStr = std::get<std::string>(item.value);
1586                 const int hexBase = 16;
1587                 uint32_t litArrayOffset = std::stoi(offsetStr, nullptr, hexBase);
1588                 pandasm::LiteralArray litArray;
1589                 GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1590                 DumpLiteralArray(litArray, ss);
1591                 break;
1592             }
1593             default: {
1594                 UNREACHABLE();
1595                 break;
1596             }
1597         }
1598     }
1599     ss << "]";
1600 }
1601 
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1602 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1603 {
1604     if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1605         ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1606     } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1607         ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1608     } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1609         ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1610     } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1611         ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1612     } else if (f.type.GetId() == panda_file::Type::TypeId::I32) {
1613         ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<int>());
1614     } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "std/core/String") {
1615         ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1616     } else if (f.type.GetRank() > 0) {
1617         uint32_t litArrayOffset =
1618             std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()));
1619         pandasm::LiteralArray litArray;
1620         GetLiteralArrayByOffset(&litArray, panda_file::File::EntityId(litArrayOffset));
1621         ss << " = ";
1622         DumpLiteralArray(litArray, ss);
1623     }
1624 }
1625 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation,bool isUnion) const1626 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation,
1627                                    bool isUnion) const
1628 {
1629     constexpr size_t INFO_OFFSET = 80;
1630 
1631     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1632     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1633 
1634     const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1635 
1636     size_t fieldIdx = 0;
1637 
1638     std::stringstream ss;
1639     for (const auto &f : record.fieldList) {
1640         if (isUnion) {
1641             ss << ".union_field ";
1642         } else {
1643             ss << "\t";
1644         }
1645         ss << f.type.GetPandasmName() << " " << f.name;
1646         if (f.metadata->GetValue().has_value()) {
1647             SerializeFieldValue(f, ss);
1648         }
1649         if (!isUnion && recordInTable) {
1650             const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1651             if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1652                 Serialize(*f.metadata, fieldIter->second, ss);
1653             } else {
1654                 Serialize(*f.metadata, {}, ss);
1655             }
1656         } else if (!isUnion && !recordInTable) {
1657             Serialize(*f.metadata, {}, ss);
1658         }
1659 
1660         if (printInformation) {
1661             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1662         } else {
1663             os << ss.str() << "\n";
1664         }
1665 
1666         ss.str(std::string());
1667         ss.clear();
1668 
1669         fieldIdx++;
1670     }
1671 }
1672 
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1673 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1674 {
1675     if (catchBlock.exceptionRecord.empty()) {
1676         os << ".catchall ";
1677     } else {
1678         os << ".catch " << catchBlock.exceptionRecord << ", ";
1679     }
1680 
1681     os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1682 
1683     if (!catchBlock.catchEndLabel.empty()) {
1684         os << ", " << catchBlock.catchEndLabel;
1685     }
1686 }
1687 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1688 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1689 {
1690     auto boolAttributes = meta.GetBoolAttributes();
1691     auto attributes = meta.GetAttributes();
1692     if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1693         return;
1694     }
1695 
1696     os << " <";
1697 
1698     size_t size = boolAttributes.size();
1699     size_t idx = 0;
1700     for (const auto &attr : boolAttributes) {
1701         os << attr;
1702         ++idx;
1703 
1704         if (!attributes.empty() || !annList.empty() || idx < size) {
1705             os << ", ";
1706         }
1707     }
1708 
1709     size = attributes.size();
1710     idx = 0;
1711     for (const auto &[key, values] : attributes) {
1712         for (size_t i = 0; i < values.size(); i++) {
1713             os << key << "=" << values[i];
1714 
1715             if (i < values.size() - 1) {
1716                 os << ", ";
1717             }
1718         }
1719 
1720         ++idx;
1721 
1722         if (!annList.empty() || idx < size) {
1723             os << ", ";
1724         }
1725     }
1726 
1727     size = annList.size();
1728     idx = 0;
1729     for (const auto &[key, value] : annList) {
1730         os << key << "=" << value;
1731 
1732         ++idx;
1733 
1734         if (idx < size) {
1735             os << ", ";
1736         }
1737     }
1738 
1739     os << ">";
1740 }
1741 
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1742 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1743 {
1744     if (lineNumberTable.empty()) {
1745         return;
1746     }
1747 
1748     os << "\n#   LINE_NUMBER_TABLE:\n";
1749     for (const auto &lineInfo : lineNumberTable) {
1750         os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1751     }
1752 }
1753 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1754 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1755                                                const pandasm::Function &method, std::ostream &os) const
1756 {
1757     if (localVariableTable.empty()) {
1758         return;
1759     }
1760 
1761     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1762     os << "#\t Start   End  Register           Name   Signature\n";
1763     const int startWidth = 5;
1764     const int endWidth = 4;
1765     const int regWidth = 8;
1766     const int nameWidth = 14;
1767     for (const auto &variableInfo : localVariableTable) {
1768         std::ostringstream regStream;
1769         regStream << variableInfo.regNumber << '(';
1770         if (variableInfo.regNumber < 0) {
1771             regStream << "acc";
1772         } else {
1773             uint32_t vreg = variableInfo.regNumber;
1774             uint32_t firstArgReg = method.GetTotalRegs();
1775             if (vreg < firstArgReg) {
1776                 regStream << 'v' << vreg;
1777             } else {
1778                 regStream << 'a' << vreg - firstArgReg;
1779             }
1780         }
1781         regStream << ')';
1782 
1783         os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << "  ";
1784         os << std::setw(endWidth) << std::right << variableInfo.endOffset << "  ";
1785         os << std::setw(regWidth) << std::right << regStream.str() << " ";
1786         os << std::setw(nameWidth) << std::right << variableInfo.name << "   " << variableInfo.type;
1787         if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1788             os << " (" << variableInfo.typeSignature << ")";
1789         }
1790         os << "\n";
1791     }
1792 }
1793 
SerializeLanguage(std::ostream & os) const1794 void Disassembler::SerializeLanguage(std::ostream &os) const
1795 {
1796     os << ".language " << ark::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1797 }
1798 
SerializeFilename(std::ostream & os) const1799 void Disassembler::SerializeFilename(std::ostream &os) const
1800 {
1801     if (file_ == nullptr || file_->GetFilename().empty()) {
1802         return;
1803     }
1804 
1805     os << "# source binary: " << file_->GetFilename() << "\n\n";
1806 }
1807 
SerializeLitArrays(std::ostream & os,bool addSeparators) const1808 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1809 {
1810     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1811 
1812     if (prog_.literalarrayTable.empty()) {
1813         return;
1814     }
1815 
1816     if (addSeparators) {
1817         os << "# ====================\n"
1818               "# LITERALS\n\n";
1819     }
1820 
1821     for (const auto &pair : prog_.literalarrayTable) {
1822         Serialize(pair.first, pair.second, os);
1823     }
1824 
1825     os << "\n";
1826 }
1827 
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1828 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1829 {
1830     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1831 
1832     if (prog_.recordTable.empty()) {
1833         return;
1834     }
1835 
1836     if (addSeparators) {
1837         os << "# ====================\n"
1838               "# RECORDS\n\n";
1839     }
1840 
1841     for (const auto &r : prog_.recordTable) {
1842         if (!panda_file::IsDummyClassName(r.first)) {
1843             Serialize(r.second, os, printInformation);
1844         } else {
1845             SerializeUnionFields(r.second, os, printInformation);
1846         }
1847     }
1848 }
1849 
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1850 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1851 {
1852     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1853 
1854     if (prog_.functionTable.empty()) {
1855         return;
1856     }
1857 
1858     if (addSeparators) {
1859         os << "# ====================\n"
1860               "# METHODS\n\n";
1861     }
1862 
1863     for (const auto &m : prog_.functionTable) {
1864         Serialize(m.second, os, printInformation);
1865     }
1866 }
1867 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1868 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1869 {
1870     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1871 }
1872 
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1873 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1874 {
1875     std::stringstream name;
1876 
1877     if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1878         auto idx = bcIns.GetId().AsIndex();
1879         auto id = file_->ResolveClassIndex(methodId, idx);
1880         auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1881 
1882         name.str("");
1883         name << type.GetPandasmName();
1884     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1885         auto idx = bcIns.GetId().AsIndex();
1886         auto id = file_->ResolveMethodIndex(methodId, idx);
1887 
1888         name << GetMethodSignature(id);
1889     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1890         name << '\"';
1891 
1892         if (skipStrings_ || quiet_) {
1893             name << std::hex << "0x" << bcIns.GetId().AsFileId();
1894         } else {
1895             name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1896         }
1897 
1898         name << '\"';
1899     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1900         auto idx = bcIns.GetId().AsIndex();
1901         auto id = file_->ResolveFieldIndex(methodId, idx);
1902         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1903 
1904         auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1905         if (!panda_file::IsDummyClassName(recordName)) {
1906             name << recordName;
1907             name << '.';
1908         }
1909         name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1910     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1911         auto index = bcIns.GetId().AsIndex();
1912         name << "array_" << index;
1913     }
1914 
1915     return name.str();
1916 }
1917 
GetRecordLanguage(panda_file::File::EntityId classId) const1918 ark::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1919 {
1920     if (file_->IsExternal(classId)) {
1921         return ark::panda_file::SourceLang::PANDA_ASSEMBLY;
1922     }
1923 
1924     panda_file::ClassDataAccessor cda(*file_, classId);
1925     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1926 }
1927 
1928 // CC-OFFNXT(G.FUN.01) solid logic
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1929 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1930                                 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1931                                 panda_file::File::EntityId codeId)
1932 {
1933     const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1934     const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1935     if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1936         size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1937         if (idx != std::numeric_limits<size_t>::max()) {
1938             if (labelTable->find(idx) == labelTable->end()) {
1939                 std::stringstream ss;
1940                 ss << "jump_label_" << labelTable->size();
1941                 (*labelTable)[idx] = ss.str();
1942             }
1943 
1944             paIns->imms.clear();
1945             paIns->ids.push_back(labelTable->at(idx));
1946         } else {
1947             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1948                                      << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1949                                      << ": invalid jump offset 0x" << jmpOffset
1950                                      << " - jumping in the middle of another instruction!";
1951         }
1952     } else {
1953         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1954                                  << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1955                                  << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1956     }
1957 }
1958 
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1959 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1960 {
1961     auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1962 
1963     pandasm::Field field(fileLanguage_);
1964     GetField(field, fieldAccessor);
1965     if (field.name.empty()) {
1966         return;
1967     }
1968 
1969     auto &fieldList = externalFieldTable_[recordName];
1970     auto retField = std::find_if(fieldList.begin(), fieldList.end(),
1971                                  [&field](pandasm::Field &fieldFromList) { return field.name == fieldFromList.name; });
1972     if (retField == fieldList.end()) {
1973         fieldList.emplace_back(std::move(field));
1974 
1975         externalFieldsInfoTable_[recordName].emplace_back(GetFieldInfo(fieldAccessor));
1976     }
1977 }
1978 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1979 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1980                                      panda_file::File::EntityId codeId)
1981 {
1982     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1983 
1984     const auto insSz = codeAccessor.GetCodeSize();
1985     const auto insArr = codeAccessor.GetInstructions();
1986 
1987     method->regsNum = codeAccessor.GetNumVregs();
1988 
1989     auto bcIns = BytecodeInstruction(insArr);
1990     auto from = bcIns.GetAddress();
1991     const auto bcInsLast = bcIns.JumpTo(insSz);
1992 
1993     LabelTable labelTable = GetExceptions(method, methodId, codeId);
1994 
1995     IdList unknownExternalMethods {};
1996 
1997     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
1998         if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
1999             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
2000                                      << "). bytecode instructions sequence corrupted for method " << method->name
2001                                      << "! went out of bounds";
2002 
2003             break;
2004         }
2005 
2006         if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
2007             auto idx = bcIns.GetId().AsIndex();
2008             auto id = file_->ResolveFieldIndex(methodId, idx);
2009             panda_file::FieldDataAccessor fieldAccessor(*file_, id);
2010 
2011             if (fieldAccessor.IsExternal()) {
2012                 CollectExternalFields(fieldAccessor);
2013             }
2014         }
2015 
2016         auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
2017         paIns.insDebug.boundLeft =
2018             bcIns.GetAddress() - from;  // It is used to produce a line table during method serialization
2019         if (paIns.IsJump()) {
2020             TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
2021         }
2022 
2023         // check if method id is unknown external method. if so, emplace it in table
2024         if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
2025             const auto argMethodIdx = bcIns.GetId().AsIndex();
2026             const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
2027 
2028             const auto argMethodSignature = GetMethodSignature(argMethodId);
2029 
2030             const bool isPresent = prog_.functionTable.find(argMethodSignature) != prog_.functionTable.cend();
2031             const bool isExternal = file_->IsExternal(argMethodId);
2032             if (isExternal && !isPresent) {
2033                 unknownExternalMethods.push_back(argMethodId);
2034             }
2035         }
2036 
2037         method->ins.push_back(paIns);
2038         bcIns = bcIns.GetNext();
2039     }
2040 
2041     for (const auto &pair : labelTable) {
2042         method->ins[pair.first].label = pair.second;
2043         method->ins[pair.first].setLabel = true;
2044     }
2045 
2046     return unknownExternalMethods;
2047 }
2048 
2049 }  // namespace ark::disasm
2050