• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "class_data_accessor.h"
18 #include "field_data_accessor.h"
19 #include "libpandafile/type_helper.h"
20 #include "mangling.h"
21 #include "utils/logger.h"
22 
23 #include <iomanip>
24 
25 #include "get_language_specific_metadata.inc"
26 
27 namespace panda::disasm {
28 
Disassemble(std::string_view filenameIn,const bool quiet,const bool skipStrings)29 void Disassembler::Disassemble(std::string_view filenameIn, const bool quiet, const bool skipStrings)
30 {
31     auto file = panda_file::File::Open(filenameIn);
32     if (file == nullptr) {
33         LOG(FATAL, DISASSEMBLER) << "> unable to open specified pandafile: <" << filenameIn << ">";
34     }
35 
36     Disassemble(file, quiet, skipStrings);
37 }
38 
Disassemble(const panda_file::File & file,const bool quiet,const bool skipStrings)39 void Disassembler::Disassemble(const panda_file::File &file, const bool quiet, const bool skipStrings)
40 {
41     SetFile(file);
42     DisassembleImpl(quiet, skipStrings);
43 }
44 
Disassemble(std::unique_ptr<const panda_file::File> & file,const bool quiet,const bool skipStrings)45 void Disassembler::Disassemble(std::unique_ptr<const panda_file::File> &file, const bool quiet, const bool skipStrings)
46 {
47     SetFile(file);
48     DisassembleImpl(quiet, skipStrings);
49 }
50 
DisassembleImpl(const bool quiet,const bool skipStrings)51 void Disassembler::DisassembleImpl(const bool quiet, const bool skipStrings)
52 {
53     prog_ = pandasm::Program {};
54 
55     recordNameToId_.clear();
56     methodNameToId_.clear();
57 
58     skipStrings_ = skipStrings;
59     quiet_ = quiet;
60 
61     progInfo_ = ProgInfo {};
62 
63     progAnn_ = ProgAnnotations {};
64 
65     GetLiteralArrays();
66     GetRecords();
67 
68     AddExternalFieldsToRecords();
69     GetLanguageSpecificMetadata();
70 }
71 
SetFile(std::unique_ptr<const panda_file::File> & file)72 void Disassembler::SetFile(std::unique_ptr<const panda_file::File> &file)
73 {
74     fileHolder_.swap(file);
75     file_ = fileHolder_.get();
76 }
77 
SetFile(const panda_file::File & file)78 void Disassembler::SetFile(const panda_file::File &file)
79 {
80     fileHolder_.reset();
81     file_ = &file;
82 }
83 
SetProfile(std::string_view fname)84 void Disassembler::SetProfile(std::string_view fname)
85 {
86     std::ifstream stm(fname.data(), std::ios::binary);
87     if (!stm.is_open()) {
88         LOG(FATAL, DISASSEMBLER) << "Cannot open profile file";
89     }
90 
91     auto res = profiling::ReadProfile(stm, fileLanguage_);
92     if (!res) {
93         LOG(FATAL, DISASSEMBLER) << "Failed to deserialize: " << res.Error();
94     }
95     profile_ = res.Value();
96 }
97 
GetInsInfo(panda_file::MethodDataAccessor & mda,const panda_file::File::EntityId & codeId,MethodInfo * methodInfo) const98 void Disassembler::GetInsInfo(panda_file::MethodDataAccessor &mda, const panda_file::File::EntityId &codeId,
99                               MethodInfo *methodInfo /* out */) const
100 {
101     const static size_t FORMAT_WIDTH = 20;
102     const static size_t INSTRUCTION_WIDTH = 2;
103 
104     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
105 
106     std::string methodName = mda.GetFullName();
107     auto prof = profiling::INVALID_PROFILE;
108     if (profile_ != profiling::INVALID_PROFILE) {
109         prof = profiling::FindMethodInProfile(profile_, fileLanguage_, methodName);
110     }
111 
112     auto insSz = codeAccessor.GetCodeSize();
113     auto insArr = codeAccessor.GetInstructions();
114 
115     auto bcIns = BytecodeInstruction(insArr);
116     auto bcInsLast = bcIns.JumpTo(insSz);
117 
118     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
119         std::stringstream ss;
120 
121         uintptr_t bc = bcIns.GetAddress() - BytecodeInstruction(insArr).GetAddress();
122         ss << "offset: 0x" << std::setfill('0') << std::setw(4U) << std::hex << bc;
123         ss << ", " << std::setfill('.');
124 
125         BytecodeInstruction::Format format = bcIns.GetFormat();
126 
127         auto formatStr = std::string("[") + BytecodeInstruction::GetFormatString(format) + ']';
128         ss << std::setw(FORMAT_WIDTH) << std::left << formatStr;
129 
130         ss << "[";
131 
132         const uint8_t *pc = bcIns.GetAddress();
133         const size_t sz = bcIns.GetSize();
134 
135         for (size_t i = 0; i < sz; i++) {
136             ss << "0x" << std::setw(INSTRUCTION_WIDTH) << std::setfill('0') << std::right << std::hex
137                << static_cast<int>(pc[i]);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
138 
139             if (i != sz - 1) {
140                 ss << " ";
141             }
142         }
143 
144         ss << "]";
145 
146         if (profile_ != profiling::INVALID_PROFILE && prof != profiling::INVALID_PROFILE) {
147             auto profId = bcIns.GetProfileId();
148             if (profId != -1) {
149                 ss << ", Profile: ";
150                 profiling::DumpProfile(prof, fileLanguage_, &bcIns, ss);
151             }
152         }
153 
154         methodInfo->instructionsInfo.push_back(ss.str());
155 
156         bcIns = bcIns.GetNext();
157     }
158 }
159 
CollectInfo()160 void Disassembler::CollectInfo()
161 {
162     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
163 
164     debugInfoExtractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_);
165 
166     for (const auto &pair : recordNameToId_) {
167         GetRecordInfo(pair.second, &progInfo_.recordsInfo[pair.first]);
168     }
169 
170     for (const auto &pair : methodNameToId_) {
171         GetMethodInfo(pair.second, &progInfo_.methodsInfo[pair.first]);
172     }
173 }
174 
Serialize(std::ostream & os,bool addSeparators,bool printInformation) const175 void Disassembler::Serialize(std::ostream &os, bool addSeparators, bool printInformation) const
176 {
177     if (os.bad()) {
178         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
179 
180         return;
181     }
182 
183     SerializeFilename(os);
184     SerializeLanguage(os);
185     SerializeLitArrays(os, addSeparators);
186     SerializeRecords(os, addSeparators, printInformation);
187     SerializeMethods(os, addSeparators, printInformation);
188 }
189 
Serialize(const pandasm::Function & method,std::ostream & os,bool printInformation,panda_file::LineNumberTable * lineTable) const190 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool printInformation,
191                              panda_file::LineNumberTable *lineTable) const
192 {
193     std::ostringstream headerSs;
194     headerSs << ".function " << method.returnType.GetPandasmName() << " " << method.name << "(";
195 
196     if (!method.params.empty()) {
197         headerSs << method.params[0].type.GetPandasmName() << " a0";
198 
199         for (size_t i = 1; i < method.params.size(); i++) {
200             headerSs << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
201         }
202     }
203     headerSs << ")";
204 
205     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
206 
207     const auto methodIter = progAnn_.methodAnnotations.find(signature);
208     if (methodIter != progAnn_.methodAnnotations.end()) {
209         Serialize(*method.metadata, methodIter->second, headerSs);
210     } else {
211         Serialize(*method.metadata, {}, headerSs);
212     }
213 
214     if (!method.HasImplementation()) {
215         headerSs << "\n\n";
216         os << headerSs.str();
217         return;
218     }
219 
220     headerSs << " {";
221 
222     size_t width;
223     const MethodInfo *methodInfo;
224     auto methodInfoIt = progInfo_.methodsInfo.find(signature);
225     bool printMethodInfo = printInformation && methodInfoIt != progInfo_.methodsInfo.end();
226     if (printMethodInfo) {
227         methodInfo = &methodInfoIt->second;
228 
229         width = 0;
230         for (const auto &i : method.ins) {
231             if (i.ToString().size() > width) {
232                 width = i.ToString().size();
233             }
234         }
235 
236         headerSs << " # " << methodInfo->methodInfo << "\n#   CODE:";
237     }
238 
239     headerSs << "\n";
240 
241     auto headerSsStr = headerSs.str();
242     size_t lineNumber = std::count(headerSsStr.begin(), headerSsStr.end(), '\n') + 1;
243 
244     os << headerSsStr;
245 
246     for (size_t i = 0; i < method.ins.size(); i++) {
247         std::ostringstream insSs;
248 
249         std::string ins = method.ins[i].ToString("", method.GetParamsNum() != 0, method.regsNum);
250         if (method.ins[i].setLabel) {
251             std::string delim = ": ";
252             size_t pos = ins.find(delim);
253             std::string label = ins.substr(0, pos);
254             ins.erase(0, pos + delim.length());
255 
256             insSs << label << ":\n";
257         }
258 
259         insSs << "\t";
260         if (printMethodInfo) {
261             insSs << std::setw(width) << std::left;
262         }
263         insSs << ins;
264         if (printMethodInfo) {
265             ASSERT(methodInfo != nullptr);
266             insSs << " # " << methodInfo->instructionsInfo[i];
267         }
268         insSs << "\n";
269 
270         auto insSsStr = insSs.str();
271         lineNumber += std::count(insSsStr.begin(), insSsStr.end(), '\n');
272 
273         if (lineTable != nullptr) {
274             lineTable->emplace_back(
275                 panda_file::LineTableEntry {static_cast<uint32_t>(method.ins[i].insDebug.boundLeft), lineNumber - 1});
276         }
277 
278         os << insSsStr;
279     }
280 
281     if (!method.catchBlocks.empty()) {
282         os << "\n";
283 
284         for (const auto &catchBlock : method.catchBlocks) {
285             Serialize(catchBlock, os);
286 
287             os << "\n";
288         }
289     }
290 
291     if (printMethodInfo) {
292         ASSERT(methodInfo != nullptr);
293         SerializeLineNumberTable(methodInfo->lineNumberTable, os);
294         SerializeLocalVariableTable(methodInfo->localVariableTable, method, os);
295     }
296 
297     os << "}\n\n";
298 }
299 
IsSystemType(const std::string & typeName)300 inline bool Disassembler::IsSystemType(const std::string &typeName)
301 {
302     bool isArrayType = typeName.back() == ']';
303     bool isGlobal = typeName == "_GLOBAL";
304 
305     return isArrayType || isGlobal;
306 }
307 
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & recordId)308 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &recordId)
309 {
310     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << recordId << " (0x" << std::hex << recordId << ")";
311 
312     if (record == nullptr) {
313         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
314 
315         return;
316     }
317 
318     record->name = GetFullRecordName(recordId);
319 
320     LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
321 
322     GetMetaData(record, recordId);
323 
324     if (!file_->IsExternal(recordId)) {
325         GetMethods(recordId);
326         GetFields(record, recordId);
327     }
328 }
329 
AddMethodToTables(const panda_file::File::EntityId & methodId)330 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &methodId)
331 {
332     pandasm::Function newMethod("", fileLanguage_);
333     GetMethod(&newMethod, methodId);
334 
335     const auto signature = pandasm::GetFunctionSignatureFromName(newMethod.name, newMethod.params);
336     if (prog_.functionTable.find(signature) != prog_.functionTable.end()) {
337         return;
338     }
339 
340     methodNameToId_.emplace(signature, methodId);
341     prog_.functionSynonyms[newMethod.name].push_back(signature);
342     prog_.functionTable.emplace(signature, std::move(newMethod));
343 }
344 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & methodId)345 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &methodId)
346 {
347     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << methodId << " (0x" << std::hex << methodId << ")";
348 
349     if (method == nullptr) {
350         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
351 
352         return;
353     }
354 
355     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
356 
357     method->name = GetFullMethodName(methodId);
358 
359     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
360 
361     GetParams(method, methodAccessor.GetProtoId());
362     GetMetaData(method, methodId);
363 
364     if (!method->HasImplementation()) {
365         return;
366     }
367 
368     if (methodAccessor.GetCodeId().has_value()) {
369         const IdList idList = GetInstructions(method, methodId, methodAccessor.GetCodeId().value());
370 
371         for (const auto &id : idList) {
372             AddMethodToTables(id);
373         }
374     } else {
375         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << methodId << " (0x" << std::hex << methodId
376                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
377 
378         return;
379     }
380 }
381 
382 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * litArray,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const383 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *litArray, const panda_file::LiteralTag &tag,
384                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
385 {
386     panda_file::File::EntityId id(std::get<uint32_t>(value));
387     auto sp = file_->GetSpanFromId(id);
388     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
389     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
390         for (size_t i = 0; i < len; i++) {
391             pandasm::LiteralArray::Literal lit;
392             lit.tag = tag;
393             lit.value = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
394             litArray->literals.push_back(lit);
395         }
396     } else {
397         for (size_t i = 0; i < len; i++) {
398             auto strId = panda_file::helpers::Read<sizeof(T)>(&sp);
399             pandasm::LiteralArray::Literal lit;
400             lit.tag = tag;
401             lit.value = StringDataToString(file_->GetStringData(panda_file::File::EntityId(strId)));
402             litArray->literals.push_back(lit);
403         }
404     }
405 }
406 
FillLiteralData(pandasm::LiteralArray * litArray,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const407 void Disassembler::FillLiteralData(pandasm::LiteralArray *litArray,
408                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
409                                    const panda_file::LiteralTag &tag) const
410 {
411     pandasm::LiteralArray::Literal lit;
412     lit.tag = tag;
413     switch (tag) {
414         case panda_file::LiteralTag::BOOL: {
415             lit.value = std::get<bool>(value);
416             break;
417         }
418         case panda_file::LiteralTag::ACCESSOR:
419         case panda_file::LiteralTag::NULLVALUE: {
420             lit.value = std::get<uint8_t>(value);
421             break;
422         }
423         case panda_file::LiteralTag::METHODAFFILIATE: {
424             lit.value = std::get<uint16_t>(value);
425             break;
426         }
427         case panda_file::LiteralTag::INTEGER: {
428             lit.value = std::get<uint32_t>(value);
429             break;
430         }
431         case panda_file::LiteralTag::BIGINT: {
432             lit.value = std::get<uint64_t>(value);
433             break;
434         }
435         case panda_file::LiteralTag::FLOAT: {
436             lit.value = std::get<float>(value);
437             break;
438         }
439         case panda_file::LiteralTag::DOUBLE: {
440             lit.value = std::get<double>(value);
441             break;
442         }
443         case panda_file::LiteralTag::STRING:
444         case panda_file::LiteralTag::METHOD:
445         case panda_file::LiteralTag::GENERATORMETHOD: {
446             auto strData = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
447             lit.value = StringDataToString(strData);
448             break;
449         }
450         case panda_file::LiteralTag::TAGVALUE: {
451             return;
452         }
453         default: {
454             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
455             UNREACHABLE();
456         }
457     }
458     litArray->literals.push_back(lit);
459 }
460 
GetLiteralArray(pandasm::LiteralArray * litArray,const size_t index)461 void Disassembler::GetLiteralArray(pandasm::LiteralArray *litArray, const size_t index)
462 {
463     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
464 
465     panda_file::LiteralDataAccessor litArrayAccessor(*file_, file_->GetLiteralArraysId());
466 
467     // clang-format off
468     litArrayAccessor.EnumerateLiteralVals(index,
469                                         [this, litArray](const panda_file::LiteralDataAccessor::LiteralValue &value,
470                                                          const panda_file::LiteralTag &tag) {
471                                             switch (tag) {
472                                                 case panda_file::LiteralTag::ARRAY_U1: {
473                                                     FillLiteralArrayData<bool>(litArray, tag, value);
474                                                     break;
475                                                 }
476                                                 case panda_file::LiteralTag::ARRAY_I8:
477                                                 case panda_file::LiteralTag::ARRAY_U8: {
478                                                     FillLiteralArrayData<uint8_t>(litArray, tag, value);
479                                                     break;
480                                                 }
481                                                 case panda_file::LiteralTag::ARRAY_I16:
482                                                 case panda_file::LiteralTag::ARRAY_U16: {
483                                                     FillLiteralArrayData<uint16_t>(litArray, tag, value);
484                                                     break;
485                                                 }
486                                                 case panda_file::LiteralTag::ARRAY_I32:
487                                                 case panda_file::LiteralTag::ARRAY_U32: {
488                                                     FillLiteralArrayData<uint32_t>(litArray, tag, value);
489                                                     break;
490                                                 }
491                                                 case panda_file::LiteralTag::ARRAY_I64:
492                                                 case panda_file::LiteralTag::ARRAY_U64: {
493                                                     FillLiteralArrayData<uint64_t>(litArray, tag, value);
494                                                     break;
495                                                 }
496                                                 case panda_file::LiteralTag::ARRAY_F32: {
497                                                     FillLiteralArrayData<float>(litArray, tag, value);
498                                                     break;
499                                                 }
500                                                 case panda_file::LiteralTag::ARRAY_F64: {
501                                                     FillLiteralArrayData<double>(litArray, tag, value);
502                                                     break;
503                                                 }
504                                                 case panda_file::LiteralTag::ARRAY_STRING: {
505                                                     FillLiteralArrayData<uint32_t>(litArray, tag, value);
506                                                     break;
507                                                 }
508                                                 default: {
509                                                     FillLiteralData(litArray, value, tag);
510                                                 }
511                                             }
512                                         });
513     // clang-format on
514 }
515 
GetLiteralArrays()516 void Disassembler::GetLiteralArrays()
517 {
518     const auto litArraysId = file_->GetLiteralArraysId();
519 
520     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << litArraysId << " (0x" << std::hex << litArraysId
521                              << ")";
522 
523     panda_file::LiteralDataAccessor litArrayAccessor(*file_, litArraysId);
524     size_t numLitarrays = litArrayAccessor.GetLiteralNum();
525     for (size_t index = 0; index < numLitarrays; index++) {
526         panda::pandasm::LiteralArray litAr;
527         GetLiteralArray(&litAr, index);
528         prog_.literalarrayTable.emplace(std::to_string(index), litAr);
529     }
530 }
531 
GetRecords()532 void Disassembler::GetRecords()
533 {
534     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
535 
536     const auto classIdx = file_->GetClasses();
537 
538     for (size_t i = 0; i < classIdx.size(); i++) {
539         uint32_t classId = classIdx[i];
540         auto classOff = file_->GetHeader()->classIdxOff + sizeof(uint32_t) * i;
541 
542         if (classId > file_->GetHeader()->fileSize) {
543             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << classOff << " (0x" << std::hex
544                                      << classOff << "). binary file corrupted. record offset (0x" << classId
545                                      << ") out of bounds (0x" << file_->GetHeader()->fileSize << ")!";
546             break;
547         }
548 
549         const panda_file::File::EntityId recordId {classId};
550         auto language = GetRecordLanguage(recordId);
551         if (language != fileLanguage_) {
552             if (fileLanguage_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
553                 fileLanguage_ = language;
554             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
555                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << classOff << " (0x"
556                                          << std::hex << classOff << "). record's language  ("
557                                          << panda_file::LanguageToString(language)
558                                          << ")  differs from file's language ("
559                                          << panda_file::LanguageToString(fileLanguage_) << ")!";
560             }
561         }
562 
563         pandasm::Record record("", fileLanguage_);
564         GetRecord(&record, recordId);
565 
566         if (prog_.recordTable.find(record.name) == prog_.recordTable.end()) {
567             recordNameToId_.emplace(record.name, recordId);
568             prog_.recordTable.emplace(record.name, std::move(record));
569         }
570     }
571 }
572 
GetField(pandasm::Field & field,const panda_file::FieldDataAccessor & fieldAccessor)573 void Disassembler::GetField(pandasm::Field &field, const panda_file::FieldDataAccessor &fieldAccessor)
574 {
575     panda_file::File::EntityId fieldNameId = fieldAccessor.GetNameId();
576     field.name = StringDataToString(file_->GetStringData(fieldNameId));
577 
578     uint32_t fieldType = fieldAccessor.GetType();
579     field.type = FieldTypeToPandasmType(fieldType);
580 
581     GetMetaData(&field, fieldAccessor.GetFieldId());
582 }
583 
GetFields(pandasm::Record * record,const panda_file::File::EntityId & recordId)584 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &recordId)
585 {
586     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
587 
588     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
589         pandasm::Field field(fileLanguage_);
590 
591         GetField(field, fieldAccessor);
592 
593         record->fieldList.push_back(std::move(field));
594     });
595 }
596 
AddExternalFieldsToRecords()597 void Disassembler::AddExternalFieldsToRecords()
598 {
599     for (auto &[record_name, record] : prog_.recordTable) {
600         auto &[unused, field_list] = *(externalFieldTable_.find(record_name));
601         (void)unused;
602         if (field_list.empty()) {
603             continue;
604         }
605         for (auto &fieldIter : field_list) {
606             if (!fieldIter.name.empty()) {
607                 record.fieldList.push_back(std::move(fieldIter));
608             }
609         }
610         externalFieldTable_.erase(record_name);
611     }
612 }
613 
GetMethods(const panda_file::File::EntityId & recordId)614 void Disassembler::GetMethods(const panda_file::File::EntityId &recordId)
615 {
616     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
617 
618     classAccessor.EnumerateMethods([&](panda_file::MethodDataAccessor &methodAccessor) -> void {
619         AddMethodToTables(methodAccessor.GetMethodId());
620     });
621 }
622 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & protoId) const623 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &protoId) const
624 {
625     /// frame size - 2^16 - 1
626     static const uint32_t MAX_ARG_NUM = 0xFFFF;
627 
628     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << protoId << " (0x" << std::hex << protoId << ")";
629 
630     if (method == nullptr) {
631         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
632 
633         return;
634     }
635 
636     panda_file::ProtoDataAccessor protoAccessor(*file_, protoId);
637 
638     if (protoAccessor.GetNumArgs() > MAX_ARG_NUM) {
639         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << protoId << " (0x" << std::hex << protoId
640                                  << "). number of function's arguments (" << std::dec << protoAccessor.GetNumArgs()
641                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
642 
643         return;
644     }
645 
646     size_t refIdx = 0;
647     method->returnType = PFTypeToPandasmType(protoAccessor.GetReturnType(), protoAccessor, refIdx);
648 
649     for (size_t i = 0; i < protoAccessor.GetNumArgs(); i++) {
650         auto argType = PFTypeToPandasmType(protoAccessor.GetArgType(i), protoAccessor, refIdx);
651         method->params.emplace_back(argType, fileLanguage_);
652     }
653 }
654 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId) const655 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId methodId,
656                                        panda_file::File::EntityId codeId) const
657 {
658     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << codeId << " (0x" << std::hex << codeId << ")";
659 
660     if (method == nullptr) {
661         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
662         return LabelTable {};
663     }
664 
665     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
666 
667     const auto bcIns = BytecodeInstruction(codeAccessor.GetInstructions());
668     const auto bcInsLast = bcIns.JumpTo(codeAccessor.GetCodeSize());
669 
670     size_t tryIdx = 0;
671     LabelTable labelTable {};
672     codeAccessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &tryBlock) {
673         pandasm::Function::CatchBlock catchBlockPa {};
674         if (!LocateTryBlock(bcIns, bcInsLast, tryBlock, &catchBlockPa, &labelTable, tryIdx)) {
675             return false;
676         }
677         size_t catchIdx = 0;
678         tryBlock.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catchBlock) {
679             auto classIdx = catchBlock.GetTypeIdx();
680             if (classIdx == panda_file::INVALID_INDEX) {
681                 catchBlockPa.exceptionRecord = "";
682             } else {
683                 const auto classId = file_->ResolveClassIndex(methodId, classIdx);
684                 catchBlockPa.exceptionRecord = GetFullRecordName(classId);
685             }
686             if (!LocateCatchBlock(bcIns, bcInsLast, catchBlock, &catchBlockPa, &labelTable, tryIdx, catchIdx)) {
687                 return false;
688             }
689 
690             method->catchBlocks.push_back(catchBlockPa);
691             catchBlockPa.catchBeginLabel = "";
692             catchBlockPa.catchEndLabel = "";
693             catchIdx++;
694 
695             return true;
696         });
697         tryIdx++;
698 
699         return true;
700     });
701 
702     return labelTable;
703 }
704 
GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst,BytecodeInstruction bcInsCur)705 static size_t GetBytecodeInstructionNumber(BytecodeInstruction bcInsFirst, BytecodeInstruction bcInsCur)
706 {
707     size_t count = 0;
708 
709     while (bcInsFirst.GetAddress() != bcInsCur.GetAddress()) {
710         count++;
711         bcInsFirst = bcInsFirst.GetNext();
712         if (bcInsFirst.GetAddress() > bcInsCur.GetAddress()) {
713             return std::numeric_limits<size_t>::max();
714         }
715     }
716 
717     return count;
718 }
719 
LocateTryBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::TryBlock & tryBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx) const720 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
721                                   const panda_file::CodeDataAccessor::TryBlock &tryBlock,
722                                   pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable,
723                                   size_t tryIdx) const
724 {
725     const auto tryBeginBcIns = bcIns.JumpTo(tryBlock.GetStartPc());
726     const auto tryEndBcIns = bcIns.JumpTo(tryBlock.GetStartPc() + tryBlock.GetLength());
727 
728     const size_t tryBeginIdx = GetBytecodeInstructionNumber(bcIns, tryBeginBcIns);
729     const size_t tryEndIdx = GetBytecodeInstructionNumber(bcIns, tryEndBcIns);
730 
731     const bool tryBeginOffsetInRange = bcInsLast.GetAddress() > tryBeginBcIns.GetAddress();
732     const bool tryEndOffsetInRange = bcInsLast.GetAddress() >= tryEndBcIns.GetAddress();
733     const bool tryBeginOffsetValid = tryBeginIdx != std::numeric_limits<size_t>::max();
734     const bool tryEndOffsetValid = tryEndIdx != std::numeric_limits<size_t>::max();
735 
736     if (!tryBeginOffsetInRange || !tryBeginOffsetValid) {
737         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
738                                  << tryBeginBcIns.GetAddress();
739         return false;
740     }
741 
742     auto itBegin = labelTable->find(tryBeginIdx);
743     if (itBegin == labelTable->end()) {
744         std::stringstream ss {};
745         ss << "try_begin_label_" << tryIdx;
746         catchBlockPa->tryBeginLabel = ss.str();
747         labelTable->insert(std::pair<size_t, std::string>(tryBeginIdx, ss.str()));
748     } else {
749         catchBlockPa->tryBeginLabel = itBegin->second;
750     }
751 
752     if (!tryEndOffsetInRange || !tryEndOffsetValid) {
753         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
754                                  << tryEndBcIns.GetAddress();
755         return false;
756     }
757 
758     auto itEnd = labelTable->find(tryEndIdx);
759     if (itEnd == labelTable->end()) {
760         std::stringstream ss {};
761         ss << "try_end_label_" << tryIdx;
762         catchBlockPa->tryEndLabel = ss.str();
763         labelTable->insert(std::pair<size_t, std::string>(tryEndIdx, ss.str()));
764     } else {
765         catchBlockPa->tryEndLabel = itEnd->second;
766     }
767 
768     return true;
769 }
770 
LocateCatchBlock(const BytecodeInstruction & bcIns,const BytecodeInstruction & bcInsLast,const panda_file::CodeDataAccessor::CatchBlock & catchBlock,pandasm::Function::CatchBlock * catchBlockPa,LabelTable * labelTable,size_t tryIdx,size_t catchIdx) const771 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bcIns, const BytecodeInstruction &bcInsLast,
772                                     const panda_file::CodeDataAccessor::CatchBlock &catchBlock,
773                                     pandasm::Function::CatchBlock *catchBlockPa, LabelTable *labelTable, size_t tryIdx,
774                                     size_t catchIdx) const
775 {
776     const auto handlerBeginOffset = catchBlock.GetHandlerPc();
777     const auto handlerEndOffset = handlerBeginOffset + catchBlock.GetCodeSize();
778 
779     const auto handlerBeginBcIns = bcIns.JumpTo(handlerBeginOffset);
780     const auto handlerEndBcIns = bcIns.JumpTo(handlerEndOffset);
781 
782     const size_t handlerBeginIdx = GetBytecodeInstructionNumber(bcIns, handlerBeginBcIns);
783     const size_t handlerEndIdx = GetBytecodeInstructionNumber(bcIns, handlerEndBcIns);
784 
785     const bool handlerBeginOffsetInRange = bcInsLast.GetAddress() > handlerBeginBcIns.GetAddress();
786     const bool handlerEndOffsetInRange = bcInsLast.GetAddress() > handlerEndBcIns.GetAddress();
787     const bool handlerEndPresent = catchBlock.GetCodeSize() != 0;
788     const bool handlerBeginOffsetValid = handlerBeginIdx != std::numeric_limits<size_t>::max();
789     const bool handlerEndOffsetValid = handlerEndIdx != std::numeric_limits<size_t>::max();
790 
791     if (!handlerBeginOffsetInRange || !handlerBeginOffsetValid) {
792         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
793                                  << handlerBeginBcIns.GetAddress();
794         return false;
795     }
796 
797     auto itBegin = labelTable->find(handlerBeginIdx);
798     if (itBegin == labelTable->end()) {
799         std::stringstream ss {};
800         ss << "handler_begin_label_" << tryIdx << "_" << catchIdx;
801         catchBlockPa->catchBeginLabel = ss.str();
802         labelTable->insert(std::pair<size_t, std::string>(handlerBeginIdx, ss.str()));
803     } else {
804         catchBlockPa->catchBeginLabel = itBegin->second;
805     }
806 
807     if (!handlerEndOffsetInRange || !handlerEndOffsetValid) {
808         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
809                                  << handlerEndBcIns.GetAddress();
810         return false;
811     }
812 
813     if (handlerEndPresent) {
814         auto itEnd = labelTable->find(handlerEndIdx);
815         if (itEnd == labelTable->end()) {
816             std::stringstream ss {};
817             ss << "handler_end_label_" << tryIdx << "_" << catchIdx;
818             catchBlockPa->catchEndLabel = ss.str();
819             labelTable->insert(std::pair<size_t, std::string>(handlerEndIdx, ss.str()));
820         } else {
821             catchBlockPa->catchEndLabel = itEnd->second;
822         }
823     }
824 
825     return true;
826 }
827 
828 template <typename T>
SetEntityAttribute(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute)829 static void SetEntityAttribute(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute)
830 {
831     if (shouldSet()) {
832         auto err = entity->metadata->SetAttribute(attribute);
833         if (err.has_value()) {
834             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
835         }
836     }
837 }
838 
839 template <typename T>
SetEntityAttributeValue(T * entity,const std::function<bool ()> & shouldSet,std::string_view attribute,const char * value)840 static void SetEntityAttributeValue(T *entity, const std::function<bool()> &shouldSet, std::string_view attribute,
841                                     const char *value)
842 {
843     if (shouldSet()) {
844         auto err = entity->metadata->SetAttributeValue(attribute, value);
845         if (err.has_value()) {
846             LOG(ERROR, DISASSEMBLER) << err.value().GetMessage();
847         }
848     }
849 }
850 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & methodId) const851 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &methodId) const
852 {
853     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << methodId << " (0x" << std::hex << methodId << ")";
854 
855     if (method == nullptr) {
856         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
857 
858         return;
859     }
860 
861     panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
862 
863     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
864 
865     if (!methodAccessor.IsStatic()) {
866         const auto className = StringDataToString(file_->GetStringData(methodAccessor.GetClassId()));
867         auto thisType = pandasm::Type::FromDescriptor(className);
868 
869         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << methodNameRaw
870                                  << "\') is not static. emplacing self-argument of type " << thisType.GetName();
871 
872         method->params.insert(method->params.begin(), pandasm::Function::Parameter(thisType, fileLanguage_));
873     }
874     SetEntityAttribute(
875         method, [&methodAccessor]() { return methodAccessor.IsStatic(); }, "static");
876 
877     SetEntityAttribute(
878         method, [this, &methodAccessor]() { return file_->IsExternal(methodAccessor.GetMethodId()); }, "external");
879 
880     SetEntityAttribute(
881         method, [&methodAccessor]() { return methodAccessor.IsNative(); }, "native");
882 
883     SetEntityAttribute(
884         method, [&methodAccessor]() { return methodAccessor.IsAbstract(); }, "noimpl");
885 
886     SetEntityAttributeValue(
887         method, [&methodAccessor]() { return methodAccessor.IsPublic(); }, "access.function", "public");
888 
889     SetEntityAttributeValue(
890         method, [&methodAccessor]() { return methodAccessor.IsProtected(); }, "access.function", "protected");
891 
892     SetEntityAttributeValue(
893         method, [&methodAccessor]() { return methodAccessor.IsPrivate(); }, "access.function", "private");
894 
895     SetEntityAttribute(
896         method, [&methodAccessor]() { return methodAccessor.IsFinal(); }, "final");
897 
898     std::string ctorName = panda::panda_file::GetCtorName(fileLanguage_);
899     std::string cctorName = panda::panda_file::GetCctorName(fileLanguage_);
900 
901     const bool isCtor = (methodNameRaw == ctorName);
902     const bool isCctor = (methodNameRaw == cctorName);
903 
904     if (isCtor) {
905         method->metadata->SetAttribute("ctor");
906         method->name.replace(method->name.find(ctorName), ctorName.length(), "_ctor_");
907     } else if (isCctor) {
908         method->metadata->SetAttribute("cctor");
909         method->name.replace(method->name.find(cctorName), cctorName.length(), "_cctor_");
910     }
911 }
912 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & recordId) const913 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &recordId) const
914 {
915     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << recordId << " (0x" << std::hex << recordId << ")";
916 
917     if (record == nullptr) {
918         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
919 
920         return;
921     }
922 
923     SetEntityAttribute(
924         record, [this, recordId]() { return file_->IsExternal(recordId); }, "external");
925 
926     auto external = file_->IsExternal(recordId);
927     if (!external) {
928         auto cda = panda_file::ClassDataAccessor {*file_, recordId};
929         SetEntityAttributeValue(
930             record, [&cda]() { return cda.IsPublic(); }, "access.record", "public");
931 
932         SetEntityAttributeValue(
933             record, [&cda]() { return cda.IsProtected(); }, "access.record", "protected");
934 
935         SetEntityAttributeValue(
936             record, [&cda]() { return cda.IsPrivate(); }, "access.record", "private");
937 
938         SetEntityAttribute(
939             record, [&cda]() { return cda.IsFinal(); }, "final");
940     }
941 }
942 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & fieldId) const943 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &fieldId) const
944 {
945     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << fieldId << " (0x" << std::hex << fieldId << ")";
946 
947     if (field == nullptr) {
948         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
949 
950         return;
951     }
952 
953     panda_file::FieldDataAccessor fieldAccessor(*file_, fieldId);
954 
955     SetEntityAttribute(
956         field, [&fieldAccessor]() { return fieldAccessor.IsExternal(); }, "external");
957 
958     SetEntityAttribute(
959         field, [&fieldAccessor]() { return fieldAccessor.IsStatic(); }, "static");
960 
961     SetEntityAttributeValue(
962         field, [&fieldAccessor]() { return fieldAccessor.IsPublic(); }, "access.field", "public");
963 
964     SetEntityAttributeValue(
965         field, [&fieldAccessor]() { return fieldAccessor.IsProtected(); }, "access.field", "protected");
966 
967     SetEntityAttributeValue(
968         field, [&fieldAccessor]() { return fieldAccessor.IsPrivate(); }, "access.field", "private");
969 
970     SetEntityAttribute(
971         field, [&fieldAccessor]() { return fieldAccessor.IsFinal(); }, "final");
972 }
973 
AnnotationTagToString(const char tag) const974 std::string Disassembler::AnnotationTagToString(const char tag) const
975 {
976     static const std::unordered_map<char, std::string> TAG_TO_STRING = {{'1', "u1"},
977                                                                         {'2', "i8"},
978                                                                         {'3', "u8"},
979                                                                         {'4', "i16"},
980                                                                         {'5', "u16"},
981                                                                         {'6', "i32"},
982                                                                         {'7', "u32"},
983                                                                         {'8', "i64"},
984                                                                         {'9', "u64"},
985                                                                         {'A', "f32"},
986                                                                         {'B', "f64"},
987                                                                         {'C', "string"},
988                                                                         {'D', "record"},
989                                                                         {'E', "method"},
990                                                                         {'F', "enum"},
991                                                                         {'G', "annotation"},
992                                                                         {'J', "method_handle"},
993                                                                         {'H', "array"},
994                                                                         {'K', "u1[]"},
995                                                                         {'L', "i8[]"},
996                                                                         {'M', "u8[]"},
997                                                                         {'N', "i16[]"},
998                                                                         {'O', "u16[]"},
999                                                                         {'P', "i32[]"},
1000                                                                         {'Q', "u32[]"},
1001                                                                         {'R', "i64[]"},
1002                                                                         {'S', "u64[]"},
1003                                                                         {'T', "f32[]"},
1004                                                                         {'U', "f64[]"},
1005                                                                         {'V', "string[]"},
1006                                                                         {'W', "record[]"},
1007                                                                         {'X', "method[]"},
1008                                                                         {'Y', "enum[]"},
1009                                                                         {'Z', "annotation[]"},
1010                                                                         {'@', "method_handle[]"},
1011                                                                         {'*', "nullptr_string"}};
1012 
1013     return TAG_TO_STRING.at(tag);
1014 }
1015 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1016 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1017 {
1018     std::stringstream ss;
1019 
1020     if (type == "i8") {
1021         auto res = value.Get<int8_t>();
1022         ss << static_cast<int>(res);
1023     } else if (type == "u1" || type == "u8") {
1024         auto res = value.Get<uint8_t>();
1025         ss << static_cast<unsigned int>(res);
1026     } else if (type == "i16") {
1027         ss << value.Get<int16_t>();
1028     } else if (type == "u16") {
1029         ss << value.Get<uint16_t>();
1030     } else if (type == "i32") {
1031         ss << value.Get<int32_t>();
1032     } else if (type == "u32") {
1033         ss << value.Get<uint32_t>();
1034     } else if (type == "i64") {
1035         ss << value.Get<int64_t>();
1036     } else if (type == "u64") {
1037         ss << value.Get<uint64_t>();
1038     } else if (type == "f32") {
1039         ss << value.Get<float>();
1040     } else if (type == "f64") {
1041         ss << value.Get<double>();
1042     } else if (type == "string") {
1043         const auto id = value.Get<panda_file::File::EntityId>();
1044         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1045     } else if (type == "record") {
1046         const auto id = value.Get<panda_file::File::EntityId>();
1047         ss << GetFullRecordName(id);
1048     } else if (type == "method") {
1049         const auto id = value.Get<panda_file::File::EntityId>();
1050         AddMethodToTables(id);
1051         ss << GetMethodSignature(id);
1052     } else if (type == "enum") {
1053         const auto id = value.Get<panda_file::File::EntityId>();
1054         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1055         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1056            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1057     } else if (type == "annotation") {
1058         const auto id = value.Get<panda_file::File::EntityId>();
1059         ss << "id_" << id;
1060     } else if (type == "void") {
1061         return std::string();
1062     } else if (type == "method_handle") {
1063     } else if (type == "nullptr_string") {
1064         ss << static_cast<uint32_t>(0);
1065     }
1066 
1067     return ss.str();
1068 }
1069 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1070 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1071                                              const size_t idx)
1072 {
1073     std::stringstream ss;
1074 
1075     if (type == "i8") {
1076         auto res = value.Get<int8_t>(idx);
1077         ss << static_cast<int>(res);
1078     } else if (type == "u1" || type == "u8") {
1079         auto res = value.Get<uint8_t>(idx);
1080         ss << static_cast<unsigned int>(res);
1081     } else if (type == "i16") {
1082         ss << value.Get<int16_t>(idx);
1083     } else if (type == "u16") {
1084         ss << value.Get<uint16_t>(idx);
1085     } else if (type == "i32") {
1086         ss << value.Get<int32_t>(idx);
1087     } else if (type == "u32") {
1088         ss << value.Get<uint32_t>(idx);
1089     } else if (type == "i64") {
1090         ss << value.Get<int64_t>(idx);
1091     } else if (type == "u64") {
1092         ss << value.Get<uint64_t>(idx);
1093     } else if (type == "f32") {
1094         ss << value.Get<float>(idx);
1095     } else if (type == "f64") {
1096         ss << value.Get<double>(idx);
1097     } else if (type == "string") {
1098         const auto id = value.Get<panda_file::File::EntityId>(idx);
1099         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1100     } else if (type == "record") {
1101         const auto id = value.Get<panda_file::File::EntityId>(idx);
1102         ss << GetFullRecordName(id);
1103     } else if (type == "method") {
1104         const auto id = value.Get<panda_file::File::EntityId>(idx);
1105         AddMethodToTables(id);
1106         ss << GetMethodSignature(id);
1107     } else if (type == "enum") {
1108         const auto id = value.Get<panda_file::File::EntityId>(idx);
1109         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1110         ss << GetFullRecordName(fieldAccessor.GetClassId()) << "."
1111            << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1112     } else if (type == "annotation") {
1113         const auto id = value.Get<panda_file::File::EntityId>(idx);
1114         ss << "id_" << id;
1115     } else if (type == "method_handle") {
1116     } else if (type == "nullptr_string") {
1117     }
1118 
1119     return ss.str();
1120 }
1121 
GetFullMethodName(const panda_file::File::EntityId & methodId) const1122 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &methodId) const
1123 {
1124     panda::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1125 
1126     const auto methodNameRaw = StringDataToString(file_->GetStringData(methodAccessor.GetNameId()));
1127 
1128     std::string className = GetFullRecordName(methodAccessor.GetClassId());
1129     if (IsSystemType(className)) {
1130         className = "";
1131     } else {
1132         className += ".";
1133     }
1134 
1135     return className + methodNameRaw;
1136 }
1137 
GetMethodSignature(const panda_file::File::EntityId & methodId) const1138 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &methodId) const
1139 {
1140     panda::panda_file::MethodDataAccessor methodAccessor(*file_, methodId);
1141 
1142     pandasm::Function method(GetFullMethodName(methodId), fileLanguage_);
1143     GetParams(&method, methodAccessor.GetProtoId());
1144     GetMetaData(&method, methodId);
1145 
1146     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1147 }
1148 
GetFullRecordName(const panda_file::File::EntityId & classId) const1149 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &classId) const
1150 {
1151     std::string name = StringDataToString(file_->GetStringData(classId));
1152 
1153     auto type = pandasm::Type::FromDescriptor(name);
1154     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1155 
1156     return type.GetPandasmName();
1157 }
1158 
GetRecordInfo(const panda_file::File::EntityId & recordId,RecordInfo * recordInfo) const1159 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &recordId, RecordInfo *recordInfo) const
1160 {
1161     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1162 
1163     if (file_->IsExternal(recordId)) {
1164         return;
1165     }
1166 
1167     panda_file::ClassDataAccessor classAccessor {*file_, recordId};
1168     std::stringstream ss;
1169 
1170     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << classAccessor.GetClassId()
1171        << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << classAccessor.GetSize() << " ("
1172        << std::dec << classAccessor.GetSize() << ")";
1173 
1174     recordInfo->recordInfo = ss.str();
1175     ss.str(std::string());
1176 
1177     classAccessor.EnumerateFields([&](panda_file::FieldDataAccessor &fieldAccessor) -> void {
1178         ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1179            << fieldAccessor.GetFieldId() << ", type: 0x" << fieldAccessor.GetType();
1180 
1181         recordInfo->fieldsInfo.push_back(ss.str());
1182 
1183         ss.str(std::string());
1184     });
1185 }
1186 
GetMethodInfo(const panda_file::File::EntityId & methodId,MethodInfo * methodInfo) const1187 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &methodId, MethodInfo *methodInfo) const
1188 {
1189     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1190 
1191     panda_file::MethodDataAccessor methodAccessor {*file_, methodId};
1192     std::stringstream ss;
1193 
1194     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1195        << methodAccessor.GetMethodId();
1196 
1197     if (methodAccessor.GetCodeId().has_value()) {
1198         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1199            << methodAccessor.GetCodeId().value();
1200 
1201         GetInsInfo(methodAccessor, methodAccessor.GetCodeId().value(), methodInfo);
1202     } else {
1203         ss << ", <no code>";
1204     }
1205 
1206     auto profileSize = methodAccessor.GetProfileSize();
1207     if (profileSize) {
1208         ss << ", profile size: " << profileSize.value();
1209     }
1210 
1211     methodInfo->methodInfo = ss.str();
1212 
1213     if (methodAccessor.GetCodeId()) {
1214         ASSERT(debugInfoExtractor_ != nullptr);
1215         methodInfo->lineNumberTable = debugInfoExtractor_->GetLineNumberTable(methodId);
1216         methodInfo->localVariableTable = debugInfoExtractor_->GetLocalVariableTable(methodId);
1217 
1218         // Add information about parameters into the table
1219         panda_file::CodeDataAccessor codeda(*file_, methodAccessor.GetCodeId().value());
1220         auto argIdx = static_cast<int32_t>(codeda.GetNumVregs());
1221         uint32_t codeSize = codeda.GetCodeSize();
1222         for (const auto &info : debugInfoExtractor_->GetParameterInfo(methodId)) {
1223             panda_file::LocalVariableInfo argInfo {info.name, info.signature, "", argIdx++, 0, codeSize};
1224             methodInfo->localVariableTable.emplace_back(argInfo);
1225         }
1226     }
1227 }
1228 
Serialize(const std::string & name,const pandasm::LiteralArray & litArray,std::ostream & os) const1229 void Disassembler::Serialize(const std::string &name, const pandasm::LiteralArray &litArray, std::ostream &os) const
1230 {
1231     if (litArray.literals.empty()) {
1232         return;
1233     }
1234 
1235     bool isConst = litArray.literals[0].IsArray();
1236 
1237     std::stringstream specifiers {};
1238 
1239     if (isConst) {
1240         specifiers << LiteralTagToString(litArray.literals[0].tag) << " " << litArray.literals.size() << " ";
1241     }
1242 
1243     os << ".array array_" << name << " " << specifiers.str() << "{";
1244 
1245     SerializeValues(litArray, isConst, os);
1246 
1247     os << "}\n";
1248 }
1249 
LiteralTagToString(const panda_file::LiteralTag & tag) const1250 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1251 {
1252     switch (tag) {
1253         case panda_file::LiteralTag::BOOL:
1254         case panda_file::LiteralTag::ARRAY_U1:
1255             return "u1";
1256         case panda_file::LiteralTag::ARRAY_U8:
1257             return "u8";
1258         case panda_file::LiteralTag::ARRAY_I8:
1259             return "i8";
1260         case panda_file::LiteralTag::ARRAY_U16:
1261             return "u16";
1262         case panda_file::LiteralTag::ARRAY_I16:
1263             return "i16";
1264         case panda_file::LiteralTag::ARRAY_U32:
1265             return "u32";
1266         case panda_file::LiteralTag::INTEGER:
1267         case panda_file::LiteralTag::ARRAY_I32:
1268             return "i32";
1269         case panda_file::LiteralTag::ARRAY_U64:
1270             return "u64";
1271         case panda_file::LiteralTag::BIGINT:
1272         case panda_file::LiteralTag::ARRAY_I64:
1273             return "i64";
1274         case panda_file::LiteralTag::FLOAT:
1275         case panda_file::LiteralTag::ARRAY_F32:
1276             return "f32";
1277         case panda_file::LiteralTag::DOUBLE:
1278         case panda_file::LiteralTag::ARRAY_F64:
1279             return "f64";
1280         case panda_file::LiteralTag::STRING:
1281         case panda_file::LiteralTag::ARRAY_STRING:
1282             return pandasm::Type::FromDescriptor(panda_file::GetStringClassDescriptor(fileLanguage_)).GetPandasmName();
1283         case panda_file::LiteralTag::ACCESSOR:
1284             return "accessor";
1285         case panda_file::LiteralTag::NULLVALUE:
1286             return "nullvalue";
1287         case panda_file::LiteralTag::METHODAFFILIATE:
1288             return "method_affiliate";
1289         case panda_file::LiteralTag::METHOD:
1290             return "method";
1291         case panda_file::LiteralTag::GENERATORMETHOD:
1292             return "generator_method";
1293         default:
1294             LOG(ERROR, DISASSEMBLER) << "Unsupported literal with tag 0x" << std::hex << static_cast<uint32_t>(tag);
1295             UNREACHABLE();
1296     }
1297 }
1298 
LiteralValueToString(const pandasm::LiteralArray::Literal & lit) const1299 std::string Disassembler::LiteralValueToString(const pandasm::LiteralArray::Literal &lit) const
1300 {
1301     if (lit.IsBoolValue()) {
1302         std::stringstream res {};
1303         res << (std::get<bool>(lit.value));
1304         return res.str();
1305     }
1306 
1307     if (lit.IsByteValue()) {
1308         return LiteralIntegralValueToString<uint8_t>(lit);
1309     }
1310 
1311     if (lit.IsShortValue()) {
1312         return LiteralIntegralValueToString<uint16_t>(lit);
1313     }
1314 
1315     if (lit.IsIntegerValue()) {
1316         return LiteralIntegralValueToString<uint32_t>(lit);
1317     }
1318 
1319     if (lit.IsLongValue()) {
1320         return LiteralIntegralValueToString<uint64_t>(lit);
1321     }
1322 
1323     if (lit.IsDoubleValue()) {
1324         std::stringstream res {};
1325         res << std::get<double>(lit.value);
1326         return res.str();
1327     }
1328 
1329     if (lit.IsFloatValue()) {
1330         std::stringstream res {};
1331         res << std::get<float>(lit.value);
1332         return res.str();
1333     }
1334 
1335     if (lit.IsStringValue()) {
1336         std::stringstream res {};
1337         res << "\"" << std::get<std::string>(lit.value) << "\"";
1338         return res.str();
1339     }
1340 
1341     UNREACHABLE();
1342 }
1343 
SerializeValues(const pandasm::LiteralArray & litArray,const bool isConst,std::ostream & os) const1344 void Disassembler::SerializeValues(const pandasm::LiteralArray &litArray, const bool isConst, std::ostream &os) const
1345 {
1346     std::string separator = (isConst) ? (" ") : ("\n");
1347 
1348     os << separator;
1349 
1350     if (isConst) {
1351         for (const auto &l : litArray.literals) {
1352             os << LiteralValueToString(l) << separator;
1353         }
1354     } else {
1355         for (const auto &l : litArray.literals) {
1356             os << "\t" << LiteralTagToString(l.tag) << " " << LiteralValueToString(l) << separator;
1357         }
1358     }
1359 }
1360 
Serialize(const pandasm::Record & record,std::ostream & os,bool printInformation) const1361 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1362 {
1363     if (IsSystemType(record.name)) {
1364         return;
1365     }
1366 
1367     os << ".record " << record.name;
1368 
1369     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1370     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1371     if (recordInTable) {
1372         Serialize(*record.metadata, recordIter->second.annList, os);
1373     } else {
1374         Serialize(*record.metadata, {}, os);
1375     }
1376 
1377     if (record.metadata->IsForeign() && record.fieldList.empty()) {
1378         os << "\n\n";
1379         return;
1380     }
1381 
1382     os << " {";
1383 
1384     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1385         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1386         SerializeFields(record, os, true);
1387     } else {
1388         os << "\n";
1389         SerializeFields(record, os, false);
1390     }
1391 
1392     os << "}\n\n";
1393 }
1394 
SerializeUnionFields(const pandasm::Record & record,std::ostream & os,bool printInformation) const1395 void Disassembler::SerializeUnionFields(const pandasm::Record &record, std::ostream &os, bool printInformation) const
1396 {
1397     if (printInformation && progInfo_.recordsInfo.find(record.name) != progInfo_.recordsInfo.end()) {
1398         os << " # " << progInfo_.recordsInfo.at(record.name).recordInfo << "\n";
1399         SerializeFields(record, os, true, true);
1400     } else {
1401         SerializeFields(record, os, false, true);
1402     }
1403     os << "\n";
1404 }
1405 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool printInformation,bool isUnion) const1406 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool printInformation,
1407                                    bool isUnion) const
1408 {
1409     constexpr size_t INFO_OFFSET = 80;
1410 
1411     const auto recordIter = progAnn_.recordAnnotations.find(record.name);
1412     const bool recordInTable = recordIter != progAnn_.recordAnnotations.end();
1413 
1414     const auto recInf = (printInformation) ? (progInfo_.recordsInfo.at(record.name)) : (RecordInfo {});
1415 
1416     size_t fieldIdx = 0;
1417 
1418     std::stringstream ss;
1419     for (const auto &f : record.fieldList) {
1420         if (isUnion) {
1421             ss << ".union_field ";
1422         } else {
1423             ss << "\t";
1424         }
1425         ss << f.type.GetPandasmName() << " " << f.name;
1426         if (!isUnion) {
1427             if (recordInTable) {
1428                 const auto fieldIter = recordIter->second.fieldAnnotations.find(f.name);
1429                 if (fieldIter != recordIter->second.fieldAnnotations.end()) {
1430                     Serialize(*f.metadata, fieldIter->second, ss);
1431                 } else {
1432                     Serialize(*f.metadata, {}, ss);
1433                 }
1434             } else {
1435                 Serialize(*f.metadata, {}, ss);
1436             }
1437         }
1438 
1439         if (printInformation && !recInf.fieldsInfo.empty()) {
1440             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << recInf.fieldsInfo.at(fieldIdx) << "\n";
1441         } else {
1442             os << ss.str() << "\n";
1443         }
1444 
1445         ss.str(std::string());
1446         ss.clear();
1447 
1448         fieldIdx++;
1449     }
1450 }
1451 
Serialize(const pandasm::Function::CatchBlock & catchBlock,std::ostream & os) const1452 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catchBlock, std::ostream &os) const
1453 {
1454     if (catchBlock.exceptionRecord.empty()) {
1455         os << ".catchall ";
1456     } else {
1457         os << ".catch " << catchBlock.exceptionRecord << ", ";
1458     }
1459 
1460     os << catchBlock.tryBeginLabel << ", " << catchBlock.tryEndLabel << ", " << catchBlock.catchBeginLabel;
1461 
1462     if (!catchBlock.catchEndLabel.empty()) {
1463         os << ", " << catchBlock.catchEndLabel;
1464     }
1465 }
1466 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & annList,std::ostream & os) const1467 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &annList, std::ostream &os) const
1468 {
1469     auto boolAttributes = meta.GetBoolAttributes();
1470     auto attributes = meta.GetAttributes();
1471     if (boolAttributes.empty() && attributes.empty() && annList.empty()) {
1472         return;
1473     }
1474 
1475     os << " <";
1476 
1477     size_t size = boolAttributes.size();
1478     size_t idx = 0;
1479     for (const auto &attr : boolAttributes) {
1480         os << attr;
1481         ++idx;
1482 
1483         if (!attributes.empty() || !annList.empty() || idx < size) {
1484             os << ", ";
1485         }
1486     }
1487 
1488     size = attributes.size();
1489     idx = 0;
1490     for (const auto &[key, values] : attributes) {
1491         for (size_t i = 0; i < values.size(); i++) {
1492             os << key << "=" << values[i];
1493 
1494             if (i < values.size() - 1) {
1495                 os << ", ";
1496             }
1497         }
1498 
1499         ++idx;
1500 
1501         if (!annList.empty() || idx < size) {
1502             os << ", ";
1503         }
1504     }
1505 
1506     size = annList.size();
1507     idx = 0;
1508     for (const auto &[key, value] : annList) {
1509         os << key << "=" << value;
1510 
1511         ++idx;
1512 
1513         if (idx < size) {
1514             os << ", ";
1515         }
1516     }
1517 
1518     os << ">";
1519 }
1520 
SerializeLineNumberTable(const panda_file::LineNumberTable & lineNumberTable,std::ostream & os) const1521 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &lineNumberTable, std::ostream &os) const
1522 {
1523     if (lineNumberTable.empty()) {
1524         return;
1525     }
1526 
1527     os << "\n#   LINE_NUMBER_TABLE:\n";
1528     for (const auto &lineInfo : lineNumberTable) {
1529         os << "#\tline " << lineInfo.line << ": " << lineInfo.offset << "\n";
1530     }
1531 }
1532 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & localVariableTable,const pandasm::Function & method,std::ostream & os) const1533 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &localVariableTable,
1534                                                const pandasm::Function &method, std::ostream &os) const
1535 {
1536     if (localVariableTable.empty()) {
1537         return;
1538     }
1539 
1540     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1541     os << "#\t Start   End  Register           Name   Signature\n";
1542     const int startWidth = 5;
1543     const int endWidth = 4;
1544     const int regWidth = 8;
1545     const int nameWidth = 14;
1546     for (const auto &variableInfo : localVariableTable) {
1547         std::ostringstream regStream;
1548         regStream << variableInfo.regNumber << '(';
1549         if (variableInfo.regNumber < 0) {
1550             regStream << "acc";
1551         } else {
1552             uint32_t vreg = variableInfo.regNumber;
1553             uint32_t firstArgReg = method.GetTotalRegs();
1554             if (vreg < firstArgReg) {
1555                 regStream << 'v' << vreg;
1556             } else {
1557                 regStream << 'a' << vreg - firstArgReg;
1558             }
1559         }
1560         regStream << ')';
1561 
1562         os << "#\t " << std::setw(startWidth) << std::right << variableInfo.startOffset << "  ";
1563         os << std::setw(endWidth) << std::right << variableInfo.endOffset << "  ";
1564         os << std::setw(regWidth) << std::right << regStream.str() << " ";
1565         os << std::setw(nameWidth) << std::right << variableInfo.name << "   " << variableInfo.type;
1566         if (!variableInfo.typeSignature.empty() && variableInfo.typeSignature != variableInfo.type) {
1567             os << " (" << variableInfo.typeSignature << ")";
1568         }
1569         os << "\n";
1570     }
1571 }
1572 
SerializeLanguage(std::ostream & os) const1573 void Disassembler::SerializeLanguage(std::ostream &os) const
1574 {
1575     os << ".language " << panda::panda_file::LanguageToString(fileLanguage_) << "\n\n";
1576 }
1577 
SerializeFilename(std::ostream & os) const1578 void Disassembler::SerializeFilename(std::ostream &os) const
1579 {
1580     if (file_ == nullptr || file_->GetFilename().empty()) {
1581         return;
1582     }
1583 
1584     os << "# source binary: " << file_->GetFilename() << "\n\n";
1585 }
1586 
SerializeLitArrays(std::ostream & os,bool addSeparators) const1587 void Disassembler::SerializeLitArrays(std::ostream &os, bool addSeparators) const
1588 {
1589     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
1590 
1591     if (prog_.literalarrayTable.empty()) {
1592         return;
1593     }
1594 
1595     if (addSeparators) {
1596         os << "# ====================\n"
1597               "# LITERALS\n\n";
1598     }
1599 
1600     for (const auto &pair : prog_.literalarrayTable) {
1601         Serialize(pair.first, pair.second, os);
1602     }
1603 
1604     os << "\n";
1605 }
1606 
SerializeRecords(std::ostream & os,bool addSeparators,bool printInformation) const1607 void Disassembler::SerializeRecords(std::ostream &os, bool addSeparators, bool printInformation) const
1608 {
1609     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
1610 
1611     if (prog_.recordTable.empty()) {
1612         return;
1613     }
1614 
1615     if (addSeparators) {
1616         os << "# ====================\n"
1617               "# RECORDS\n\n";
1618     }
1619 
1620     for (const auto &r : prog_.recordTable) {
1621         if (!panda_file::IsDummyClassName(r.first)) {
1622             Serialize(r.second, os, printInformation);
1623         } else {
1624             SerializeUnionFields(r.second, os, printInformation);
1625         }
1626     }
1627 }
1628 
SerializeMethods(std::ostream & os,bool addSeparators,bool printInformation) const1629 void Disassembler::SerializeMethods(std::ostream &os, bool addSeparators, bool printInformation) const
1630 {
1631     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
1632 
1633     if (prog_.functionTable.empty()) {
1634         return;
1635     }
1636 
1637     if (addSeparators) {
1638         os << "# ====================\n"
1639               "# METHODS\n\n";
1640     }
1641 
1642     for (const auto &m : prog_.functionTable) {
1643         Serialize(m.second, os, printInformation);
1644     }
1645 }
1646 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1647 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1648 {
1649     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1650 }
1651 
IDToString(BytecodeInstruction bcIns,panda_file::File::EntityId methodId) const1652 std::string Disassembler::IDToString(BytecodeInstruction bcIns, panda_file::File::EntityId methodId) const
1653 {
1654     std::stringstream name;
1655 
1656     if (bcIns.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1657         auto idx = bcIns.GetId().AsIndex();
1658         auto id = file_->ResolveClassIndex(methodId, idx);
1659         auto type = pandasm::Type::FromDescriptor(StringDataToString(file_->GetStringData(id)));
1660 
1661         name.str("");
1662         name << type.GetPandasmName();
1663     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1664         auto idx = bcIns.GetId().AsIndex();
1665         auto id = file_->ResolveMethodIndex(methodId, idx);
1666 
1667         name << GetMethodSignature(id);
1668     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1669         name << '\"';
1670 
1671         if (skipStrings_ || quiet_) {
1672             name << std::hex << "0x" << bcIns.GetId().AsFileId();
1673         } else {
1674             name << StringDataToString(file_->GetStringData(bcIns.GetId().AsFileId()));
1675         }
1676 
1677         name << '\"';
1678     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1679         auto idx = bcIns.GetId().AsIndex();
1680         auto id = file_->ResolveFieldIndex(methodId, idx);
1681         panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1682 
1683         auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1684         if (!panda_file::IsDummyClassName(recordName)) {
1685             name << recordName;
1686             name << '.';
1687         }
1688         name << StringDataToString(file_->GetStringData(fieldAccessor.GetNameId()));
1689     } else if (bcIns.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1690         auto index = bcIns.GetId().AsIndex();
1691         name << "array_" << index;
1692     }
1693 
1694     return name.str();
1695 }
1696 
GetRecordLanguage(panda_file::File::EntityId classId) const1697 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId classId) const
1698 {
1699     if (file_->IsExternal(classId)) {
1700         return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1701     }
1702 
1703     panda_file::ClassDataAccessor cda(*file_, classId);
1704     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1705 }
1706 
TranslateImmToLabel(pandasm::Ins * paIns,LabelTable * labelTable,const uint8_t * insArr,BytecodeInstruction bcIns,BytecodeInstruction bcInsLast,panda_file::File::EntityId codeId)1707 static void TranslateImmToLabel(pandasm::Ins *paIns, LabelTable *labelTable, const uint8_t *insArr,
1708                                 BytecodeInstruction bcIns, BytecodeInstruction bcInsLast,
1709                                 panda_file::File::EntityId codeId)
1710 {
1711     const int32_t jmpOffset = std::get<int64_t>(paIns->imms.at(0));
1712     const auto bcInsDest = bcIns.JumpTo(jmpOffset);
1713     if (bcInsLast.GetAddress() > bcInsDest.GetAddress()) {
1714         size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(insArr), bcInsDest);
1715         if (idx != std::numeric_limits<size_t>::max()) {
1716             if (labelTable->find(idx) == labelTable->end()) {
1717                 std::stringstream ss {};
1718                 ss << "jump_label_" << labelTable->size();
1719                 (*labelTable)[idx] = ss.str();
1720             }
1721 
1722             paIns->imms.clear();
1723             paIns->ids.push_back(labelTable->at(idx));
1724         } else {
1725             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1726                                      << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1727                                      << ": invalid jump offset 0x" << jmpOffset
1728                                      << " - jumping in the middle of another instruction!";
1729         }
1730     } else {
1731         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1732                                  << "). incorrect instruction at offset: 0x" << (bcIns.GetAddress() - insArr)
1733                                  << ": invalid jump offset 0x" << jmpOffset << " - jumping out of bounds!";
1734     }
1735 }
1736 
CollectExternalFields(const panda_file::FieldDataAccessor & fieldAccessor)1737 void Disassembler::CollectExternalFields(const panda_file::FieldDataAccessor &fieldAccessor)
1738 {
1739     auto recordName = GetFullRecordName(fieldAccessor.GetClassId());
1740 
1741     pandasm::Field field(fileLanguage_);
1742     GetField(field, fieldAccessor);
1743 
1744     auto &fieldList = externalFieldTable_[recordName];
1745     auto retField = std::find_if(fieldList.begin(), fieldList.end(),
1746                                  [&field](pandasm::Field &fieldFromList) { return field.name == fieldFromList.name; });
1747     if (retField == fieldList.end()) {
1748         fieldList.push_back(std::move(field));
1749     }
1750 }
1751 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId methodId,panda_file::File::EntityId codeId)1752 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId methodId,
1753                                      panda_file::File::EntityId codeId)
1754 {
1755     panda_file::CodeDataAccessor codeAccessor(*file_, codeId);
1756 
1757     const auto insSz = codeAccessor.GetCodeSize();
1758     const auto insArr = codeAccessor.GetInstructions();
1759 
1760     method->regsNum = codeAccessor.GetNumVregs();
1761 
1762     auto bcIns = BytecodeInstruction(insArr);
1763     auto from = bcIns.GetAddress();
1764     const auto bcInsLast = bcIns.JumpTo(insSz);
1765 
1766     LabelTable labelTable = GetExceptions(method, methodId, codeId);
1767 
1768     IdList unknownExternalMethods {};
1769 
1770     while (bcIns.GetAddress() != bcInsLast.GetAddress()) {
1771         if (bcIns.GetAddress() > bcInsLast.GetAddress()) {
1772             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << codeId << " (0x" << std::hex << codeId
1773                                      << "). bytecode instructions sequence corrupted for method " << method->name
1774                                      << "! went out of bounds";
1775 
1776             break;
1777         }
1778 
1779         if (bcIns.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1780             auto idx = bcIns.GetId().AsIndex();
1781             auto id = file_->ResolveFieldIndex(methodId, idx);
1782             panda_file::FieldDataAccessor fieldAccessor(*file_, id);
1783 
1784             if (fieldAccessor.IsExternal()) {
1785                 CollectExternalFields(fieldAccessor);
1786             }
1787         }
1788 
1789         auto paIns = BytecodeInstructionToPandasmInstruction(bcIns, methodId);
1790         paIns.insDebug.boundLeft =
1791             bcIns.GetAddress() - from;  // It is used to produce a line table during method serialization
1792         if (paIns.IsJump()) {
1793             TranslateImmToLabel(&paIns, &labelTable, insArr, bcIns, bcInsLast, codeId);
1794         }
1795 
1796         // check if method id is unknown external method. if so, emplace it in table
1797         if (bcIns.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1798             const auto argMethodIdx = bcIns.GetId().AsIndex();
1799             const auto argMethodId = file_->ResolveMethodIndex(methodId, argMethodIdx);
1800 
1801             const auto argMethodSignature = GetMethodSignature(argMethodId);
1802 
1803             const bool isPresent = prog_.functionTable.find(argMethodSignature) != prog_.functionTable.cend();
1804             const bool isExternal = file_->IsExternal(argMethodId);
1805             if (isExternal && !isPresent) {
1806                 unknownExternalMethods.push_back(argMethodId);
1807             }
1808         }
1809 
1810         method->ins.push_back(paIns);
1811         bcIns = bcIns.GetNext();
1812     }
1813 
1814     for (const auto &pair : labelTable) {
1815         method->ins[pair.first].label = pair.second;
1816         method->ins[pair.first].setLabel = true;
1817     }
1818 
1819     return unknownExternalMethods;
1820 }
1821 
1822 }  // namespace panda::disasm
1823