• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19 
20 #include <iomanip>
21 
22 #include "get_language_specific_metadata.inc"
23 
24 namespace panda::disasm {
25 
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)26 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
27 {
28     auto file_new = panda_file::File::Open(filename_in);
29     file_.swap(file_new);
30 
31     if (file_ != nullptr) {
32         prog_ = pandasm::Program {};
33 
34         record_name_to_id_.clear();
35         method_name_to_id_.clear();
36 
37         skip_strings_ = skip_strings;
38         quiet_ = quiet;
39 
40         prog_info_ = ProgInfo {};
41 
42         prog_ann_ = ProgAnnotations {};
43 
44         GetRecords();
45         GetLiteralArrays();
46 
47         GetLanguageSpecificMetadata();
48     } else {
49         LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
50     }
51 }
52 
CollectInfo()53 void Disassembler::CollectInfo()
54 {
55     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
56 
57     debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
58 
59     for (const auto &pair : record_name_to_id_) {
60         GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
61     }
62 
63     for (const auto &pair : method_name_to_id_) {
64         GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
65     }
66 }
67 
Serialize(std::ostream & os,bool add_separators,bool print_information) const68 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
69 {
70     if (os.bad()) {
71         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
72 
73         return;
74     }
75 
76     if (file_ != nullptr) {
77         os << "# source binary: " << file_->GetFilename() << "\n\n";
78     }
79 
80     SerializeLanguage(os);
81 
82     if (add_separators) {
83         os << "# ====================\n"
84               "# LITERALS\n\n";
85     }
86 
87     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
88 
89     for (const auto &[key, lit_arr] : prog_.literalarray_table) {
90         Serialize(key, lit_arr, os);
91     }
92 
93     os << "\n";
94 
95     if (add_separators) {
96         os << "# ====================\n"
97               "# RECORDS\n\n";
98     }
99 
100     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
101 
102     for (const auto &r : prog_.record_table) {
103         Serialize(r.second, os, print_information);
104     }
105 
106     if (add_separators) {
107         os << "# ====================\n"
108               "# METHODS\n\n";
109     }
110 
111     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
112 
113     for (const auto &m : prog_.function_table) {
114         Serialize(m.second, os, print_information);
115     }
116 }
117 
IsSystemType(const std::string & type_name)118 inline bool Disassembler::IsSystemType(const std::string &type_name)
119 {
120     bool is_array_type = type_name.find('[') != std::string::npos;
121     bool is_global = type_name == "_GLOBAL";
122 
123     return is_array_type || is_global;
124 }
125 
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)126 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
127 {
128     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
129 
130     if (record == nullptr) {
131         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
132 
133         return;
134     }
135 
136     record->name = GetFullRecordName(record_id);
137 
138     LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
139 
140     GetMetaData(record, record_id);
141 
142     if (!file_->IsExternal(record_id)) {
143         GetMethods(record_id);
144         GetFields(record, record_id);
145     }
146 }
147 
AddMethodToTables(const panda_file::File::EntityId & method_id)148 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
149 {
150     pandasm::Function new_method("", file_language_);
151     GetMethod(&new_method, method_id);
152 
153     const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
154     if (prog_.function_table.find(signature) != prog_.function_table.end()) {
155         return;
156     }
157 
158     method_name_to_id_.emplace(signature, method_id);
159     prog_.function_synonyms[new_method.name].push_back(signature);
160     prog_.function_table.emplace(signature, std::move(new_method));
161 }
162 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)163 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
164 {
165     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
166 
167     if (method == nullptr) {
168         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
169 
170         return;
171     }
172 
173     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
174 
175     method->name = GetFullMethodName(method_id);
176 
177     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
178 
179     GetParams(method, method_accessor.GetProtoId());
180     GetMetaData(method, method_id);
181 
182     if (method_accessor.GetCodeId().has_value()) {
183         const IdList id_list = GetInstructions(method, method_id, method_accessor.GetCodeId().value());
184 
185         for (const auto &id : id_list) {
186             AddMethodToTables(id);
187         }
188     } else {
189         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
190                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
191 
192         return;
193     }
194 }
195 
196 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const197 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
198                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
199 {
200     panda_file::File::EntityId id(std::get<uint32_t>(value));
201     auto sp = file_->GetSpanFromId(id);
202     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
203     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
204         for (size_t i = 0; i < len; i++) {
205             pandasm::LiteralArray::Literal lit;
206             lit.tag_ = tag;
207             lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
208             lit_array->literals_.push_back(lit);
209         }
210         return;
211     }
212     for (size_t i = 0; i < len; i++) {
213         auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
214         pandasm::LiteralArray::Literal lit;
215         lit.tag_ = tag;
216         lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
217         lit_array->literals_.push_back(lit);
218     }
219 }
220 
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const221 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
222                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
223                                    const panda_file::LiteralTag &tag) const
224 {
225     pandasm::LiteralArray::Literal lit;
226     lit.tag_ = tag;
227     switch (tag) {
228         case panda_file::LiteralTag::BOOL: {
229             lit.value_ = std::get<bool>(value);
230             break;
231         }
232         case panda_file::LiteralTag::ACCESSOR:
233         case panda_file::LiteralTag::NULLVALUE:
234         case panda_file::LiteralTag::BUILTINTYPEINDEX: {
235             lit.value_ = std::get<uint8_t>(value);
236             break;
237         }
238         case panda_file::LiteralTag::METHODAFFILIATE: {
239             lit.value_ = std::get<uint16_t>(value);
240             break;
241         }
242         case panda_file::LiteralTag::LITERALBUFFERINDEX:
243         case panda_file::LiteralTag::INTEGER: {
244             lit.value_ = std::get<uint32_t>(value);
245             break;
246         }
247         case panda_file::LiteralTag::DOUBLE: {
248             lit.value_ = std::get<double>(value);
249             break;
250         }
251         case panda_file::LiteralTag::STRING: {
252             auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
253             lit.value_ = StringDataToString(str_data);
254             break;
255         }
256         case panda_file::LiteralTag::METHOD:
257         case panda_file::LiteralTag::GENERATORMETHOD: {
258             panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
259             lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
260             break;
261         }
262         case panda_file::LiteralTag::LITERALARRAY: {
263             std::stringstream ss;
264             ss << "0x" << std::hex << std::get<uint32_t>(value);
265             lit.value_ = ss.str();
266             break;
267         }
268         case panda_file::LiteralTag::TAGVALUE: {
269             return;
270         }
271         default: {
272             UNREACHABLE();
273         }
274     }
275     lit_array->literals_.push_back(lit);
276 }
277 
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const278 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
279 {
280     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
281     lit_array_accessor.EnumerateLiteralVals(
282         offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
283                                   const panda_file::LiteralTag &tag) {
284             switch (tag) {
285                 case panda_file::LiteralTag::ARRAY_U1: {
286                     FillLiteralArrayData<bool>(lit_array, tag, value);
287                     break;
288                 }
289                 case panda_file::LiteralTag::ARRAY_I8:
290                 case panda_file::LiteralTag::ARRAY_U8: {
291                     FillLiteralArrayData<uint8_t>(lit_array, tag, value);
292                     break;
293                 }
294                 case panda_file::LiteralTag::ARRAY_I16:
295                 case panda_file::LiteralTag::ARRAY_U16: {
296                     FillLiteralArrayData<uint16_t>(lit_array, tag, value);
297                     break;
298                 }
299                 case panda_file::LiteralTag::ARRAY_I32:
300                 case panda_file::LiteralTag::ARRAY_U32: {
301                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
302                     break;
303                 }
304                 case panda_file::LiteralTag::ARRAY_I64:
305                 case panda_file::LiteralTag::ARRAY_U64: {
306                     FillLiteralArrayData<uint64_t>(lit_array, tag, value);
307                     break;
308                 }
309                 case panda_file::LiteralTag::ARRAY_F32: {
310                     FillLiteralArrayData<float>(lit_array, tag, value);
311                     break;
312                 }
313                 case panda_file::LiteralTag::ARRAY_F64: {
314                     FillLiteralArrayData<double>(lit_array, tag, value);
315                     break;
316                 }
317                 case panda_file::LiteralTag::ARRAY_STRING: {
318                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
319                     break;
320                 }
321                 default: {
322                     FillLiteralData(lit_array, value, tag);
323                     break;
324                 }
325             }
326         });
327 }
328 
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const329 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
330 {
331     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
332     GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
333 }
334 
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const335 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
336 {
337     return module_literals_.find(id.GetOffset()) != module_literals_.end();
338 }
339 
GetLiteralArrays()340 void Disassembler::GetLiteralArrays()
341 {
342     const auto lit_arrays_id = file_->GetLiteralArraysId();
343 
344     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
345                              << lit_arrays_id << ")";
346 
347     panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
348     size_t num_litarrays = lda.GetLiteralNum();
349     for (size_t index = 0; index < num_litarrays; index++) {
350         auto id = lda.GetLiteralArrayId(index);
351         if (IsModuleLiteralOffset(id)) {
352             continue;  // exclude module literals as they do not obey encoding rules of normal literals
353         }
354         std::stringstream ss;
355         ss << index << " 0x" << std::hex << id.GetOffset();
356         panda::pandasm::LiteralArray lit_arr;
357         GetLiteralArray(&lit_arr, index);
358         prog_.literalarray_table.emplace(ss.str(), lit_arr);
359     }
360 }
361 
GetRecords()362 void Disassembler::GetRecords()
363 {
364     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
365 
366     const auto class_idx = file_->GetClasses();
367 
368     for (size_t i = 0; i < class_idx.size(); i++) {
369         uint32_t class_id = class_idx[i];
370         auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
371 
372         if (class_id > file_->GetHeader()->file_size) {
373             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
374                                      << class_off << "). binary file corrupted. record offset (0x" << class_id
375                                      << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
376             break;
377         }
378 
379         const panda_file::File::EntityId record_id {class_id};
380         auto language = GetRecordLanguage(record_id);
381 
382         if (language != file_language_) {
383             if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
384                 file_language_ = language;
385             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
386                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
387                                          << std::hex << class_off << "). record's language  ("
388                                          << panda_file::LanguageToString(language)
389                                          << ")  differs from file's language ("
390                                          << panda_file::LanguageToString(file_language_) << ")!";
391             }
392         }
393 
394         pandasm::Record record("", file_language_);
395         GetRecord(&record, record_id);
396 
397         if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
398             record_name_to_id_.emplace(record.name, record_id);
399             prog_.record_table.emplace(record.name, std::move(record));
400         }
401     }
402 }
403 
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)404 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
405 {
406     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
407 
408     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
409         pandasm::Field field(file_language_);
410 
411         panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
412         field.name = StringDataToString(file_->GetStringData(field_name_id));
413 
414         uint32_t field_type = field_accessor.GetType();
415         field.type = FieldTypeToPandasmType(field_type);
416 
417         GetMetaData(&field, field_accessor.GetFieldId());
418 
419         record->field_list.push_back(std::move(field));
420     });
421 }
422 
GetMethods(const panda_file::File::EntityId & record_id)423 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
424 {
425     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
426 
427     class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
428         AddMethodToTables(method_accessor.GetMethodId());
429     });
430 }
431 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & proto_id) const432 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &proto_id) const
433 {
434     /**
435      * frame size - 2^16 - 1
436      */
437     static const uint32_t MAX_ARG_NUM = 0xFFFF;
438 
439     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << proto_id << " (0x" << std::hex << proto_id << ")";
440 
441     if (method == nullptr) {
442         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
443 
444         return;
445     }
446 
447     panda_file::ProtoDataAccessor proto_accessor(*file_, proto_id);
448 
449     auto params_num = proto_accessor.GetNumArgs();
450 
451     if (params_num > MAX_ARG_NUM) {
452         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << proto_id << " (0x" << std::hex << proto_id
453                                  << "). number of function's arguments (" << std::dec << params_num
454                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
455 
456         return;
457     }
458 
459     size_t ref_idx = 0;
460     method->return_type = PFTypeToPandasmType(proto_accessor.GetReturnType(), proto_accessor, ref_idx);
461 
462     for (uint8_t i = 0; i < params_num; i++) {
463         auto arg_type = PFTypeToPandasmType(proto_accessor.GetArgType(i), proto_accessor, ref_idx);
464         method->params.push_back(pandasm::Function::Parameter(arg_type, file_language_));
465     }
466 }
467 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const468 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
469                                        panda_file::File::EntityId code_id) const
470 {
471     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
472 
473     if (method == nullptr) {
474         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
475         return LabelTable {};
476     }
477 
478     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
479 
480     const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
481     const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
482 
483     size_t try_idx = 0;
484     LabelTable label_table {};
485     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
486         pandasm::Function::CatchBlock catch_block_pa {};
487         if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
488             return false;
489         }
490         size_t catch_idx = 0;
491         try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
492             auto class_idx = catch_block.GetTypeIdx();
493 
494             if (class_idx == panda_file::INVALID_INDEX) {
495                 catch_block_pa.exception_record = "";
496             } else {
497                 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
498                 catch_block_pa.exception_record = GetFullRecordName(class_id);
499             }
500             if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
501                                   catch_idx)) {
502                 return false;
503             }
504 
505             method->catch_blocks.push_back(catch_block_pa);
506             catch_block_pa.catch_begin_label = "";
507             catch_block_pa.catch_end_label = "";
508             catch_idx++;
509 
510             return true;
511         });
512         try_idx++;
513 
514         return true;
515     });
516 
517     return label_table;
518 }
519 
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)520 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
521 {
522     size_t count = 0;
523 
524     while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
525         count++;
526         bc_ins_first = bc_ins_first.GetNext();
527         if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
528             return std::numeric_limits<size_t>::max();
529         }
530     }
531 
532     return count;
533 }
534 
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const535 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
536                                   const panda_file::CodeDataAccessor::TryBlock &try_block,
537                                   pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
538                                   size_t try_idx) const
539 {
540     const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
541     const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
542 
543     const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
544     const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
545 
546     const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
547     const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
548     const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
549     const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
550 
551     if (!try_begin_offset_in_range || !try_begin_offset_valid) {
552         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
553                                  << try_begin_bc_ins.GetAddress();
554         return false;
555     } else {
556         std::stringstream ss {};
557         ss << "try_begin_label_" << try_idx;
558 
559         LabelTable::iterator it = label_table->find(try_begin_idx);
560         if (it == label_table->end()) {
561             catch_block_pa->try_begin_label = ss.str();
562             label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
563         } else {
564             catch_block_pa->try_begin_label = it->second;
565         }
566     }
567 
568     if (!try_end_offset_in_range || !try_end_offset_valid) {
569         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
570                                  << try_end_bc_ins.GetAddress();
571         return false;
572     } else {
573         std::stringstream ss {};
574         ss << "try_end_label_" << try_idx;
575 
576         LabelTable::iterator it = label_table->find(try_end_idx);
577         if (it == label_table->end()) {
578             catch_block_pa->try_end_label = ss.str();
579             label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
580         } else {
581             catch_block_pa->try_end_label = it->second;
582         }
583     }
584 
585     return true;
586 }
587 
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const588 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
589                                     const panda_file::CodeDataAccessor::CatchBlock &catch_block,
590                                     pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
591                                     size_t try_idx, size_t catch_idx) const
592 {
593     const auto handler_begin_offset = catch_block.GetHandlerPc();
594     const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
595 
596     const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
597     const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
598 
599     const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
600     const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
601 
602     const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
603     const bool handler_end_offset_in_range = bc_ins_last.GetAddress() > handler_end_bc_ins.GetAddress();
604     const bool handler_end_present = catch_block.GetCodeSize() != 0;
605     const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
606     const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
607 
608     if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
609         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
610                                  << handler_begin_bc_ins.GetAddress();
611         return false;
612     } else {
613         std::stringstream ss {};
614         ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
615 
616         LabelTable::iterator it = label_table->find(handler_begin_idx);
617         if (it == label_table->end()) {
618             catch_block_pa->catch_begin_label = ss.str();
619             label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
620         } else {
621             catch_block_pa->catch_begin_label = it->second;
622         }
623     }
624 
625     if (!handler_end_offset_in_range || !handler_end_offset_valid) {
626         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
627                                  << handler_end_bc_ins.GetAddress();
628         return false;
629     } else if (handler_end_present) {
630         std::stringstream ss {};
631         ss << "handler_end_label_" << try_idx << "_" << catch_idx;
632 
633         LabelTable::iterator it = label_table->find(handler_end_idx);
634         if (it == label_table->end()) {
635             catch_block_pa->catch_end_label = ss.str();
636             label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
637         } else {
638             catch_block_pa->catch_end_label = it->second;
639         }
640     }
641 
642     return true;
643 }
644 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const645 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
646 {
647     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
648                              << ")";
649 
650     if (method == nullptr) {
651         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
652 
653         return;
654     }
655 
656     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
657 
658     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
659 
660     if (!method_accessor.IsStatic()) {
661         const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
662         auto this_type = pandasm::Type::FromDescriptor(class_name);
663 
664         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
665                                  << "\') is not static. emplacing self-argument of type " << this_type.GetName();
666 
667         method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
668     } else {
669         method->metadata->SetAttribute("static");
670     }
671 
672     if (file_->IsExternal(method_accessor.GetMethodId())) {
673         method->metadata->SetAttribute("external");
674     }
675 
676     std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
677     std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
678 
679     const bool is_ctor = (method_name_raw == ctor_name);
680     const bool is_cctor = (method_name_raw == cctor_name);
681 
682     if (is_ctor) {
683         method->metadata->SetAttribute("ctor");
684         method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
685     } else if (is_cctor) {
686         method->metadata->SetAttribute("cctor");
687         method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
688     }
689 }
690 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const691 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
692 {
693     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
694                              << ")";
695 
696     if (record == nullptr) {
697         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
698 
699         return;
700     }
701 
702     if (file_->IsExternal(record_id)) {
703         record->metadata->SetAttribute("external");
704     }
705 }
706 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id)707 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id)
708 {
709     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
710 
711     if (field == nullptr) {
712         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
713 
714         return;
715     }
716 
717     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
718 
719     if (field_accessor.IsExternal()) {
720         field->metadata->SetAttribute("external");
721     }
722 
723     if (field_accessor.IsStatic()) {
724         field->metadata->SetAttribute("static");
725     }
726 
727     if (field->type.GetId() == panda_file::Type::TypeId::U32) {
728         const auto offset = field_accessor.GetValue<uint32_t>().value();
729         static const std::string TYPE_SUMMARY_FIELD_NAME = "typeSummaryOffset";
730         if (field->name != TYPE_SUMMARY_FIELD_NAME) {
731             LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
732                                      << " is excluded";
733             module_literals_.insert(offset);
734         }
735         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
736     }
737     if (field->type.GetId() == panda_file::Type::TypeId::U8) {
738         const auto val = field_accessor.GetValue<uint8_t>().value();
739         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
740     }
741 }
742 
AnnotationTagToString(const char tag) const743 std::string Disassembler::AnnotationTagToString(const char tag) const
744 {
745     switch (tag) {
746         case '1':
747             return "u1";
748         case '2':
749             return "i8";
750         case '3':
751             return "u8";
752         case '4':
753             return "i16";
754         case '5':
755             return "u16";
756         case '6':
757             return "i32";
758         case '7':
759             return "u32";
760         case '8':
761             return "i64";
762         case '9':
763             return "u64";
764         case 'A':
765             return "f32";
766         case 'B':
767             return "f64";
768         case 'C':
769             return "string";
770         case 'D':
771             return "record";
772         case 'E':
773             return "method";
774         case 'F':
775             return "enum";
776         case 'G':
777             return "annotation";
778         case 'I':
779             return "void";
780         case 'J':
781             return "method_handle";
782         case 'K':
783             return "u1[]";
784         case 'L':
785             return "i8[]";
786         case 'M':
787             return "u8[]";
788         case 'N':
789             return "i16[]";
790         case 'O':
791             return "u16[]";
792         case 'P':
793             return "i32[]";
794         case 'Q':
795             return "u32[]";
796         case 'R':
797             return "i64[]";
798         case 'S':
799             return "u64[]";
800         case 'T':
801             return "f32[]";
802         case 'U':
803             return "f64[]";
804         case 'V':
805             return "string[]";
806         case 'W':
807             return "record[]";
808         case 'X':
809             return "method[]";
810         case 'Y':
811             return "enum[]";
812         case 'Z':
813             return "annotation[]";
814         case '@':
815             return "method_handle[]";
816         case '*':
817             return "nullptr string";
818         default:
819             return std::string();
820     }
821 }
822 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)823 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
824 {
825     std::stringstream ss;
826 
827     if (type == "i8") {
828         int8_t res = value.Get<int8_t>();
829         ss << static_cast<int>(res);
830     } else if (type == "u1" || type == "u8") {
831         uint8_t res = value.Get<uint8_t>();
832         ss << static_cast<unsigned int>(res);
833     } else if (type == "i16") {
834         ss << value.Get<int16_t>();
835     } else if (type == "u16") {
836         ss << value.Get<uint16_t>();
837     } else if (type == "i32") {
838         ss << value.Get<int32_t>();
839     } else if (type == "u32") {
840         ss << value.Get<uint32_t>();
841     } else if (type == "i64") {
842         ss << value.Get<int64_t>();
843     } else if (type == "u64") {
844         ss << value.Get<uint64_t>();
845     } else if (type == "f32") {
846         ss << value.Get<float>();
847     } else if (type == "f64") {
848         ss << value.Get<double>();
849     } else if (type == "string") {
850         const auto id = value.Get<panda_file::File::EntityId>();
851         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
852     } else if (type == "record") {
853         const auto id = value.Get<panda_file::File::EntityId>();
854         ss << GetFullRecordName(id);
855     } else if (type == "method") {
856         const auto id = value.Get<panda_file::File::EntityId>();
857         AddMethodToTables(id);
858         ss << GetMethodSignature(id);
859     } else if (type == "enum") {
860         const auto id = value.Get<panda_file::File::EntityId>();
861         panda_file::FieldDataAccessor field_accessor(*file_, id);
862         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
863            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
864     } else if (type == "annotation") {
865         const auto id = value.Get<panda_file::File::EntityId>();
866         ss << "id_" << id;
867     } else if (type == "void") {
868         return std::string();
869     } else if (type == "method_handle") {
870     }
871 
872     return ss.str();
873 }
874 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)875 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
876                                              const size_t idx)
877 {
878     std::stringstream ss;
879 
880     if (type == "i8") {
881         int8_t res = value.Get<int8_t>(idx);
882         ss << static_cast<int>(res);
883     } else if (type == "u1" || type == "u8") {
884         uint8_t res = value.Get<uint8_t>(idx);
885         ss << static_cast<unsigned int>(res);
886     } else if (type == "i16") {
887         ss << value.Get<int16_t>(idx);
888     } else if (type == "u16") {
889         ss << value.Get<uint16_t>(idx);
890     } else if (type == "i32") {
891         ss << value.Get<int32_t>(idx);
892     } else if (type == "u32") {
893         ss << value.Get<uint32_t>(idx);
894     } else if (type == "i64") {
895         ss << value.Get<int64_t>(idx);
896     } else if (type == "u64") {
897         ss << value.Get<uint64_t>(idx);
898     } else if (type == "f32") {
899         ss << value.Get<float>(idx);
900     } else if (type == "f64") {
901         ss << value.Get<double>(idx);
902     } else if (type == "string") {
903         const auto id = value.Get<panda_file::File::EntityId>(idx);
904         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
905     } else if (type == "record") {
906         const auto id = value.Get<panda_file::File::EntityId>(idx);
907         ss << GetFullRecordName(id);
908     } else if (type == "method") {
909         const auto id = value.Get<panda_file::File::EntityId>(idx);
910         AddMethodToTables(id);
911         ss << GetMethodSignature(id);
912     } else if (type == "enum") {
913         const auto id = value.Get<panda_file::File::EntityId>(idx);
914         panda_file::FieldDataAccessor field_accessor(*file_, id);
915         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
916            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
917     } else if (type == "annotation") {
918         const auto id = value.Get<panda_file::File::EntityId>(idx);
919         ss << "id_" << id;
920     } else if (type == "method_handle") {
921     } else if (type == "nullptr string") {
922     }
923 
924     return ss.str();
925 }
926 
GetFullMethodName(const panda_file::File::EntityId & method_id) const927 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
928 {
929     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
930 
931     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
932 
933     std::string class_name = GetFullRecordName(method_accessor.GetClassId());
934     if (IsSystemType(class_name)) {
935         class_name = "";
936     } else {
937         class_name += ".";
938     }
939 
940     return class_name + method_name_raw;
941 }
942 
GetMethodSignature(const panda_file::File::EntityId & method_id) const943 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
944 {
945     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
946 
947     pandasm::Function method(GetFullMethodName(method_id), file_language_);
948     GetParams(&method, method_accessor.GetProtoId());
949     GetMetaData(&method, method_id);
950 
951     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
952 }
953 
GetFullRecordName(const panda_file::File::EntityId & class_id) const954 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
955 {
956     std::string name = StringDataToString(file_->GetStringData(class_id));
957 
958     auto type = pandasm::Type::FromDescriptor(name);
959     type = pandasm::Type(type.GetComponentName(), type.GetRank());
960 
961     return type.GetPandasmName();
962 }
963 
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const964 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
965 {
966     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
967 
968     if (file_->IsExternal(record_id)) {
969         return;
970     }
971 
972     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
973     std::stringstream ss;
974 
975     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
976        << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
977        << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
978 
979     record_info->record_info = ss.str();
980     ss.str(std::string());
981 
982     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
983         ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
984            << field_accessor.GetFieldId();
985 
986         record_info->fields_info.push_back(ss.str());
987 
988         ss.str(std::string());
989     });
990 }
991 
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const992 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
993 {
994     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
995 
996     panda_file::MethodDataAccessor method_accessor {*file_, method_id};
997     std::stringstream ss;
998 
999     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1000        << method_accessor.GetMethodId();
1001 
1002     if (method_accessor.GetCodeId().has_value()) {
1003         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1004            << method_accessor.GetCodeId().value();
1005 
1006         GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1007     } else {
1008         ss << ", <no code>";
1009     }
1010 
1011     method_info->method_info = ss.str();
1012 
1013     if (method_accessor.GetCodeId()) {
1014         ASSERT(debug_info_extractor_ != nullptr);
1015         method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1016         method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1017 
1018         // Add information about parameters into the table
1019         panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1020         auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1021         uint32_t code_size = codeda.GetCodeSize();
1022         for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1023             panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1024             method_info->local_variable_table.emplace_back(arg_info);
1025         }
1026     }
1027 }
1028 
IsArray(const panda_file::LiteralTag & tag)1029 static bool IsArray(const panda_file::LiteralTag &tag)
1030 {
1031     switch (tag) {
1032         case panda_file::LiteralTag::ARRAY_U1:
1033         case panda_file::LiteralTag::ARRAY_U8:
1034         case panda_file::LiteralTag::ARRAY_I8:
1035         case panda_file::LiteralTag::ARRAY_U16:
1036         case panda_file::LiteralTag::ARRAY_I16:
1037         case panda_file::LiteralTag::ARRAY_U32:
1038         case panda_file::LiteralTag::ARRAY_I32:
1039         case panda_file::LiteralTag::ARRAY_U64:
1040         case panda_file::LiteralTag::ARRAY_I64:
1041         case panda_file::LiteralTag::ARRAY_F32:
1042         case panda_file::LiteralTag::ARRAY_F64:
1043         case panda_file::LiteralTag::ARRAY_STRING:
1044             return true;
1045         default:
1046             return false;
1047     }
1048 }
1049 
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1050 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1051 {
1052     std::stringstream ret;
1053     if (lit_array.literals_.empty()) {
1054         return "";
1055     }
1056 
1057     std::stringstream ss;
1058     ss << "{ ";
1059     const auto &tag = lit_array.literals_[0].tag_;
1060     if (IsArray(tag)) {
1061         ss << LiteralTagToString(tag);
1062     }
1063     ss << lit_array.literals_.size();
1064     ss << " [ ";
1065     SerializeValues(lit_array, ss);
1066     ss << "]}";
1067     return ss.str();
1068 }
1069 
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1070 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1071 {
1072     os << key << " ";
1073     os << SerializeLiteralArray(lit_array);
1074     os << "\n";
1075 }
1076 
LiteralTagToString(const panda_file::LiteralTag & tag) const1077 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1078 {
1079     switch (tag) {
1080         case panda_file::LiteralTag::BOOL:
1081         case panda_file::LiteralTag::ARRAY_U1:
1082             return "u1";
1083         case panda_file::LiteralTag::ARRAY_U8:
1084             return "u8";
1085         case panda_file::LiteralTag::ARRAY_I8:
1086             return "i8";
1087         case panda_file::LiteralTag::ARRAY_U16:
1088             return "u16";
1089         case panda_file::LiteralTag::ARRAY_I16:
1090             return "i16";
1091         case panda_file::LiteralTag::ARRAY_U32:
1092             return "u32";
1093         case panda_file::LiteralTag::INTEGER:
1094         case panda_file::LiteralTag::ARRAY_I32:
1095             return "i32";
1096         case panda_file::LiteralTag::ARRAY_U64:
1097             return "u64";
1098         case panda_file::LiteralTag::ARRAY_I64:
1099             return "i64";
1100         case panda_file::LiteralTag::ARRAY_F32:
1101             return "f32";
1102         case panda_file::LiteralTag::DOUBLE:
1103         case panda_file::LiteralTag::ARRAY_F64:
1104             return "f64";
1105         case panda_file::LiteralTag::STRING:
1106         case panda_file::LiteralTag::ARRAY_STRING:
1107             return "string";
1108         case panda_file::LiteralTag::METHOD:
1109             return "method";
1110         case panda_file::LiteralTag::GENERATORMETHOD:
1111             return "generator_method";
1112         case panda_file::LiteralTag::ACCESSOR:
1113             return "accessor";
1114         case panda_file::LiteralTag::METHODAFFILIATE:
1115             return "method_affiliate";
1116         case panda_file::LiteralTag::NULLVALUE:
1117             return "null_value";
1118         case panda_file::LiteralTag::TAGVALUE:
1119             return "tagvalue";
1120         case panda_file::LiteralTag::LITERALBUFFERINDEX:
1121             return "lit_index";
1122         case panda_file::LiteralTag::LITERALARRAY:
1123             return "lit_offset";
1124         case panda_file::LiteralTag::BUILTINTYPEINDEX:
1125             return "builtin_type";
1126         default:
1127             UNREACHABLE();
1128     }
1129 }
1130 
1131 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1132 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1133 {
1134     switch (lit_array.literals_[0].tag_) {
1135         case panda_file::LiteralTag::ARRAY_U1: {
1136             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1137                 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1138             }
1139             break;
1140         }
1141         case panda_file::LiteralTag::ARRAY_U8: {
1142             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1143                 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1144             }
1145             break;
1146         }
1147         case panda_file::LiteralTag::ARRAY_I8: {
1148             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1149                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1150             }
1151             break;
1152         }
1153         case panda_file::LiteralTag::ARRAY_U16: {
1154             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1155                 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1156             }
1157             break;
1158         }
1159         case panda_file::LiteralTag::ARRAY_I16: {
1160             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1161                 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1162             }
1163             break;
1164         }
1165         case panda_file::LiteralTag::ARRAY_U32: {
1166             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1167                 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1168             }
1169             break;
1170         }
1171         case panda_file::LiteralTag::ARRAY_I32: {
1172             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1173                 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1174             }
1175             break;
1176         }
1177         case panda_file::LiteralTag::ARRAY_U64: {
1178             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1179                 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1180             }
1181             break;
1182         }
1183         case panda_file::LiteralTag::ARRAY_I64: {
1184             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1185                 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1186             }
1187             break;
1188         }
1189         case panda_file::LiteralTag::ARRAY_F32: {
1190             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1191                 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1192             }
1193             break;
1194         }
1195         case panda_file::LiteralTag::ARRAY_F64: {
1196             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1197                 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1198             }
1199             break;
1200         }
1201         case panda_file::LiteralTag::ARRAY_STRING: {
1202             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1203                 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1204             }
1205             break;
1206         }
1207         default:
1208             SerializeLiterals(lit_array, os);
1209     }
1210 }
1211 
1212 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1213 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1214 {
1215     for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1216         const auto &tag = lit_array.literals_[i].tag_;
1217         os << LiteralTagToString(tag) << ":";
1218         const auto &val = lit_array.literals_[i].value_;
1219         switch (lit_array.literals_[i].tag_) {
1220             case panda_file::LiteralTag::BOOL: {
1221                 os << std::get<bool>(val);
1222                 break;
1223             }
1224             case panda_file::LiteralTag::LITERALBUFFERINDEX:
1225             case panda_file::LiteralTag::INTEGER: {
1226                 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1227                 break;
1228             }
1229             case panda_file::LiteralTag::DOUBLE: {
1230                 os << std::get<double>(val);
1231                 break;
1232             }
1233             case panda_file::LiteralTag::STRING: {
1234                 os << "\"" << std::get<std::string>(val) << "\"";
1235                 break;
1236             }
1237             case panda_file::LiteralTag::METHOD:
1238             case panda_file::LiteralTag::GENERATORMETHOD: {
1239                 os << std::get<std::string>(val);
1240                 break;
1241             }
1242             case panda_file::LiteralTag::NULLVALUE:
1243             case panda_file::LiteralTag::ACCESSOR: {
1244                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1245                 break;
1246             }
1247             case panda_file::LiteralTag::METHODAFFILIATE: {
1248                 os << std::get<uint16_t>(val);
1249                 break;
1250             }
1251             case panda_file::LiteralTag::LITERALARRAY: {
1252                 os << std::get<std::string>(val);
1253                 break;
1254             }
1255             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1256                 os << static_cast<int16_t>(std::get<uint8_t>(val));
1257                 break;
1258             }
1259             default:
1260                 UNREACHABLE();
1261         }
1262         os << ", ";
1263     }
1264 }
1265 
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1266 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1267 {
1268     if (IsSystemType(record.name)) {
1269         return;
1270     }
1271 
1272     os << ".record " << record.name;
1273 
1274     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1275     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1276 
1277     if (record_in_table) {
1278         Serialize(*record.metadata, record_iter->second.ann_list, os);
1279     } else {
1280         Serialize(*record.metadata, {}, os);
1281     }
1282 
1283     if (record.metadata->IsForeign()) {
1284         os << "\n\n";
1285         return;
1286     }
1287 
1288     os << " {";
1289 
1290     if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1291         os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1292         SerializeFields(record, os, true);
1293     } else {
1294         os << "\n";
1295         SerializeFields(record, os, false);
1296     }
1297 
1298     os << "}\n\n";
1299 }
1300 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1301 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1302 {
1303     constexpr size_t INFO_OFFSET = 80;
1304 
1305     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1306     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1307 
1308     const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1309 
1310     size_t field_idx = 0;
1311 
1312     std::stringstream ss;
1313     for (const auto &f : record.field_list) {
1314         ss << "\t" << f.type.GetPandasmName() << " " << f.name;
1315         if (f.metadata->GetValue().has_value()) {
1316             if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1317                 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1318             }
1319             if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1320                 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1321             }
1322         }
1323         if (record_in_table) {
1324             const auto field_iter = record_iter->second.field_annotations.find(f.name);
1325             if (field_iter != record_iter->second.field_annotations.end()) {
1326                 Serialize(*f.metadata, field_iter->second, ss);
1327             } else {
1328                 Serialize(*f.metadata, {}, ss);
1329             }
1330         } else {
1331             Serialize(*f.metadata, {}, ss);
1332         }
1333 
1334         if (print_information) {
1335             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1336         } else {
1337             os << ss.str() << "\n";
1338         }
1339 
1340         ss.str(std::string());
1341         ss.clear();
1342 
1343         field_idx++;
1344     }
1345 }
1346 
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1347 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1348 {
1349     os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1350 
1351     if (method.params.size() > 0) {
1352         os << method.params[0].type.GetPandasmName() << " a0";
1353 
1354         for (uint8_t i = 1; i < method.params.size(); i++) {
1355             os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1356         }
1357     }
1358     os << ")";
1359 
1360     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1361 
1362     const auto method_iter = prog_ann_.method_annotations.find(signature);
1363     if (method_iter != prog_ann_.method_annotations.end()) {
1364         Serialize(*method.metadata, method_iter->second, os);
1365     } else {
1366         Serialize(*method.metadata, {}, os);
1367     }
1368 
1369     auto method_info_it = prog_info_.methods_info.find(signature);
1370     bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1371     if (print_method_info) {
1372         const MethodInfo &method_info = method_info_it->second;
1373 
1374         size_t width = 0;
1375         for (const auto &i : method.ins) {
1376             if (i.ToString().size() > width) {
1377                 width = i.ToString().size();
1378             }
1379         }
1380 
1381         os << " { # " << method_info.method_info << "\n#   CODE:\n";
1382 
1383         for (size_t i = 0; i < method.ins.size(); i++) {
1384             os << "\t" << std::setw(width) << std::left << method.ins.at(i).ToString("", true, method.regs_num) << " # "
1385                << method_info.instructions_info.at(i) << "\n";
1386         }
1387     } else {
1388         os << " {\n";
1389 
1390         for (const auto &i : method.ins) {
1391             if (i.set_label) {
1392                 std::string ins = i.ToString("", true, method.regs_num);
1393                 std::string delim = ": ";
1394                 size_t pos = ins.find(delim);
1395                 std::string label = ins.substr(0, pos);
1396                 ins.erase(0, pos + delim.length());
1397                 os << label << ":\n\t" << ins << "\n";
1398             } else {
1399                 os << "\t" << i.ToString("", true, method.regs_num) << "\n";
1400             }
1401         }
1402     }
1403 
1404     if (method.catch_blocks.size() != 0) {
1405         os << "\n";
1406 
1407         for (const auto &catch_block : method.catch_blocks) {
1408             Serialize(catch_block, os);
1409 
1410             os << "\n";
1411         }
1412     }
1413 
1414     if (print_method_info) {
1415         const MethodInfo &method_info = method_info_it->second;
1416         SerializeLineNumberTable(method_info.line_number_table, os);
1417         SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1418     }
1419 
1420     os << "}\n\n";
1421 }
1422 
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1423 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1424 {
1425     if (catch_block.exception_record == "") {
1426         os << ".catchall ";
1427     } else {
1428         os << ".catch " << catch_block.exception_record << ", ";
1429     }
1430 
1431     os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1432 
1433     if (catch_block.catch_end_label != "") {
1434         os << ", " << catch_block.catch_end_label;
1435     }
1436 }
1437 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1438 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1439 {
1440     auto bool_attributes = meta.GetBoolAttributes();
1441     auto attributes = meta.GetAttributes();
1442     if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1443         return;
1444     }
1445 
1446     os << " <";
1447 
1448     size_t size = bool_attributes.size();
1449     size_t idx = 0;
1450     for (const auto &attr : bool_attributes) {
1451         os << attr;
1452         ++idx;
1453 
1454         if (!attributes.empty() || !ann_list.empty() || idx < size) {
1455             os << ", ";
1456         }
1457     }
1458 
1459     size = attributes.size();
1460     idx = 0;
1461     for (const auto &[key, values] : attributes) {
1462         for (size_t i = 0; i < values.size(); i++) {
1463             os << key << "=" << values[i];
1464 
1465             if (i < values.size() - 1) {
1466                 os << ", ";
1467             }
1468         }
1469 
1470         ++idx;
1471 
1472         if (!ann_list.empty() || idx < size) {
1473             os << ", ";
1474         }
1475     }
1476 
1477     size = ann_list.size();
1478     idx = 0;
1479     for (const auto &[key, value] : ann_list) {
1480         os << key << "=" << value;
1481 
1482         ++idx;
1483 
1484         if (idx < size) {
1485             os << ", ";
1486         }
1487     }
1488 
1489     os << ">";
1490 }
1491 
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const1492 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
1493                                             std::ostream &os) const
1494 {
1495     if (line_number_table.empty()) {
1496         return;
1497     }
1498 
1499     os << "\n#   LINE_NUMBER_TABLE:\n";
1500     for (const auto &line_info : line_number_table) {
1501         os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
1502     }
1503 }
1504 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const1505 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
1506                                                const pandasm::Function &method, std::ostream &os) const
1507 {
1508     if (local_variable_table.empty()) {
1509         return;
1510     }
1511 
1512     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1513     os << "#\t Start   End  Register           Name   Signature\n";
1514     const int START_WIDTH = 5;
1515     const int END_WIDTH = 4;
1516     const int REG_WIDTH = 8;
1517     const int NAME_WIDTH = 14;
1518     for (const auto &variable_info : local_variable_table) {
1519         std::ostringstream reg_stream;
1520         reg_stream << variable_info.reg_number << '(';
1521         if (variable_info.reg_number < 0) {
1522             reg_stream << "acc";
1523         } else {
1524             uint32_t vreg = variable_info.reg_number;
1525             uint32_t first_arg_reg = method.GetTotalRegs();
1526             if (vreg < first_arg_reg) {
1527                 reg_stream << 'v' << vreg;
1528             } else {
1529                 reg_stream << 'a' << vreg - first_arg_reg;
1530             }
1531         }
1532         reg_stream << ')';
1533 
1534         os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << "  ";
1535         os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << "  ";
1536         os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
1537         os << std::setw(NAME_WIDTH) << std::right << variable_info.name << "   " << variable_info.type;
1538         if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
1539             os << " (" << variable_info.type_signature << ")";
1540         }
1541         os << "\n";
1542     }
1543 }
1544 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1545 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1546 {
1547     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1548 }
1549 
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const1550 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1551                                      size_t idx) const
1552 {
1553     std::stringstream name;
1554     const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
1555 
1556     if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
1557         name << GetMethodSignature(offset);
1558     } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
1559         name << '\"';
1560         name << StringDataToString(file_->GetStringData(offset));
1561         name << '\"';
1562     } else {
1563         ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
1564         pandasm::LiteralArray lit_array;
1565         GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
1566         name << SerializeLiteralArray(lit_array);
1567     }
1568 
1569     return name.str();
1570 }
1571 
GetRecordLanguage(panda_file::File::EntityId class_id) const1572 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
1573 {
1574     if (file_->IsExternal(class_id)) {
1575         return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1576     }
1577 
1578     panda_file::ClassDataAccessor cda(*file_, class_id);
1579     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1580 }
1581 
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)1582 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
1583                                 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
1584                                 panda_file::File::EntityId code_id)
1585 {
1586     const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
1587     const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1588     if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1589         size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1590 
1591         if (idx != std::numeric_limits<size_t>::max()) {
1592             if (label_table->find(idx) == label_table->end()) {
1593                 std::stringstream ss {};
1594                 ss << "jump_label_" << label_table->size();
1595                 (*label_table)[idx] = ss.str();
1596             }
1597 
1598             pa_ins->imms.clear();
1599             pa_ins->ids.push_back(label_table->at(idx));
1600         } else {
1601             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1602                                      << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1603                                      << ": invalid jump offset 0x" << jmp_offset
1604                                      << " - jumping in the middle of another instruction!";
1605         }
1606     } else {
1607         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1608                                  << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1609                                  << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
1610     }
1611 }
1612 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1613 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1614                                      panda_file::File::EntityId code_id) const
1615 {
1616     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1617 
1618     const auto ins_sz = code_accessor.GetCodeSize();
1619     const auto ins_arr = code_accessor.GetInstructions();
1620 
1621     method->regs_num = code_accessor.GetNumVregs();
1622 
1623     auto bc_ins = BytecodeInstruction(ins_arr);
1624     const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1625 
1626     LabelTable label_table = GetExceptions(method, method_id, code_id);
1627 
1628     IdList unknown_external_methods {};
1629 
1630     while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1631         if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1632             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1633                                      << "). bytecode instructions sequence corrupted for method " << method->name
1634                                      << "! went out of bounds";
1635 
1636             break;
1637         }
1638 
1639         auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
1640         if (pa_ins.IsJump()) {
1641             translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
1642         }
1643 
1644         // check if method id is unknown external method. if so, emplace it in table
1645         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1646             const auto arg_method_idx = bc_ins.GetId().AsIndex();
1647             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1648 
1649             const auto arg_method_signature = GetMethodSignature(arg_method_id);
1650 
1651             const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
1652             const bool is_external = file_->IsExternal(arg_method_id);
1653 
1654             if (is_external && !is_present) {
1655                 unknown_external_methods.push_back(arg_method_id);
1656             }
1657         }
1658 
1659         method->ins.push_back(pa_ins);
1660         bc_ins = bc_ins.GetNext();
1661     }
1662 
1663     for (const auto &pair : label_table) {
1664         method->ins[pair.first].label = pair.second;
1665         method->ins[pair.first].set_label = true;
1666     }
1667 
1668     return unknown_external_methods;
1669 }
1670 
1671 }  // namespace panda::disasm
1672