• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19 
20 #include <iomanip>
21 
22 #include "get_language_specific_metadata.inc"
23 
24 namespace panda::disasm {
25 
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)26 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
27 {
28     auto file_new = panda_file::File::Open(filename_in);
29     file_.swap(file_new);
30 
31     if (file_ != nullptr) {
32         prog_ = pandasm::Program {};
33 
34         record_name_to_id_.clear();
35         method_name_to_id_.clear();
36         string_offset_to_name_.clear();
37         skip_strings_ = skip_strings;
38         quiet_ = quiet;
39 
40         prog_info_ = ProgInfo {};
41 
42         prog_ann_ = ProgAnnotations {};
43 
44         GetRecords();
45         GetLiteralArrays();
46 
47         GetLanguageSpecificMetadata();
48     } else {
49         LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
50     }
51 }
52 
CollectInfo()53 void Disassembler::CollectInfo()
54 {
55     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
56 
57     debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
58 
59     for (const auto &pair : record_name_to_id_) {
60         GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
61     }
62 
63     for (const auto &pair : method_name_to_id_) {
64         GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
65     }
66 }
67 
Serialize(std::ostream & os,bool add_separators,bool print_information) const68 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
69 {
70     if (os.bad()) {
71         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
72 
73         return;
74     }
75 
76     if (file_ != nullptr) {
77         std::string abc_file = GetFileNameByAbsolutePath(file_->GetFilename());
78         os << "# source binary: " << abc_file << "\n\n";
79     }
80 
81     SerializeLanguage(os);
82 
83     if (add_separators) {
84         os << "# ====================\n"
85               "# LITERALS\n\n";
86     }
87 
88     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
89 
90     for (const auto &[key, lit_arr] : prog_.literalarray_table) {
91         Serialize(key, lit_arr, os);
92     }
93 
94     for (const auto &[module_offset, array_table] : modulearray_table_) {
95         Serialize(module_offset, array_table, os);
96     }
97 
98     os << "\n";
99 
100     if (add_separators) {
101         os << "# ====================\n"
102               "# RECORDS\n\n";
103     }
104 
105     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
106 
107     for (const auto &r : prog_.record_table) {
108         Serialize(r.second, os, print_information);
109     }
110 
111     if (add_separators) {
112         os << "# ====================\n"
113               "# METHODS\n\n";
114     }
115 
116     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
117 
118     for (const auto &m : prog_.function_table) {
119         Serialize(m.second, os, print_information);
120     }
121 
122     if (add_separators) {
123         os << "# ====================\n"
124         "# STRING\n\n";
125     }
126 
127     LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
128 
129     for (const auto &[offset, name_value] : string_offset_to_name_) {
130         SerializeStrings(offset, name_value, os);
131     }
132 }
133 
IsSystemType(const std::string & type_name)134 inline bool Disassembler::IsSystemType(const std::string &type_name)
135 {
136     bool is_array_type = type_name.find('[') != std::string::npos;
137     bool is_global = type_name == "_GLOBAL";
138 
139     return is_array_type || is_global;
140 }
141 
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)142 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
143 {
144     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
145 
146     if (record == nullptr) {
147         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
148 
149         return;
150     }
151 
152     record->name = GetFullRecordName(record_id);
153 
154     LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
155 
156     GetMetaData(record, record_id);
157 
158     if (!file_->IsExternal(record_id)) {
159         GetMethods(record_id);
160         GetFields(record, record_id);
161     }
162 }
163 
AddMethodToTables(const panda_file::File::EntityId & method_id)164 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
165 {
166     pandasm::Function new_method("", file_language_);
167     GetMethod(&new_method, method_id);
168 
169     const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
170     if (prog_.function_table.find(signature) != prog_.function_table.end()) {
171         return;
172     }
173 
174     GetMethodAnnotations(new_method, method_id);
175     method_name_to_id_.emplace(signature, method_id);
176     prog_.function_synonyms[new_method.name].push_back(signature);
177     prog_.function_table.emplace(signature, std::move(new_method));
178 }
179 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)180 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
181 {
182     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
183 
184     if (method == nullptr) {
185         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
186 
187         return;
188     }
189 
190     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
191 
192     method->name = GetFullMethodName(method_id);
193 
194     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
195 
196     GetParams(method, method_accessor.GetProtoId());
197     GetMetaData(method, method_id);
198 
199     if (method_accessor.GetCodeId().has_value()) {
200         const IdList id_list = GetInstructions(method, method_id, method_accessor.GetCodeId().value());
201 
202         for (const auto &id : id_list) {
203             AddMethodToTables(id);
204         }
205     } else {
206         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
207                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
208 
209         return;
210     }
211 }
212 
213 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const214 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
215                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
216 {
217     panda_file::File::EntityId id(std::get<uint32_t>(value));
218     auto sp = file_->GetSpanFromId(id);
219     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
220     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
221         for (size_t i = 0; i < len; i++) {
222             pandasm::LiteralArray::Literal lit;
223             lit.tag_ = tag;
224             lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
225             lit_array->literals_.push_back(lit);
226         }
227         return;
228     }
229     for (size_t i = 0; i < len; i++) {
230         auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
231         pandasm::LiteralArray::Literal lit;
232         lit.tag_ = tag;
233         lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
234         lit_array->literals_.push_back(lit);
235     }
236 }
237 
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const238 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
239                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
240                                    const panda_file::LiteralTag &tag) const
241 {
242     pandasm::LiteralArray::Literal lit;
243     lit.tag_ = tag;
244     switch (tag) {
245         case panda_file::LiteralTag::BOOL: {
246             lit.value_ = std::get<bool>(value);
247             break;
248         }
249         case panda_file::LiteralTag::ACCESSOR:
250         case panda_file::LiteralTag::NULLVALUE:
251         case panda_file::LiteralTag::BUILTINTYPEINDEX: {
252             lit.value_ = std::get<uint8_t>(value);
253             break;
254         }
255         case panda_file::LiteralTag::METHODAFFILIATE: {
256             lit.value_ = std::get<uint16_t>(value);
257             break;
258         }
259         case panda_file::LiteralTag::LITERALBUFFERINDEX:
260         case panda_file::LiteralTag::INTEGER: {
261             lit.value_ = std::get<uint32_t>(value);
262             break;
263         }
264         case panda_file::LiteralTag::DOUBLE: {
265             lit.value_ = std::get<double>(value);
266             break;
267         }
268         case panda_file::LiteralTag::STRING: {
269             auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
270             lit.value_ = StringDataToString(str_data);
271             break;
272         }
273         case panda_file::LiteralTag::METHOD:
274         case panda_file::LiteralTag::GETTER:
275         case panda_file::LiteralTag::SETTER:
276         case panda_file::LiteralTag::GENERATORMETHOD: {
277             panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
278             lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
279             break;
280         }
281         case panda_file::LiteralTag::LITERALARRAY: {
282             std::stringstream ss;
283             ss << "0x" << std::hex << std::get<uint32_t>(value);
284             lit.value_ = ss.str();
285             break;
286         }
287         case panda_file::LiteralTag::TAGVALUE: {
288             return;
289         }
290         default: {
291             UNREACHABLE();
292         }
293     }
294     lit_array->literals_.push_back(lit);
295 }
296 
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const297 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
298 {
299     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
300     lit_array_accessor.EnumerateLiteralVals(
301         offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
302                                   const panda_file::LiteralTag &tag) {
303             switch (tag) {
304                 case panda_file::LiteralTag::ARRAY_U1: {
305                     FillLiteralArrayData<bool>(lit_array, tag, value);
306                     break;
307                 }
308                 case panda_file::LiteralTag::ARRAY_I8:
309                 case panda_file::LiteralTag::ARRAY_U8: {
310                     FillLiteralArrayData<uint8_t>(lit_array, tag, value);
311                     break;
312                 }
313                 case panda_file::LiteralTag::ARRAY_I16:
314                 case panda_file::LiteralTag::ARRAY_U16: {
315                     FillLiteralArrayData<uint16_t>(lit_array, tag, value);
316                     break;
317                 }
318                 case panda_file::LiteralTag::ARRAY_I32:
319                 case panda_file::LiteralTag::ARRAY_U32: {
320                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
321                     break;
322                 }
323                 case panda_file::LiteralTag::ARRAY_I64:
324                 case panda_file::LiteralTag::ARRAY_U64: {
325                     FillLiteralArrayData<uint64_t>(lit_array, tag, value);
326                     break;
327                 }
328                 case panda_file::LiteralTag::ARRAY_F32: {
329                     FillLiteralArrayData<float>(lit_array, tag, value);
330                     break;
331                 }
332                 case panda_file::LiteralTag::ARRAY_F64: {
333                     FillLiteralArrayData<double>(lit_array, tag, value);
334                     break;
335                 }
336                 case panda_file::LiteralTag::ARRAY_STRING: {
337                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
338                     break;
339                 }
340                 default: {
341                     FillLiteralData(lit_array, value, tag);
342                     break;
343                 }
344             }
345         });
346 }
347 
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const348 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
349 {
350     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
351     GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
352 }
353 
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const354 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
355 {
356     return module_literals_.find(id.GetOffset()) != module_literals_.end();
357 }
358 
GetLiteralArrays()359 void Disassembler::GetLiteralArrays()
360 {
361     const auto lit_arrays_id = file_->GetLiteralArraysId();
362 
363     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
364                              << lit_arrays_id << ")";
365 
366     panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
367     size_t num_litarrays = lda.GetLiteralNum();
368     for (size_t index = 0; index < num_litarrays; index++) {
369         auto id = lda.GetLiteralArrayId(index);
370         if (IsModuleLiteralOffset(id)) {
371             std::stringstream ss;
372             ss << index << " 0x" << std::hex << id.GetOffset();
373             modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
374             continue;
375         }
376         std::stringstream ss;
377         ss << index << " 0x" << std::hex << id.GetOffset();
378         panda::pandasm::LiteralArray lit_arr;
379         GetLiteralArray(&lit_arr, index);
380         prog_.literalarray_table.emplace(ss.str(), lit_arr);
381     }
382 }
383 
ModuleTagToString(panda_file::ModuleTag & tag) const384 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
385 {
386     switch (tag) {
387         case panda_file::ModuleTag::REGULAR_IMPORT:
388             return "REGULAR_IMPORT";
389         case panda_file::ModuleTag::NAMESPACE_IMPORT:
390             return "NAMESPACE_IMPORT";
391         case panda_file::ModuleTag::LOCAL_EXPORT:
392             return "LOCAL_EXPORT";
393         case panda_file::ModuleTag::INDIRECT_EXPORT:
394             return "INDIRECT_EXPORT";
395         case panda_file::ModuleTag::STAR_EXPORT:
396             return "STAR_EXPORT";
397         default: {
398             UNREACHABLE();
399             break;
400         }
401     }
402     return "";
403 }
404 
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const405 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
406 {
407     panda_file::ModuleDataAccessor mda(*file_, module_id);
408     const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
409     std::vector<std::string> module_literal_array;
410     mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset,
411                                   uint32_t request_module_idx, uint32_t import_name_offset,
412                                   uint32_t local_name_offset) {
413         std::stringstream ss;
414         ss << "ModuleTag: " << ModuleTagToString(tag);
415         if (IsValidOffset(local_name_offset)) {
416             ss << ", local_name: " << GetStringByOffset(local_name_offset);
417         }
418         if (IsValidOffset(export_name_offset)) {
419             ss << ", export_name: " << GetStringByOffset(export_name_offset);
420         }
421         if (IsValidOffset(import_name_offset)) {
422             ss << ", import_name: " << GetStringByOffset(import_name_offset);
423         }
424         if (request_module_idx < request_modules_offset.size()) {
425             auto request_module_offset = request_modules_offset[request_module_idx];
426             ASSERT(IsValidOffset(request_module_offset));
427             ss << ", module_request: " << GetStringByOffset(request_module_offset);
428         }
429         module_literal_array.push_back(ss.str());
430     });
431 
432     return module_literal_array;
433 }
434 
GetRecords()435 void Disassembler::GetRecords()
436 {
437     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
438 
439     const auto class_idx = file_->GetClasses();
440 
441     for (size_t i = 0; i < class_idx.size(); i++) {
442         uint32_t class_id = class_idx[i];
443         auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
444 
445         if (class_id > file_->GetHeader()->file_size) {
446             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
447                                      << class_off << "). binary file corrupted. record offset (0x" << class_id
448                                      << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
449             break;
450         }
451 
452         const panda_file::File::EntityId record_id {class_id};
453         auto language = GetRecordLanguage(record_id);
454         if (language != file_language_) {
455             if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
456                 file_language_ = language;
457             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
458                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
459                                          << std::hex << class_off << "). record's language  ("
460                                          << panda_file::LanguageToString(language)
461                                          << ")  differs from file's language ("
462                                          << panda_file::LanguageToString(file_language_) << ")!";
463             }
464         }
465 
466         pandasm::Record record("", file_language_);
467         GetRecord(&record, record_id);
468 
469         if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
470             record_name_to_id_.emplace(record.name, record_id);
471             prog_.record_table.emplace(record.name, std::move(record));
472         }
473     }
474 }
475 
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)476 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
477 {
478     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
479 
480     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
481         pandasm::Field field(file_language_);
482 
483         panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
484         field.name = StringDataToString(file_->GetStringData(field_name_id));
485 
486         uint32_t field_type = field_accessor.GetType();
487         field.type = FieldTypeToPandasmType(field_type);
488 
489         GetMetaData(&field, field_accessor.GetFieldId());
490 
491         record->field_list.push_back(std::move(field));
492     });
493 }
494 
GetMethods(const panda_file::File::EntityId & record_id)495 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
496 {
497     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
498 
499     class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
500         AddMethodToTables(method_accessor.GetMethodId());
501     });
502 }
503 
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)504 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
505 {
506     static const std::string MODULE_REQUEST_ANN_NAME = "L_ESConcurrentModuleRequestsAnnotation";
507     static const std::string SLOT_NUMBER_ANN_NAME = "L_ESSlotNumberAnnotation";
508 
509     panda_file::MethodDataAccessor mda(*file_, method_id);
510     mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
511         panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
512         auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
513         if (std::strcmp("L_ESConcurrentModuleRequestsAnnotation;", annotation_name) == 0) {
514             CreateAnnotationElement(ada, method, MODULE_REQUEST_ANN_NAME,
515                                     "ConcurrentModuleRequest", "concurrentModuleRequestIdx");
516         } else if (std::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
517             CreateAnnotationElement(ada, method, SLOT_NUMBER_ANN_NAME, "SlotNumber", "slotNumberIdx");
518         }
519     });
520 }
521 
CreateAnnotationElement(panda_file::AnnotationDataAccessor & ada,pandasm::Function & method,const std::string & ann_name,const std::string & ann_elem_name,const std::string & ann_elem_index)522 void Disassembler::CreateAnnotationElement(panda_file::AnnotationDataAccessor &ada, pandasm::Function &method,
523                                            const std::string &ann_name, const std::string &ann_elem_name,
524                                            const std::string &ann_elem_index)
525 {
526     if (ann_elem_name.empty() || ann_elem_index.empty()) {
527         return;
528     }
529 
530     uint32_t elem_count = ada.GetCount();
531     for (uint32_t i = 0; i < elem_count; i++) {
532         panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
533         auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
534         if (ann_elem_name == elem_name) {
535             uint32_t ann_elem_value = adae.GetScalarValue().GetValue();
536             AddAnnotationElement(method, ann_name, ann_elem_index, ann_elem_value);
537         }
538     }
539 }
540 
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const uint32_t & value)541 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
542                                         const std::string &key, const uint32_t &value)
543 {
544     if (annotation_name.empty() || key.empty()) {
545         return;
546     }
547 
548     std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
549     const auto ann_iter = std::find_if(method_annotation.begin(), method_annotation.end(),
550                                        [&](pandasm::AnnotationData &ann) -> bool {
551         return ann.GetName() == annotation_name;
552     });
553 
554     pandasm::AnnotationElement annotation_element(key,
555         std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
556     const bool is_annotation = ann_iter != method_annotation.end();
557     if (is_annotation) {
558         ann_iter->AddElement(std::move(annotation_element));
559         method.metadata->SetAnnotations(std::move(method_annotation));
560     } else {
561         std::vector<pandasm::AnnotationElement> elements;
562         pandasm::AnnotationData ann_data(annotation_name, elements);
563         ann_data.AddElement(std::move(annotation_element));
564         std::vector<pandasm::AnnotationData> annotations;
565         annotations.push_back(std::move(ann_data));
566         method.metadata->AddAnnotations(annotations);
567     }
568 }
569 
GetAnnotationByMethodName(const std::string & method_name) const570 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
571 {
572     const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
573     bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
574     if (!is_signature) {
575         return std::nullopt;
576     }
577 
578     const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
579     bool is_method = method_iter != prog_.function_table.end();
580     const auto annotations = method_iter->second.metadata->GetAnnotations();
581     if (!is_method || annotations.empty()) {
582         return std::nullopt;
583     }
584 
585     std::vector<std::string> ann;
586     for (const auto &ann_data : annotations) {
587         ann.emplace_back(ann_data.GetName());
588     }
589     return ann;
590 }
591 
GetStrings() const592 std::vector<std::string> Disassembler::GetStrings() const
593 {
594     std::vector<std::string> strings;
595     for (auto &str_info : string_offset_to_name_) {
596         strings.emplace_back(str_info.second);
597     }
598 
599     return strings;
600 }
601 
GetModuleLiterals() const602 std::vector<std::string> Disassembler::GetModuleLiterals() const
603 {
604     std::vector<std::string> module_literals;
605     for (auto &module_array : modulearray_table_) {
606         for (auto &module : module_array.second) {
607             module_literals.emplace_back(module);
608         }
609     }
610 
611     return module_literals;
612 }
613 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & proto_id) const614 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &proto_id) const
615 {
616     /**
617      * frame size - 2^16 - 1
618      */
619     static const uint32_t MAX_ARG_NUM = 0xFFFF;
620 
621     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << proto_id << " (0x" << std::hex << proto_id << ")";
622 
623     if (method == nullptr) {
624         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
625 
626         return;
627     }
628 
629     panda_file::ProtoDataAccessor proto_accessor(*file_, proto_id);
630 
631     auto params_num = proto_accessor.GetNumArgs();
632     if (params_num > MAX_ARG_NUM) {
633         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << proto_id << " (0x" << std::hex << proto_id
634                                  << "). number of function's arguments (" << std::dec << params_num
635                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
636 
637         return;
638     }
639 
640     size_t ref_idx = 0;
641     method->return_type = PFTypeToPandasmType(proto_accessor.GetReturnType(), proto_accessor, ref_idx);
642 
643     for (uint8_t i = 0; i < params_num; i++) {
644         auto arg_type = PFTypeToPandasmType(proto_accessor.GetArgType(i), proto_accessor, ref_idx);
645         method->params.push_back(pandasm::Function::Parameter(arg_type, file_language_));
646     }
647 }
648 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const649 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
650                                        panda_file::File::EntityId code_id) const
651 {
652     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
653 
654     if (method == nullptr) {
655         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
656         return LabelTable {};
657     }
658 
659     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
660 
661     const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
662     const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
663 
664     size_t try_idx = 0;
665     LabelTable label_table {};
666     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
667         pandasm::Function::CatchBlock catch_block_pa {};
668         if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
669             return false;
670         }
671         size_t catch_idx = 0;
672         try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
673             auto class_idx = catch_block.GetTypeIdx();
674             if (class_idx == panda_file::INVALID_INDEX) {
675                 catch_block_pa.exception_record = "";
676             } else {
677                 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
678                 catch_block_pa.exception_record = GetFullRecordName(class_id);
679             }
680             if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
681                                   catch_idx)) {
682                 return false;
683             }
684 
685             method->catch_blocks.push_back(catch_block_pa);
686             catch_block_pa.catch_begin_label = "";
687             catch_block_pa.catch_end_label = "";
688             catch_idx++;
689 
690             return true;
691         });
692         try_idx++;
693 
694         return true;
695     });
696 
697     return label_table;
698 }
699 
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)700 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
701 {
702     size_t count = 0;
703 
704     while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
705         count++;
706         bc_ins_first = bc_ins_first.GetNext();
707         if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
708             return std::numeric_limits<size_t>::max();
709         }
710     }
711 
712     return count;
713 }
714 
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const715 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
716                                   const panda_file::CodeDataAccessor::TryBlock &try_block,
717                                   pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
718                                   size_t try_idx) const
719 {
720     const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
721     const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
722 
723     const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
724     const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
725 
726     const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
727     const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
728     const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
729     const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
730 
731     if (!try_begin_offset_in_range || !try_begin_offset_valid) {
732         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
733                                  << try_begin_bc_ins.GetAddress();
734         return false;
735     } else {
736         std::stringstream ss {};
737         ss << "try_begin_label_" << try_idx;
738 
739         LabelTable::iterator it = label_table->find(try_begin_idx);
740         if (it == label_table->end()) {
741             catch_block_pa->try_begin_label = ss.str();
742             label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
743         } else {
744             catch_block_pa->try_begin_label = it->second;
745         }
746     }
747 
748     if (!try_end_offset_in_range || !try_end_offset_valid) {
749         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
750                                  << try_end_bc_ins.GetAddress();
751         return false;
752     } else {
753         std::stringstream ss {};
754         ss << "try_end_label_" << try_idx;
755 
756         LabelTable::iterator it = label_table->find(try_end_idx);
757         if (it == label_table->end()) {
758             catch_block_pa->try_end_label = ss.str();
759             label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
760         } else {
761             catch_block_pa->try_end_label = it->second;
762         }
763     }
764 
765     return true;
766 }
767 
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const768 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
769                                     const panda_file::CodeDataAccessor::CatchBlock &catch_block,
770                                     pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
771                                     size_t try_idx, size_t catch_idx) const
772 {
773     const auto handler_begin_offset = catch_block.GetHandlerPc();
774     const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
775 
776     const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
777     const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
778 
779     const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
780     const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
781 
782     const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
783     const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
784     const bool handler_end_present = catch_block.GetCodeSize() != 0;
785     const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
786     const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
787 
788     if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
789         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
790                                  << handler_begin_bc_ins.GetAddress();
791         return false;
792     } else {
793         std::stringstream ss {};
794         ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
795 
796         LabelTable::iterator it = label_table->find(handler_begin_idx);
797         if (it == label_table->end()) {
798             catch_block_pa->catch_begin_label = ss.str();
799             label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
800         } else {
801             catch_block_pa->catch_begin_label = it->second;
802         }
803     }
804 
805     if (!handler_end_offset_in_range || !handler_end_offset_valid) {
806         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
807                                  << handler_end_bc_ins.GetAddress();
808         return false;
809     } else if (handler_end_present) {
810         std::stringstream ss {};
811         ss << "handler_end_label_" << try_idx << "_" << catch_idx;
812 
813         LabelTable::iterator it = label_table->find(handler_end_idx);
814         if (it == label_table->end()) {
815             catch_block_pa->catch_end_label = ss.str();
816             label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
817         } else {
818             catch_block_pa->catch_end_label = it->second;
819         }
820     }
821 
822     return true;
823 }
824 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const825 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
826 {
827     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
828                              << ")";
829 
830     if (method == nullptr) {
831         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
832 
833         return;
834     }
835 
836     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
837 
838     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
839 
840     if (!method_accessor.IsStatic()) {
841         const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
842         auto this_type = pandasm::Type::FromDescriptor(class_name);
843 
844         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
845                                  << "\') is not static. emplacing self-argument of type " << this_type.GetName();
846 
847         method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
848     } else {
849         method->metadata->SetAttribute("static");
850     }
851 
852     if (file_->IsExternal(method_accessor.GetMethodId())) {
853         method->metadata->SetAttribute("external");
854     }
855 
856     std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
857     std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
858 
859     const bool is_ctor = (method_name_raw == ctor_name);
860     const bool is_cctor = (method_name_raw == cctor_name);
861 
862     if (is_ctor) {
863         method->metadata->SetAttribute("ctor");
864         method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
865     } else if (is_cctor) {
866         method->metadata->SetAttribute("cctor");
867         method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
868     }
869 }
870 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const871 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
872 {
873     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
874                              << ")";
875 
876     if (record == nullptr) {
877         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
878 
879         return;
880     }
881 
882     if (file_->IsExternal(record_id)) {
883         record->metadata->SetAttribute("external");
884     }
885 }
886 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id)887 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id)
888 {
889     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
890 
891     if (field == nullptr) {
892         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
893 
894         return;
895     }
896 
897     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
898 
899     if (field_accessor.IsExternal()) {
900         field->metadata->SetAttribute("external");
901     }
902 
903     if (field_accessor.IsStatic()) {
904         field->metadata->SetAttribute("static");
905     }
906 
907     if (field->type.GetId() == panda_file::Type::TypeId::U32) {
908         const auto offset = field_accessor.GetValue<uint32_t>().value();
909         static const std::string TYPE_SUMMARY_FIELD_NAME = "typeSummaryOffset";
910         if (field->name != TYPE_SUMMARY_FIELD_NAME) {
911             LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
912                                      << " is excluded";
913             module_literals_.insert(offset);
914         }
915         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
916     }
917     if (field->type.GetId() == panda_file::Type::TypeId::U8) {
918         const auto val = field_accessor.GetValue<uint8_t>().value();
919         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
920     }
921 }
922 
AnnotationTagToString(const char tag) const923 std::string Disassembler::AnnotationTagToString(const char tag) const
924 {
925     switch (tag) {
926         case '1':
927             return "u1";
928         case '2':
929             return "i8";
930         case '3':
931             return "u8";
932         case '4':
933             return "i16";
934         case '5':
935             return "u16";
936         case '6':
937             return "i32";
938         case '7':
939             return "u32";
940         case '8':
941             return "i64";
942         case '9':
943             return "u64";
944         case 'A':
945             return "f32";
946         case 'B':
947             return "f64";
948         case 'C':
949             return "string";
950         case 'D':
951             return "record";
952         case 'E':
953             return "method";
954         case 'F':
955             return "enum";
956         case 'G':
957             return "annotation";
958         case 'I':
959             return "void";
960         case 'J':
961             return "method_handle";
962         case 'K':
963             return "u1[]";
964         case 'L':
965             return "i8[]";
966         case 'M':
967             return "u8[]";
968         case 'N':
969             return "i16[]";
970         case 'O':
971             return "u16[]";
972         case 'P':
973             return "i32[]";
974         case 'Q':
975             return "u32[]";
976         case 'R':
977             return "i64[]";
978         case 'S':
979             return "u64[]";
980         case 'T':
981             return "f32[]";
982         case 'U':
983             return "f64[]";
984         case 'V':
985             return "string[]";
986         case 'W':
987             return "record[]";
988         case 'X':
989             return "method[]";
990         case 'Y':
991             return "enum[]";
992         case 'Z':
993             return "annotation[]";
994         case '@':
995             return "method_handle[]";
996         case '*':
997             return "nullptr string";
998         default:
999             return std::string();
1000     }
1001 }
1002 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1003 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1004 {
1005     std::stringstream ss;
1006 
1007     if (type == "i8") {
1008         int8_t res = value.Get<int8_t>();
1009         ss << static_cast<int>(res);
1010     } else if (type == "u1" || type == "u8") {
1011         uint8_t res = value.Get<uint8_t>();
1012         ss << static_cast<unsigned int>(res);
1013     } else if (type == "i16") {
1014         ss << value.Get<int16_t>();
1015     } else if (type == "u16") {
1016         ss << value.Get<uint16_t>();
1017     } else if (type == "i32") {
1018         ss << value.Get<int32_t>();
1019     } else if (type == "u32") {
1020         ss << value.Get<uint32_t>();
1021     } else if (type == "i64") {
1022         ss << value.Get<int64_t>();
1023     } else if (type == "u64") {
1024         ss << value.Get<uint64_t>();
1025     } else if (type == "f32") {
1026         ss << value.Get<float>();
1027     } else if (type == "f64") {
1028         ss << value.Get<double>();
1029     } else if (type == "string") {
1030         const auto id = value.Get<panda_file::File::EntityId>();
1031         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1032     } else if (type == "record") {
1033         const auto id = value.Get<panda_file::File::EntityId>();
1034         ss << GetFullRecordName(id);
1035     } else if (type == "method") {
1036         const auto id = value.Get<panda_file::File::EntityId>();
1037         AddMethodToTables(id);
1038         ss << GetMethodSignature(id);
1039     } else if (type == "enum") {
1040         const auto id = value.Get<panda_file::File::EntityId>();
1041         panda_file::FieldDataAccessor field_accessor(*file_, id);
1042         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1043            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1044     } else if (type == "annotation") {
1045         const auto id = value.Get<panda_file::File::EntityId>();
1046         ss << "id_" << id;
1047     } else if (type == "void") {
1048         return std::string();
1049     } else if (type == "method_handle") {
1050     }
1051 
1052     return ss.str();
1053 }
1054 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1055 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1056                                              const size_t idx)
1057 {
1058     std::stringstream ss;
1059 
1060     if (type == "i8") {
1061         int8_t res = value.Get<int8_t>(idx);
1062         ss << static_cast<int>(res);
1063     } else if (type == "u1" || type == "u8") {
1064         uint8_t res = value.Get<uint8_t>(idx);
1065         ss << static_cast<unsigned int>(res);
1066     } else if (type == "i16") {
1067         ss << value.Get<int16_t>(idx);
1068     } else if (type == "u16") {
1069         ss << value.Get<uint16_t>(idx);
1070     } else if (type == "i32") {
1071         ss << value.Get<int32_t>(idx);
1072     } else if (type == "u32") {
1073         ss << value.Get<uint32_t>(idx);
1074     } else if (type == "i64") {
1075         ss << value.Get<int64_t>(idx);
1076     } else if (type == "u64") {
1077         ss << value.Get<uint64_t>(idx);
1078     } else if (type == "f32") {
1079         ss << value.Get<float>(idx);
1080     } else if (type == "f64") {
1081         ss << value.Get<double>(idx);
1082     } else if (type == "string") {
1083         const auto id = value.Get<panda_file::File::EntityId>(idx);
1084         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1085     } else if (type == "record") {
1086         const auto id = value.Get<panda_file::File::EntityId>(idx);
1087         ss << GetFullRecordName(id);
1088     } else if (type == "method") {
1089         const auto id = value.Get<panda_file::File::EntityId>(idx);
1090         AddMethodToTables(id);
1091         ss << GetMethodSignature(id);
1092     } else if (type == "enum") {
1093         const auto id = value.Get<panda_file::File::EntityId>(idx);
1094         panda_file::FieldDataAccessor field_accessor(*file_, id);
1095         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1096            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1097     } else if (type == "annotation") {
1098         const auto id = value.Get<panda_file::File::EntityId>(idx);
1099         ss << "id_" << id;
1100     } else if (type == "method_handle") {
1101     } else if (type == "nullptr string") {
1102     }
1103 
1104     return ss.str();
1105 }
1106 
GetFullMethodName(const panda_file::File::EntityId & method_id) const1107 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1108 {
1109     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1110 
1111     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1112 
1113     std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1114     if (IsSystemType(class_name)) {
1115         class_name = "";
1116     } else {
1117         class_name += ".";
1118     }
1119 
1120     return class_name + method_name_raw;
1121 }
1122 
GetMethodSignature(const panda_file::File::EntityId & method_id) const1123 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1124 {
1125     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1126 
1127     pandasm::Function method(GetFullMethodName(method_id), file_language_);
1128     GetParams(&method, method_accessor.GetProtoId());
1129     GetMetaData(&method, method_id);
1130 
1131     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1132 }
1133 
GetFullRecordName(const panda_file::File::EntityId & class_id) const1134 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1135 {
1136     std::string name = StringDataToString(file_->GetStringData(class_id));
1137 
1138     auto type = pandasm::Type::FromDescriptor(name);
1139     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1140 
1141     return type.GetPandasmName();
1142 }
1143 
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1144 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1145 {
1146     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1147 
1148     if (file_->IsExternal(record_id)) {
1149         return;
1150     }
1151 
1152     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1153     std::stringstream ss;
1154 
1155     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1156        << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1157        << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1158 
1159     record_info->record_info = ss.str();
1160     ss.str(std::string());
1161 
1162     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1163         ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1164            << field_accessor.GetFieldId();
1165 
1166         record_info->fields_info.push_back(ss.str());
1167 
1168         ss.str(std::string());
1169     });
1170 }
1171 
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1172 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1173 {
1174     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1175 
1176     panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1177     std::stringstream ss;
1178 
1179     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1180        << method_accessor.GetMethodId();
1181 
1182     if (method_accessor.GetCodeId().has_value()) {
1183         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1184            << method_accessor.GetCodeId().value();
1185 
1186         GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1187     } else {
1188         ss << ", <no code>";
1189     }
1190 
1191     method_info->method_info = ss.str();
1192 
1193     if (method_accessor.GetCodeId()) {
1194         ASSERT(debug_info_extractor_ != nullptr);
1195         method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1196         method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1197         method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1198 
1199         // Add information about parameters into the table
1200         panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1201         auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1202         uint32_t code_size = codeda.GetCodeSize();
1203         for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1204             panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1205             method_info->local_variable_table.emplace_back(arg_info);
1206         }
1207     }
1208 }
1209 
IsArray(const panda_file::LiteralTag & tag)1210 static bool IsArray(const panda_file::LiteralTag &tag)
1211 {
1212     switch (tag) {
1213         case panda_file::LiteralTag::ARRAY_U1:
1214         case panda_file::LiteralTag::ARRAY_U8:
1215         case panda_file::LiteralTag::ARRAY_I8:
1216         case panda_file::LiteralTag::ARRAY_U16:
1217         case panda_file::LiteralTag::ARRAY_I16:
1218         case panda_file::LiteralTag::ARRAY_U32:
1219         case panda_file::LiteralTag::ARRAY_I32:
1220         case panda_file::LiteralTag::ARRAY_U64:
1221         case panda_file::LiteralTag::ARRAY_I64:
1222         case panda_file::LiteralTag::ARRAY_F32:
1223         case panda_file::LiteralTag::ARRAY_F64:
1224         case panda_file::LiteralTag::ARRAY_STRING:
1225             return true;
1226         default:
1227             return false;
1228     }
1229 }
1230 
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1231 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1232 {
1233     std::stringstream ret;
1234     if (lit_array.literals_.empty()) {
1235         return "";
1236     }
1237 
1238     std::stringstream ss;
1239     ss << "{ ";
1240     const auto &tag = lit_array.literals_[0].tag_;
1241     if (IsArray(tag)) {
1242         ss << LiteralTagToString(tag);
1243     }
1244     ss << lit_array.literals_.size();
1245     ss << " [ ";
1246     SerializeValues(lit_array, ss);
1247     ss << "]}";
1248     return ss.str();
1249 }
1250 
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1251 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1252 {
1253     os << key << " ";
1254     os << SerializeLiteralArray(lit_array);
1255     os << "\n";
1256 }
1257 
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1258 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1259                              std::ostream &os) const
1260 {
1261     os << module_offset << " ";
1262     os << SerializeModuleLiteralArray(module_array);
1263     os << "\n";
1264 }
1265 
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1266 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1267 {
1268     if (module_array.empty()) {
1269         return "";
1270     }
1271 
1272     std::stringstream ss;
1273     ss << "{ ";
1274     ss << module_array.size();
1275     ss << " [ ";
1276     for (size_t index = 0; index < module_array.size(); index++) {
1277         ss << module_array[index] << "; ";
1278     }
1279     ss << "]}";
1280     return ss.str();
1281 }
1282 
LiteralTagToString(const panda_file::LiteralTag & tag) const1283 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1284 {
1285     switch (tag) {
1286         case panda_file::LiteralTag::BOOL:
1287         case panda_file::LiteralTag::ARRAY_U1:
1288             return "u1";
1289         case panda_file::LiteralTag::ARRAY_U8:
1290             return "u8";
1291         case panda_file::LiteralTag::ARRAY_I8:
1292             return "i8";
1293         case panda_file::LiteralTag::ARRAY_U16:
1294             return "u16";
1295         case panda_file::LiteralTag::ARRAY_I16:
1296             return "i16";
1297         case panda_file::LiteralTag::ARRAY_U32:
1298             return "u32";
1299         case panda_file::LiteralTag::INTEGER:
1300         case panda_file::LiteralTag::ARRAY_I32:
1301             return "i32";
1302         case panda_file::LiteralTag::ARRAY_U64:
1303             return "u64";
1304         case panda_file::LiteralTag::ARRAY_I64:
1305             return "i64";
1306         case panda_file::LiteralTag::ARRAY_F32:
1307             return "f32";
1308         case panda_file::LiteralTag::DOUBLE:
1309         case panda_file::LiteralTag::ARRAY_F64:
1310             return "f64";
1311         case panda_file::LiteralTag::STRING:
1312         case panda_file::LiteralTag::ARRAY_STRING:
1313             return "string";
1314         case panda_file::LiteralTag::METHOD:
1315             return "method";
1316         case panda_file::LiteralTag::GETTER:
1317             return "getter";
1318         case panda_file::LiteralTag::SETTER:
1319             return "setter";
1320         case panda_file::LiteralTag::GENERATORMETHOD:
1321             return "generator_method";
1322         case panda_file::LiteralTag::ACCESSOR:
1323             return "accessor";
1324         case panda_file::LiteralTag::METHODAFFILIATE:
1325             return "method_affiliate";
1326         case panda_file::LiteralTag::NULLVALUE:
1327             return "null_value";
1328         case panda_file::LiteralTag::TAGVALUE:
1329             return "tagvalue";
1330         case panda_file::LiteralTag::LITERALBUFFERINDEX:
1331             return "lit_index";
1332         case panda_file::LiteralTag::LITERALARRAY:
1333             return "lit_offset";
1334         case panda_file::LiteralTag::BUILTINTYPEINDEX:
1335             return "builtin_type";
1336         default:
1337             UNREACHABLE();
1338     }
1339 }
1340 
1341 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1342 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1343 {
1344     switch (lit_array.literals_[0].tag_) {
1345         case panda_file::LiteralTag::ARRAY_U1: {
1346             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1347                 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1348             }
1349             break;
1350         }
1351         case panda_file::LiteralTag::ARRAY_U8: {
1352             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1353                 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1354             }
1355             break;
1356         }
1357         case panda_file::LiteralTag::ARRAY_I8: {
1358             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1359                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1360             }
1361             break;
1362         }
1363         case panda_file::LiteralTag::ARRAY_U16: {
1364             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1365                 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1366             }
1367             break;
1368         }
1369         case panda_file::LiteralTag::ARRAY_I16: {
1370             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1371                 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1372             }
1373             break;
1374         }
1375         case panda_file::LiteralTag::ARRAY_U32: {
1376             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1377                 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1378             }
1379             break;
1380         }
1381         case panda_file::LiteralTag::ARRAY_I32: {
1382             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1383                 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1384             }
1385             break;
1386         }
1387         case panda_file::LiteralTag::ARRAY_U64: {
1388             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1389                 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1390             }
1391             break;
1392         }
1393         case panda_file::LiteralTag::ARRAY_I64: {
1394             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1395                 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1396             }
1397             break;
1398         }
1399         case panda_file::LiteralTag::ARRAY_F32: {
1400             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1401                 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1402             }
1403             break;
1404         }
1405         case panda_file::LiteralTag::ARRAY_F64: {
1406             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1407                 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1408             }
1409             break;
1410         }
1411         case panda_file::LiteralTag::ARRAY_STRING: {
1412             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1413                 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1414             }
1415             break;
1416         }
1417         default:
1418             SerializeLiterals(lit_array, os);
1419     }
1420 }
1421 
1422 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1423 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1424 {
1425     for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1426         const auto &tag = lit_array.literals_[i].tag_;
1427         os << LiteralTagToString(tag) << ":";
1428         const auto &val = lit_array.literals_[i].value_;
1429         switch (lit_array.literals_[i].tag_) {
1430             case panda_file::LiteralTag::BOOL: {
1431                 os << std::get<bool>(val);
1432                 break;
1433             }
1434             case panda_file::LiteralTag::LITERALBUFFERINDEX:
1435             case panda_file::LiteralTag::INTEGER: {
1436                 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1437                 break;
1438             }
1439             case panda_file::LiteralTag::DOUBLE: {
1440                 os << std::get<double>(val);
1441                 break;
1442             }
1443             case panda_file::LiteralTag::STRING: {
1444                 os << "\"" << std::get<std::string>(val) << "\"";
1445                 break;
1446             }
1447             case panda_file::LiteralTag::METHOD:
1448             case panda_file::LiteralTag::GETTER:
1449             case panda_file::LiteralTag::SETTER:
1450             case panda_file::LiteralTag::GENERATORMETHOD: {
1451                 os << std::get<std::string>(val);
1452                 break;
1453             }
1454             case panda_file::LiteralTag::NULLVALUE:
1455             case panda_file::LiteralTag::ACCESSOR: {
1456                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1457                 break;
1458             }
1459             case panda_file::LiteralTag::METHODAFFILIATE: {
1460                 os << std::get<uint16_t>(val);
1461                 break;
1462             }
1463             case panda_file::LiteralTag::LITERALARRAY: {
1464                 os << std::get<std::string>(val);
1465                 break;
1466             }
1467             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1468                 os << static_cast<int16_t>(std::get<uint8_t>(val));
1469                 break;
1470             }
1471             default:
1472                 UNREACHABLE();
1473         }
1474         os << ", ";
1475     }
1476 }
1477 
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1478 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1479 {
1480     if (IsSystemType(record.name)) {
1481         return;
1482     }
1483 
1484     os << ".record " << record.name;
1485 
1486     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1487     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1488     if (record_in_table) {
1489         Serialize(*record.metadata, record_iter->second.ann_list, os);
1490     } else {
1491         Serialize(*record.metadata, {}, os);
1492     }
1493 
1494     if (record.metadata->IsForeign()) {
1495         os << "\n\n";
1496         return;
1497     }
1498 
1499     os << " {";
1500 
1501     if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1502         os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1503         SerializeFields(record, os, true);
1504     } else {
1505         os << "\n";
1506         SerializeFields(record, os, false);
1507     }
1508 
1509     os << "}\n\n";
1510 }
1511 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1512 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1513 {
1514     constexpr size_t INFO_OFFSET = 80;
1515 
1516     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1517     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1518 
1519     const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1520 
1521     size_t field_idx = 0;
1522 
1523     std::stringstream ss;
1524     for (const auto &f : record.field_list) {
1525         std::string file = GetFileNameByAbsolutePath(f.name);
1526         ss << "\t" << f.type.GetPandasmName() << " " << file;
1527         if (f.metadata->GetValue().has_value()) {
1528             if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1529                 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1530             }
1531             if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1532                 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1533             }
1534         }
1535         if (record_in_table) {
1536             const auto field_iter = record_iter->second.field_annotations.find(f.name);
1537             if (field_iter != record_iter->second.field_annotations.end()) {
1538                 Serialize(*f.metadata, field_iter->second, ss);
1539             } else {
1540                 Serialize(*f.metadata, {}, ss);
1541             }
1542         } else {
1543             Serialize(*f.metadata, {}, ss);
1544         }
1545 
1546         if (print_information) {
1547             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1548         } else {
1549             os << ss.str() << "\n";
1550         }
1551 
1552         ss.str(std::string());
1553         ss.clear();
1554 
1555         field_idx++;
1556     }
1557 }
1558 
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1559 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1560 {
1561     const auto annotations = method.metadata->GetAnnotations();
1562     if (annotations.empty()) {
1563         return;
1564     }
1565 
1566     for (const auto &ann : annotations) {
1567         os << ann.GetName() << ":\n";
1568         std::stringstream ss;
1569         std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1570         if (elements.empty()) {
1571             continue;
1572         }
1573         uint32_t idx = elements.size() - 1;
1574         ss << "\t" << "u32" << " " << elements.back().GetName() << " { ";
1575         for (const auto &elem : elements) {
1576             ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>();
1577             if (idx > 0) {
1578                 ss << ", ";
1579             }
1580             --idx;
1581         }
1582         ss << " }";
1583         os << ss.str() << "\n";
1584     }
1585 }
1586 
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1587 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1588 {
1589     SerializeMethodAnnotations(method, os);
1590     os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1591 
1592     if (method.params.size() > 0) {
1593         os << method.params[0].type.GetPandasmName() << " a0";
1594 
1595         for (uint8_t i = 1; i < method.params.size(); i++) {
1596             os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1597         }
1598     }
1599     os << ")";
1600 
1601     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1602 
1603     const auto method_iter = prog_ann_.method_annotations.find(signature);
1604     if (method_iter != prog_ann_.method_annotations.end()) {
1605         Serialize(*method.metadata, method_iter->second, os);
1606     } else {
1607         Serialize(*method.metadata, {}, os);
1608     }
1609 
1610     auto method_info_it = prog_info_.methods_info.find(signature);
1611     bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1612     if (print_method_info) {
1613         const MethodInfo &method_info = method_info_it->second;
1614 
1615         size_t width = 0;
1616         for (const auto &i : method.ins) {
1617             if (i.ToString().size() > width) {
1618                 width = i.ToString().size();
1619             }
1620         }
1621 
1622         os << " { # " << method_info.method_info << "\n#   CODE:\n";
1623 
1624         for (size_t i = 0; i < method.ins.size(); i++) {
1625             os << "\t" << std::setw(width) << std::left << method.ins.at(i).ToString("", true, method.regs_num) << " # "
1626                << method_info.instructions_info.at(i) << "\n";
1627         }
1628     } else {
1629         os << " {\n";
1630 
1631         for (const auto &i : method.ins) {
1632             if (i.set_label) {
1633                 std::string ins = i.ToString("", true, method.regs_num);
1634                 std::string delim = ": ";
1635                 size_t pos = ins.find(delim);
1636                 std::string label = ins.substr(0, pos);
1637                 ins.erase(0, pos + delim.length());
1638                 os << label << ":\n\t" << ins << "\n";
1639             } else {
1640                 os << "\t" << i.ToString("", true, method.regs_num) << "\n";
1641             }
1642         }
1643     }
1644 
1645     if (method.catch_blocks.size() != 0) {
1646         os << "\n";
1647 
1648         for (const auto &catch_block : method.catch_blocks) {
1649             Serialize(catch_block, os);
1650 
1651             os << "\n";
1652         }
1653     }
1654 
1655     if (print_method_info) {
1656         const MethodInfo &method_info = method_info_it->second;
1657         SerializeLineNumberTable(method_info.line_number_table, os);
1658         SerializeColumnNumberTable(method_info.column_number_table, os);
1659         SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1660     }
1661 
1662     os << "}\n\n";
1663 }
1664 
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1665 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1666                                     std::ostream &os) const
1667 {
1668     os << "[offset:0x" << std::hex <<offset<< ", name_value:" << name_value<< "]" <<std::endl;
1669 }
1670 
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1671 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1672 {
1673     if (catch_block.exception_record == "") {
1674         os << ".catchall ";
1675     } else {
1676         os << ".catch " << catch_block.exception_record << ", ";
1677     }
1678 
1679     os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1680 
1681     if (catch_block.catch_end_label != "") {
1682         os << ", " << catch_block.catch_end_label;
1683     }
1684 }
1685 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1686 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1687 {
1688     auto bool_attributes = meta.GetBoolAttributes();
1689     auto attributes = meta.GetAttributes();
1690     if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1691         return;
1692     }
1693 
1694     os << " <";
1695 
1696     size_t size = bool_attributes.size();
1697     size_t idx = 0;
1698     for (const auto &attr : bool_attributes) {
1699         os << attr;
1700         ++idx;
1701 
1702         if (!attributes.empty() || !ann_list.empty() || idx < size) {
1703             os << ", ";
1704         }
1705     }
1706 
1707     size = attributes.size();
1708     idx = 0;
1709     for (const auto &[key, values] : attributes) {
1710         for (size_t i = 0; i < values.size(); i++) {
1711             os << key << "=" << values[i];
1712 
1713             if (i < values.size() - 1) {
1714                 os << ", ";
1715             }
1716         }
1717 
1718         ++idx;
1719 
1720         if (!ann_list.empty() || idx < size) {
1721             os << ", ";
1722         }
1723     }
1724 
1725     size = ann_list.size();
1726     idx = 0;
1727     for (const auto &[key, value] : ann_list) {
1728         os << key << "=" << value;
1729 
1730         ++idx;
1731 
1732         if (idx < size) {
1733             os << ", ";
1734         }
1735     }
1736 
1737     os << ">";
1738 }
1739 
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const1740 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
1741                                             std::ostream &os) const
1742 {
1743     if (line_number_table.empty()) {
1744         return;
1745     }
1746 
1747     os << "\n#   LINE_NUMBER_TABLE:\n";
1748     for (const auto &line_info : line_number_table) {
1749         os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
1750     }
1751 }
1752 
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const1753 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
1754                                               std::ostream &os) const
1755 {
1756     if (column_number_table.empty()) {
1757         return;
1758     }
1759 
1760     os << "\n#   COLUMN_NUMBER_TABLE:\n";
1761     for (const auto &column_info : column_number_table) {
1762         os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
1763     }
1764 }
1765 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const1766 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
1767                                                const pandasm::Function &method, std::ostream &os) const
1768 {
1769     if (local_variable_table.empty()) {
1770         return;
1771     }
1772 
1773     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1774     os << "#\t Start   End  Register           Name   Signature\n";
1775     const int START_WIDTH = 5;
1776     const int END_WIDTH = 4;
1777     const int REG_WIDTH = 8;
1778     const int NAME_WIDTH = 14;
1779     for (const auto &variable_info : local_variable_table) {
1780         std::ostringstream reg_stream;
1781         reg_stream << variable_info.reg_number << '(';
1782         if (variable_info.reg_number < 0) {
1783             reg_stream << "acc";
1784         } else {
1785             uint32_t vreg = variable_info.reg_number;
1786             uint32_t first_arg_reg = method.GetTotalRegs();
1787             if (vreg < first_arg_reg) {
1788                 reg_stream << 'v' << vreg;
1789             } else {
1790                 reg_stream << 'a' << vreg - first_arg_reg;
1791             }
1792         }
1793         reg_stream << ')';
1794 
1795         os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << "  ";
1796         os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << "  ";
1797         os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
1798         os << std::setw(NAME_WIDTH) << std::right << variable_info.name << "   " << variable_info.type;
1799         if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
1800             os << " (" << variable_info.type_signature << ")";
1801         }
1802         os << "\n";
1803     }
1804 }
1805 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1806 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1807 {
1808     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1809 }
1810 
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const1811 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1812                                      size_t idx) const
1813 {
1814     std::stringstream name;
1815     const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
1816     std::string str_data = StringDataToString(file_->GetStringData(offset));
1817 
1818     if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
1819         name << GetMethodSignature(offset);
1820     } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
1821         name << '\"';
1822         name << str_data;
1823         name << '\"';
1824         string_offset_to_name_.emplace(offset, str_data);
1825     } else {
1826         ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
1827         pandasm::LiteralArray lit_array;
1828         GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
1829         name << SerializeLiteralArray(lit_array);
1830     }
1831 
1832     return name.str();
1833 }
1834 
GetRecordLanguage(panda_file::File::EntityId class_id) const1835 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
1836 {
1837     if (file_->IsExternal(class_id)) {
1838         return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1839     }
1840 
1841     panda_file::ClassDataAccessor cda(*file_, class_id);
1842     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1843 }
1844 
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)1845 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
1846                                 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
1847                                 panda_file::File::EntityId code_id)
1848 {
1849     const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
1850     const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1851     if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1852         size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1853         if (idx != std::numeric_limits<size_t>::max()) {
1854             if (label_table->find(idx) == label_table->end()) {
1855                 std::stringstream ss {};
1856                 ss << "jump_label_" << label_table->size();
1857                 (*label_table)[idx] = ss.str();
1858             }
1859 
1860             pa_ins->imms.clear();
1861             pa_ins->ids.push_back(label_table->at(idx));
1862         } else {
1863             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1864                                      << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1865                                      << ": invalid jump offset 0x" << jmp_offset
1866                                      << " - jumping in the middle of another instruction!";
1867         }
1868     } else {
1869         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1870                                  << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1871                                  << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
1872     }
1873 }
1874 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1875 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1876                                      panda_file::File::EntityId code_id) const
1877 {
1878     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1879 
1880     const auto ins_sz = code_accessor.GetCodeSize();
1881     const auto ins_arr = code_accessor.GetInstructions();
1882 
1883     method->regs_num = code_accessor.GetNumVregs();
1884 
1885     auto bc_ins = BytecodeInstruction(ins_arr);
1886     const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1887 
1888     LabelTable label_table = GetExceptions(method, method_id, code_id);
1889 
1890     IdList unknown_external_methods {};
1891 
1892     while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1893         if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1894             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1895                                      << "). bytecode instructions sequence corrupted for method " << method->name
1896                                      << "! went out of bounds";
1897 
1898             break;
1899         }
1900 
1901         auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
1902         if (pa_ins.IsJump()) {
1903             translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
1904         }
1905 
1906         // check if method id is unknown external method. if so, emplace it in table
1907         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1908             const auto arg_method_idx = bc_ins.GetId().AsIndex();
1909             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1910 
1911             const auto arg_method_signature = GetMethodSignature(arg_method_id);
1912 
1913             const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
1914             const bool is_external = file_->IsExternal(arg_method_id);
1915             if (is_external && !is_present) {
1916                 unknown_external_methods.push_back(arg_method_id);
1917             }
1918         }
1919 
1920         method->ins.push_back(pa_ins);
1921         bc_ins = bc_ins.GetNext();
1922     }
1923 
1924     for (const auto &pair : label_table) {
1925         method->ins[pair.first].label = pair.second;
1926         method->ins[pair.first].set_label = true;
1927     }
1928 
1929     return unknown_external_methods;
1930 }
1931 
GetColumnNumber()1932 std::vector<size_t> Disassembler::GetColumnNumber()
1933 {
1934     std::vector<size_t> columnNumber;
1935     for (const auto &method_info : prog_info_.methods_info) {
1936         for (const auto &column_number : method_info.second.column_number_table) {
1937             columnNumber.push_back(column_number.column);
1938         }
1939     }
1940     return columnNumber;
1941 }
1942 
GetLineNumber()1943 std::vector<size_t> Disassembler::GetLineNumber()
1944 {
1945     std::vector<size_t> lineNumber;
1946     for (const auto &method_info : prog_info_.methods_info) {
1947         for (const auto &line_number : method_info.second.line_number_table) {
1948             lineNumber.push_back(line_number.line);
1949         }
1950     }
1951     return lineNumber;
1952 }
1953 
1954 }  // namespace panda::disasm
1955