• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19 #include "utils/const_value.h"
20 
21 #include <iomanip>
22 
23 #include "get_language_specific_metadata.inc"
24 
25 namespace panda::disasm {
26 
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)27 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
28 {
29     auto file_new = panda_file::File::Open(filename_in);
30     file_.swap(file_new);
31 
32     if (file_ != nullptr) {
33         prog_ = pandasm::Program {};
34 
35         record_name_to_id_.clear();
36         method_name_to_id_.clear();
37         string_offset_to_name_.clear();
38         skip_strings_ = skip_strings;
39         quiet_ = quiet;
40 
41         prog_info_ = ProgInfo {};
42 
43         prog_ann_ = ProgAnnotations {};
44 
45         GetRecords();
46         GetLiteralArrays();
47 
48         GetLanguageSpecificMetadata();
49     } else {
50         LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
51     }
52 }
53 
CollectInfo()54 void Disassembler::CollectInfo()
55 {
56     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
57 
58     debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
59 
60     for (const auto &pair : record_name_to_id_) {
61         GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
62     }
63 
64     for (const auto &pair : method_name_to_id_) {
65         GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
66     }
67 }
68 
Serialize(std::ostream & os,bool add_separators,bool print_information) const69 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
70 {
71     if (os.bad()) {
72         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
73 
74         return;
75     }
76 
77     if (file_ != nullptr) {
78         std::string abc_file = GetFileNameByPath(file_->GetFilename());
79         os << "# source binary: " << abc_file << "\n\n";
80     }
81 
82     SerializeLanguage(os);
83 
84     if (add_separators) {
85         os << "# ====================\n"
86               "# LITERALS\n\n";
87     }
88 
89     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
90 
91     for (const auto &[key, lit_arr] : prog_.literalarray_table) {
92         Serialize(key, lit_arr, os);
93     }
94 
95     for (const auto &[module_offset, array_table] : modulearray_table_) {
96         Serialize(module_offset, array_table, os);
97     }
98 
99     os << "\n";
100 
101     if (add_separators) {
102         os << "# ====================\n"
103               "# RECORDS\n\n";
104     }
105 
106     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
107 
108     for (const auto &r : prog_.record_table) {
109         Serialize(r.second, os, print_information);
110     }
111 
112     if (add_separators) {
113         os << "# ====================\n"
114               "# METHODS\n\n";
115     }
116 
117     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
118 
119     for (const auto &m : prog_.function_table) {
120         Serialize(m.second, os, print_information);
121     }
122 
123     if (add_separators) {
124         os << "# ====================\n"
125         "# STRING\n\n";
126     }
127 
128     LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
129 
130     for (const auto &[offset, name_value] : string_offset_to_name_) {
131         SerializeStrings(offset, name_value, os);
132     }
133 }
134 
IsSystemType(const std::string & type_name)135 inline bool Disassembler::IsSystemType(const std::string &type_name)
136 {
137     bool is_array_type = type_name.find('[') != std::string::npos;
138     bool is_global = type_name == "_GLOBAL";
139 
140     return is_array_type || is_global;
141 }
142 
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)143 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
144 {
145     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
146 
147     if (record == nullptr) {
148         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
149 
150         return;
151     }
152 
153     record->name = GetFullRecordName(record_id);
154 
155     LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
156 
157     GetMetaData(record, record_id);
158 
159     if (!file_->IsExternal(record_id)) {
160         GetMethods(record_id);
161         GetFields(record, record_id);
162     }
163 }
164 
AddMethodToTables(const panda_file::File::EntityId & method_id)165 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
166 {
167     pandasm::Function new_method("", file_language_);
168     GetMethod(&new_method, method_id);
169 
170     const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
171     if (prog_.function_table.find(signature) != prog_.function_table.end()) {
172         return;
173     }
174 
175     GetMethodAnnotations(new_method, method_id);
176     method_name_to_id_.emplace(signature, method_id);
177     prog_.function_synonyms[new_method.name].push_back(signature);
178     prog_.function_table.emplace(signature, std::move(new_method));
179 }
180 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)181 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
182 {
183     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
184 
185     if (method == nullptr) {
186         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
187 
188         return;
189     }
190 
191     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
192 
193     method->name = GetFullMethodName(method_id);
194 
195     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
196 
197     GetMetaData(method, method_id);
198 
199     if (method_accessor.GetCodeId().has_value()) {
200         auto code_id = method_accessor.GetCodeId().value();
201         GetParams(method, code_id);
202         const IdList id_list = GetInstructions(method, method_id, code_id);
203 
204         for (const auto &id : id_list) {
205             AddMethodToTables(id);
206         }
207     } else {
208         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
209                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
210 
211         return;
212     }
213 }
214 
215 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const216 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
217                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
218 {
219     panda_file::File::EntityId id(std::get<uint32_t>(value));
220     auto sp = file_->GetSpanFromId(id);
221     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
222     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
223         for (size_t i = 0; i < len; i++) {
224             pandasm::LiteralArray::Literal lit;
225             lit.tag_ = tag;
226             lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
227             lit_array->literals_.push_back(lit);
228         }
229         return;
230     }
231     for (size_t i = 0; i < len; i++) {
232         auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
233         pandasm::LiteralArray::Literal lit;
234         lit.tag_ = tag;
235         lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
236         lit_array->literals_.push_back(lit);
237     }
238 }
239 
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const240 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
241                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
242                                    const panda_file::LiteralTag &tag) const
243 {
244     pandasm::LiteralArray::Literal lit;
245     lit.tag_ = tag;
246     switch (tag) {
247         case panda_file::LiteralTag::BOOL: {
248             lit.value_ = std::get<bool>(value);
249             break;
250         }
251         case panda_file::LiteralTag::ACCESSOR:
252         case panda_file::LiteralTag::NULLVALUE:
253         case panda_file::LiteralTag::BUILTINTYPEINDEX: {
254             lit.value_ = std::get<uint8_t>(value);
255             break;
256         }
257         case panda_file::LiteralTag::METHODAFFILIATE: {
258             lit.value_ = std::get<uint16_t>(value);
259             break;
260         }
261         case panda_file::LiteralTag::LITERALBUFFERINDEX:
262         case panda_file::LiteralTag::INTEGER: {
263             lit.value_ = std::get<uint32_t>(value);
264             break;
265         }
266         case panda_file::LiteralTag::DOUBLE: {
267             lit.value_ = std::get<double>(value);
268             break;
269         }
270         case panda_file::LiteralTag::STRING: {
271             auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
272             lit.value_ = StringDataToString(str_data);
273             break;
274         }
275         case panda_file::LiteralTag::METHOD:
276         case panda_file::LiteralTag::GETTER:
277         case panda_file::LiteralTag::SETTER:
278         case panda_file::LiteralTag::GENERATORMETHOD: {
279             panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
280             lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
281             break;
282         }
283         case panda_file::LiteralTag::LITERALARRAY: {
284             std::stringstream ss;
285             ss << "0x" << std::hex << std::get<uint32_t>(value);
286             lit.value_ = ss.str();
287             break;
288         }
289         case panda_file::LiteralTag::TAGVALUE: {
290             return;
291         }
292         default: {
293             UNREACHABLE();
294         }
295     }
296     lit_array->literals_.push_back(lit);
297 }
298 
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const299 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
300 {
301     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
302     lit_array_accessor.EnumerateLiteralVals(
303         offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
304                                   const panda_file::LiteralTag &tag) {
305             switch (tag) {
306                 case panda_file::LiteralTag::ARRAY_U1: {
307                     FillLiteralArrayData<bool>(lit_array, tag, value);
308                     break;
309                 }
310                 case panda_file::LiteralTag::ARRAY_I8:
311                 case panda_file::LiteralTag::ARRAY_U8: {
312                     FillLiteralArrayData<uint8_t>(lit_array, tag, value);
313                     break;
314                 }
315                 case panda_file::LiteralTag::ARRAY_I16:
316                 case panda_file::LiteralTag::ARRAY_U16: {
317                     FillLiteralArrayData<uint16_t>(lit_array, tag, value);
318                     break;
319                 }
320                 case panda_file::LiteralTag::ARRAY_I32:
321                 case panda_file::LiteralTag::ARRAY_U32: {
322                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
323                     break;
324                 }
325                 case panda_file::LiteralTag::ARRAY_I64:
326                 case panda_file::LiteralTag::ARRAY_U64: {
327                     FillLiteralArrayData<uint64_t>(lit_array, tag, value);
328                     break;
329                 }
330                 case panda_file::LiteralTag::ARRAY_F32: {
331                     FillLiteralArrayData<float>(lit_array, tag, value);
332                     break;
333                 }
334                 case panda_file::LiteralTag::ARRAY_F64: {
335                     FillLiteralArrayData<double>(lit_array, tag, value);
336                     break;
337                 }
338                 case panda_file::LiteralTag::ARRAY_STRING: {
339                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
340                     break;
341                 }
342                 default: {
343                     FillLiteralData(lit_array, value, tag);
344                     break;
345                 }
346             }
347         });
348 }
349 
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const350 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
351 {
352     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
353     GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
354 }
355 
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const356 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
357 {
358     return module_literals_.find(id.GetOffset()) != module_literals_.end();
359 }
360 
GetLiteralArrays()361 void Disassembler::GetLiteralArrays()
362 {
363     const auto lit_arrays_id = file_->GetLiteralArraysId();
364 
365     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
366                              << lit_arrays_id << ")";
367 
368     panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
369     size_t num_litarrays = lda.GetLiteralNum();
370     for (size_t index = 0; index < num_litarrays; index++) {
371         auto id = lda.GetLiteralArrayId(index);
372         if (module_request_phase_literals_.count(id.GetOffset())) {
373             continue;
374         }
375         if (IsModuleLiteralOffset(id)) {
376             std::stringstream ss;
377             ss << index << " 0x" << std::hex << id.GetOffset();
378             modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
379             continue;
380         }
381         std::stringstream ss;
382         ss << index << " 0x" << std::hex << id.GetOffset();
383         panda::pandasm::LiteralArray lit_arr;
384         GetLiteralArray(&lit_arr, index);
385         prog_.literalarray_table.emplace(ss.str(), lit_arr);
386     }
387 }
388 
ModuleTagToString(panda_file::ModuleTag & tag) const389 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
390 {
391     switch (tag) {
392         case panda_file::ModuleTag::REGULAR_IMPORT:
393             return "REGULAR_IMPORT";
394         case panda_file::ModuleTag::NAMESPACE_IMPORT:
395             return "NAMESPACE_IMPORT";
396         case panda_file::ModuleTag::LOCAL_EXPORT:
397             return "LOCAL_EXPORT";
398         case panda_file::ModuleTag::INDIRECT_EXPORT:
399             return "INDIRECT_EXPORT";
400         case panda_file::ModuleTag::STAR_EXPORT:
401             return "STAR_EXPORT";
402         default: {
403             UNREACHABLE();
404             break;
405         }
406     }
407     return "";
408 }
409 
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const410 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
411 {
412     panda_file::ModuleDataAccessor mda(*file_, module_id);
413     const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
414     std::vector<std::string> module_literal_array;
415     std::stringstream module_requests_stringstream;
416     module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n";
417     for (size_t index = 0; index < request_modules_offset.size(); ++index) {
418         module_requests_stringstream << "\t\t" << index <<
419             " : " << GetStringByOffset(request_modules_offset[index]) << ",\n";
420     }
421     module_requests_stringstream << "\t}";
422     module_literal_array.push_back(module_requests_stringstream.str());
423     mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset,
424                                   uint32_t request_module_idx, uint32_t import_name_offset,
425                                   uint32_t local_name_offset) {
426         std::stringstream ss;
427         ss << "\tModuleTag: " << ModuleTagToString(tag);
428         if (tag == panda_file::ModuleTag::REGULAR_IMPORT ||
429             tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) {
430             if (!IsValidOffset(local_name_offset)) {
431                 LOG(FATAL, DISASSEMBLER) << "Get invalid local name offset!" << std::endl;
432             }
433             ss << ", local_name: " << GetStringByOffset(local_name_offset);
434         }
435         if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
436             if (!IsValidOffset(export_name_offset)) {
437                 LOG(FATAL, DISASSEMBLER) << "Get invalid export name offset!" << std::endl;
438             }
439             ss << ", export_name: " << GetStringByOffset(export_name_offset);
440         }
441         if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
442             if (!IsValidOffset(import_name_offset)) {
443                 LOG(FATAL, DISASSEMBLER) << "Get invalid import name offset!" << std::endl;
444             }
445             ss << ", import_name: " << GetStringByOffset(import_name_offset);
446         }
447         auto request_module_offset = request_modules_offset[request_module_idx];
448         if (tag != panda_file::ModuleTag::LOCAL_EXPORT) {
449             if (request_module_idx >= request_modules_offset.size() || !IsValidOffset(request_module_offset)) {
450                 LOG(FATAL, DISASSEMBLER) << "Get invalid request module offset!" << std::endl;
451             }
452             ss << ", module_request: " << GetStringByOffset(request_module_offset);
453         }
454         module_literal_array.push_back(ss.str());
455     });
456 
457     return module_literal_array;
458 }
459 
GetRecords()460 void Disassembler::GetRecords()
461 {
462     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
463 
464     const auto class_idx = file_->GetClasses();
465 
466     for (size_t i = 0; i < class_idx.size(); i++) {
467         uint32_t class_id = class_idx[i];
468         auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
469 
470         if (class_id > file_->GetHeader()->file_size) {
471             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
472                                      << class_off << "). binary file corrupted. record offset (0x" << class_id
473                                      << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
474             break;
475         }
476 
477         const panda_file::File::EntityId record_id {class_id};
478         auto language = GetRecordLanguage(record_id);
479         if (language != file_language_) {
480             if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
481                 file_language_ = language;
482             } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
483                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
484                                          << std::hex << class_off << "). record's language  ("
485                                          << panda_file::LanguageToString(language)
486                                          << ")  differs from file's language ("
487                                          << panda_file::LanguageToString(file_language_) << ")!";
488             }
489         }
490 
491         pandasm::Record record("", file_language_);
492         GetRecord(&record, record_id);
493 
494         if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
495             record_name_to_id_.emplace(record.name, record_id);
496             prog_.record_table.emplace(record.name, std::move(record));
497         }
498     }
499 }
500 
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)501 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
502 {
503     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
504 
505     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
506         pandasm::Field field(file_language_);
507 
508         panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
509         field.name = StringDataToString(file_->GetStringData(field_name_id));
510 
511         uint32_t field_type = field_accessor.GetType();
512         field.type = FieldTypeToPandasmType(field_type);
513 
514         GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD);
515 
516         record->field_list.push_back(std::move(field));
517     });
518 }
519 
GetMethods(const panda_file::File::EntityId & record_id)520 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
521 {
522     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
523 
524     class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
525         AddMethodToTables(method_accessor.GetMethodId());
526     });
527 }
528 
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)529 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
530 {
531     static const std::string MODULE_REQUEST_ANN_NAME = "L_ESConcurrentModuleRequestsAnnotation";
532     static const std::string SLOT_NUMBER_ANN_NAME = "L_ESSlotNumberAnnotation";
533 
534     panda_file::MethodDataAccessor mda(*file_, method_id);
535     mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
536         panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
537         auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
538         if (std::strcmp("L_ESConcurrentModuleRequestsAnnotation;", annotation_name) == 0) {
539             CreateAnnotationElement(ada, method, MODULE_REQUEST_ANN_NAME,
540                                     "ConcurrentModuleRequest", "concurrentModuleRequestIdx");
541         } else if (std::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
542             CreateAnnotationElement(ada, method, SLOT_NUMBER_ANN_NAME, "SlotNumber", "slotNumberIdx");
543         }
544     });
545 }
546 
CreateAnnotationElement(panda_file::AnnotationDataAccessor & ada,pandasm::Function & method,const std::string & ann_name,const std::string & ann_elem_name,const std::string & ann_elem_index)547 void Disassembler::CreateAnnotationElement(panda_file::AnnotationDataAccessor &ada, pandasm::Function &method,
548                                            const std::string &ann_name, const std::string &ann_elem_name,
549                                            const std::string &ann_elem_index)
550 {
551     if (ann_elem_name.empty() || ann_elem_index.empty()) {
552         return;
553     }
554 
555     uint32_t elem_count = ada.GetCount();
556     for (uint32_t i = 0; i < elem_count; i++) {
557         panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
558         auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
559         if (ann_elem_name == elem_name) {
560             uint32_t ann_elem_value = adae.GetScalarValue().GetValue();
561             AddAnnotationElement(method, ann_name, ann_elem_index, ann_elem_value);
562         }
563     }
564 }
565 
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const uint32_t & value)566 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
567                                         const std::string &key, const uint32_t &value)
568 {
569     if (annotation_name.empty() || key.empty()) {
570         return;
571     }
572 
573     std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
574     const auto ann_iter = std::find_if(method_annotation.begin(), method_annotation.end(),
575                                        [&](pandasm::AnnotationData &ann) -> bool {
576         return ann.GetName() == annotation_name;
577     });
578 
579     pandasm::AnnotationElement annotation_element(key,
580         std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
581     const bool is_annotation = ann_iter != method_annotation.end();
582     if (is_annotation) {
583         ann_iter->AddElement(std::move(annotation_element));
584         method.metadata->SetAnnotations(std::move(method_annotation));
585     } else {
586         std::vector<pandasm::AnnotationElement> elements;
587         pandasm::AnnotationData ann_data(annotation_name, elements);
588         ann_data.AddElement(std::move(annotation_element));
589         std::vector<pandasm::AnnotationData> annotations;
590         annotations.push_back(std::move(ann_data));
591         method.metadata->AddAnnotations(annotations);
592     }
593 }
594 
GetAnnotationByMethodName(const std::string & method_name) const595 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
596 {
597     const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
598     bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
599     if (!is_signature) {
600         return std::nullopt;
601     }
602 
603     const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
604     bool is_method = method_iter != prog_.function_table.end();
605     const auto annotations = method_iter->second.metadata->GetAnnotations();
606     if (!is_method || annotations.empty()) {
607         return std::nullopt;
608     }
609 
610     std::vector<std::string> ann;
611     for (const auto &ann_data : annotations) {
612         ann.emplace_back(ann_data.GetName());
613     }
614     return ann;
615 }
616 
GetStrings() const617 std::vector<std::string> Disassembler::GetStrings() const
618 {
619     std::vector<std::string> strings;
620     for (auto &str_info : string_offset_to_name_) {
621         strings.emplace_back(str_info.second);
622     }
623 
624     return strings;
625 }
626 
GetModuleLiterals() const627 std::vector<std::string> Disassembler::GetModuleLiterals() const
628 {
629     std::vector<std::string> module_literals;
630     for (auto &module_array : modulearray_table_) {
631         for (auto &module : module_array.second) {
632             module_literals.emplace_back(module);
633         }
634     }
635 
636     return module_literals;
637 }
638 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & code_id) const639 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const
640 {
641     /**
642      * frame size - 2^16 - 1
643      */
644     static const uint32_t MAX_ARG_NUM = 0xFFFF;
645 
646     LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
647 
648     if (method == nullptr) {
649         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
650 
651         return;
652     }
653 
654     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
655 
656     auto params_num = code_accessor.GetNumArgs();
657     if (params_num > MAX_ARG_NUM) {
658         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
659                                  << "). number of function's arguments (" << std::dec << params_num
660                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
661 
662         return;
663     }
664 
665     method->return_type = pandasm::Type("any", 0);
666 
667     for (uint8_t i = 0; i < params_num; i++) {
668         method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), file_language_));
669     }
670 }
671 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const672 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
673                                        panda_file::File::EntityId code_id) const
674 {
675     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
676 
677     if (method == nullptr) {
678         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
679         return LabelTable {};
680     }
681 
682     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
683 
684     const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
685     const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
686 
687     size_t try_idx = 0;
688     LabelTable label_table {};
689     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
690         pandasm::Function::CatchBlock catch_block_pa {};
691         if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
692             return false;
693         }
694         size_t catch_idx = 0;
695         try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
696             auto class_idx = catch_block.GetTypeIdx();
697             if (class_idx == panda_file::INVALID_INDEX) {
698                 catch_block_pa.exception_record = "";
699             } else {
700                 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
701                 catch_block_pa.exception_record = GetFullRecordName(class_id);
702             }
703             if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
704                                   catch_idx)) {
705                 return false;
706             }
707 
708             method->catch_blocks.push_back(catch_block_pa);
709             catch_block_pa.catch_begin_label = "";
710             catch_block_pa.catch_end_label = "";
711             catch_idx++;
712 
713             return true;
714         });
715         try_idx++;
716 
717         return true;
718     });
719 
720     return label_table;
721 }
722 
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)723 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
724 {
725     size_t count = 0;
726 
727     while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
728         count++;
729         bc_ins_first = bc_ins_first.GetNext();
730         if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
731             return std::numeric_limits<size_t>::max();
732         }
733     }
734 
735     return count;
736 }
737 
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const738 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
739                                   const panda_file::CodeDataAccessor::TryBlock &try_block,
740                                   pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
741                                   size_t try_idx) const
742 {
743     const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
744     const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
745 
746     const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
747     const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
748 
749     const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
750     const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
751     const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
752     const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
753 
754     if (!try_begin_offset_in_range || !try_begin_offset_valid) {
755         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
756                                  << try_begin_bc_ins.GetAddress();
757         return false;
758     } else {
759         std::stringstream ss {};
760         ss << "try_begin_label_" << try_idx;
761 
762         LabelTable::iterator it = label_table->find(try_begin_idx);
763         if (it == label_table->end()) {
764             catch_block_pa->try_begin_label = ss.str();
765             label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
766         } else {
767             catch_block_pa->try_begin_label = it->second;
768         }
769     }
770 
771     if (!try_end_offset_in_range || !try_end_offset_valid) {
772         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
773                                  << try_end_bc_ins.GetAddress();
774         return false;
775     } else {
776         std::stringstream ss {};
777         ss << "try_end_label_" << try_idx;
778 
779         LabelTable::iterator it = label_table->find(try_end_idx);
780         if (it == label_table->end()) {
781             catch_block_pa->try_end_label = ss.str();
782             label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
783         } else {
784             catch_block_pa->try_end_label = it->second;
785         }
786     }
787 
788     return true;
789 }
790 
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const791 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
792                                     const panda_file::CodeDataAccessor::CatchBlock &catch_block,
793                                     pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
794                                     size_t try_idx, size_t catch_idx) const
795 {
796     const auto handler_begin_offset = catch_block.GetHandlerPc();
797     const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
798 
799     const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
800     const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
801 
802     const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
803     const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
804 
805     const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
806     const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
807     const bool handler_end_present = catch_block.GetCodeSize() != 0;
808     const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
809     const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
810 
811     if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
812         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
813                                  << handler_begin_bc_ins.GetAddress();
814         return false;
815     } else {
816         std::stringstream ss {};
817         ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
818 
819         LabelTable::iterator it = label_table->find(handler_begin_idx);
820         if (it == label_table->end()) {
821             catch_block_pa->catch_begin_label = ss.str();
822             label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
823         } else {
824             catch_block_pa->catch_begin_label = it->second;
825         }
826     }
827 
828     if (!handler_end_offset_in_range || !handler_end_offset_valid) {
829         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
830                                  << handler_end_bc_ins.GetAddress();
831         return false;
832     } else if (handler_end_present) {
833         std::stringstream ss {};
834         ss << "handler_end_label_" << try_idx << "_" << catch_idx;
835 
836         LabelTable::iterator it = label_table->find(handler_end_idx);
837         if (it == label_table->end()) {
838             catch_block_pa->catch_end_label = ss.str();
839             label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
840         } else {
841             catch_block_pa->catch_end_label = it->second;
842         }
843     }
844 
845     return true;
846 }
847 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const848 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
849 {
850     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
851                              << ")";
852 
853     if (method == nullptr) {
854         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
855 
856         return;
857     }
858 
859     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
860 
861     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
862 
863     if (!method_accessor.IsStatic()) {
864         const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
865         auto this_type = pandasm::Type::FromDescriptor(class_name);
866 
867         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
868                                  << "\') is not static. emplacing self-argument of type " << this_type.GetName();
869 
870         method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
871     } else {
872         method->metadata->SetAttribute("static");
873     }
874 
875     if (file_->IsExternal(method_accessor.GetMethodId())) {
876         method->metadata->SetAttribute("external");
877     }
878 
879     std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
880     std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
881 
882     const bool is_ctor = (method_name_raw == ctor_name);
883     const bool is_cctor = (method_name_raw == cctor_name);
884 
885     if (is_ctor) {
886         method->metadata->SetAttribute("ctor");
887         method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
888     } else if (is_cctor) {
889         method->metadata->SetAttribute("cctor");
890         method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
891     }
892 }
893 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const894 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
895 {
896     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
897                              << ")";
898 
899     if (record == nullptr) {
900         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
901 
902         return;
903     }
904 
905     if (file_->IsExternal(record_id)) {
906         record->metadata->SetAttribute("external");
907     }
908 }
909 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id,bool is_scope_names_record)910 void Disassembler::GetMetaData(pandasm::Field *field,
911                                const panda_file::File::EntityId &field_id,
912                                bool is_scope_names_record)
913 {
914     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
915 
916     if (field == nullptr) {
917         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
918 
919         return;
920     }
921 
922     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
923 
924     if (field_accessor.IsExternal()) {
925         field->metadata->SetAttribute("external");
926     }
927 
928     if (field_accessor.IsStatic()) {
929         field->metadata->SetAttribute("static");
930     }
931 
932     if (field->type.GetId() == panda_file::Type::TypeId::U32) {
933         const auto offset = field_accessor.GetValue<uint32_t>().value();
934         bool is_scope_name_field = is_scope_names_record || field->name == ark::SCOPE_NAMES;
935         if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) {
936             module_request_phase_literals_.insert(offset);
937         } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !is_scope_name_field) {
938             LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
939                                      << " is excluded";
940             module_literals_.insert(offset);
941         }
942         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
943     }
944     if (field->type.GetId() == panda_file::Type::TypeId::U8) {
945         const auto val = field_accessor.GetValue<uint8_t>().value();
946         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
947     }
948 }
949 
AnnotationTagToString(const char tag) const950 std::string Disassembler::AnnotationTagToString(const char tag) const
951 {
952     switch (tag) {
953         case '1':
954             return "u1";
955         case '2':
956             return "i8";
957         case '3':
958             return "u8";
959         case '4':
960             return "i16";
961         case '5':
962             return "u16";
963         case '6':
964             return "i32";
965         case '7':
966             return "u32";
967         case '8':
968             return "i64";
969         case '9':
970             return "u64";
971         case 'A':
972             return "f32";
973         case 'B':
974             return "f64";
975         case 'C':
976             return "string";
977         case 'D':
978             return "record";
979         case 'E':
980             return "method";
981         case 'F':
982             return "enum";
983         case 'G':
984             return "annotation";
985         case 'I':
986             return "void";
987         case 'J':
988             return "method_handle";
989         case 'K':
990             return "u1[]";
991         case 'L':
992             return "i8[]";
993         case 'M':
994             return "u8[]";
995         case 'N':
996             return "i16[]";
997         case 'O':
998             return "u16[]";
999         case 'P':
1000             return "i32[]";
1001         case 'Q':
1002             return "u32[]";
1003         case 'R':
1004             return "i64[]";
1005         case 'S':
1006             return "u64[]";
1007         case 'T':
1008             return "f32[]";
1009         case 'U':
1010             return "f64[]";
1011         case 'V':
1012             return "string[]";
1013         case 'W':
1014             return "record[]";
1015         case 'X':
1016             return "method[]";
1017         case 'Y':
1018             return "enum[]";
1019         case 'Z':
1020             return "annotation[]";
1021         case '@':
1022             return "method_handle[]";
1023         case '*':
1024             return "nullptr string";
1025         default:
1026             return std::string();
1027     }
1028 }
1029 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1030 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1031 {
1032     std::stringstream ss;
1033 
1034     if (type == "i8") {
1035         int8_t res = value.Get<int8_t>();
1036         ss << static_cast<int>(res);
1037     } else if (type == "u1" || type == "u8") {
1038         uint8_t res = value.Get<uint8_t>();
1039         ss << static_cast<unsigned int>(res);
1040     } else if (type == "i16") {
1041         ss << value.Get<int16_t>();
1042     } else if (type == "u16") {
1043         ss << value.Get<uint16_t>();
1044     } else if (type == "i32") {
1045         ss << value.Get<int32_t>();
1046     } else if (type == "u32") {
1047         ss << value.Get<uint32_t>();
1048     } else if (type == "i64") {
1049         ss << value.Get<int64_t>();
1050     } else if (type == "u64") {
1051         ss << value.Get<uint64_t>();
1052     } else if (type == "f32") {
1053         ss << value.Get<float>();
1054     } else if (type == "f64") {
1055         ss << value.Get<double>();
1056     } else if (type == "string") {
1057         const auto id = value.Get<panda_file::File::EntityId>();
1058         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1059     } else if (type == "record") {
1060         const auto id = value.Get<panda_file::File::EntityId>();
1061         ss << GetFullRecordName(id);
1062     } else if (type == "method") {
1063         const auto id = value.Get<panda_file::File::EntityId>();
1064         AddMethodToTables(id);
1065         ss << GetMethodSignature(id);
1066     } else if (type == "enum") {
1067         const auto id = value.Get<panda_file::File::EntityId>();
1068         panda_file::FieldDataAccessor field_accessor(*file_, id);
1069         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1070            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1071     } else if (type == "annotation") {
1072         const auto id = value.Get<panda_file::File::EntityId>();
1073         ss << "id_" << id;
1074     } else if (type == "void") {
1075         return std::string();
1076     } else if (type == "method_handle") {
1077     }
1078 
1079     return ss.str();
1080 }
1081 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1082 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1083                                              const size_t idx)
1084 {
1085     std::stringstream ss;
1086 
1087     if (type == "i8") {
1088         int8_t res = value.Get<int8_t>(idx);
1089         ss << static_cast<int>(res);
1090     } else if (type == "u1" || type == "u8") {
1091         uint8_t res = value.Get<uint8_t>(idx);
1092         ss << static_cast<unsigned int>(res);
1093     } else if (type == "i16") {
1094         ss << value.Get<int16_t>(idx);
1095     } else if (type == "u16") {
1096         ss << value.Get<uint16_t>(idx);
1097     } else if (type == "i32") {
1098         ss << value.Get<int32_t>(idx);
1099     } else if (type == "u32") {
1100         ss << value.Get<uint32_t>(idx);
1101     } else if (type == "i64") {
1102         ss << value.Get<int64_t>(idx);
1103     } else if (type == "u64") {
1104         ss << value.Get<uint64_t>(idx);
1105     } else if (type == "f32") {
1106         ss << value.Get<float>(idx);
1107     } else if (type == "f64") {
1108         ss << value.Get<double>(idx);
1109     } else if (type == "string") {
1110         const auto id = value.Get<panda_file::File::EntityId>(idx);
1111         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1112     } else if (type == "record") {
1113         const auto id = value.Get<panda_file::File::EntityId>(idx);
1114         ss << GetFullRecordName(id);
1115     } else if (type == "method") {
1116         const auto id = value.Get<panda_file::File::EntityId>(idx);
1117         AddMethodToTables(id);
1118         ss << GetMethodSignature(id);
1119     } else if (type == "enum") {
1120         const auto id = value.Get<panda_file::File::EntityId>(idx);
1121         panda_file::FieldDataAccessor field_accessor(*file_, id);
1122         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1123            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1124     } else if (type == "annotation") {
1125         const auto id = value.Get<panda_file::File::EntityId>(idx);
1126         ss << "id_" << id;
1127     } else if (type == "method_handle") {
1128     } else if (type == "nullptr string") {
1129     }
1130 
1131     return ss.str();
1132 }
1133 
GetFullMethodName(const panda_file::File::EntityId & method_id) const1134 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1135 {
1136     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1137 
1138     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1139 
1140     std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1141     if (IsSystemType(class_name)) {
1142         class_name = "";
1143     } else {
1144         class_name += ".";
1145     }
1146 
1147     return class_name + method_name_raw;
1148 }
1149 
GetMethodSignature(const panda_file::File::EntityId & method_id) const1150 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1151 {
1152     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1153 
1154     pandasm::Function method(GetFullMethodName(method_id), file_language_);
1155     if (method_accessor.GetCodeId().has_value()) {
1156         GetParams(&method, method_accessor.GetCodeId().value());
1157     }
1158     GetMetaData(&method, method_id);
1159 
1160     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1161 }
1162 
GetFullRecordName(const panda_file::File::EntityId & class_id) const1163 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1164 {
1165     std::string name = StringDataToString(file_->GetStringData(class_id));
1166 
1167     auto type = pandasm::Type::FromDescriptor(name);
1168     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1169 
1170     return type.GetPandasmName();
1171 }
1172 
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1173 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1174 {
1175     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1176 
1177     if (file_->IsExternal(record_id)) {
1178         return;
1179     }
1180 
1181     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1182     std::stringstream ss;
1183 
1184     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1185        << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1186        << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1187 
1188     record_info->record_info = ss.str();
1189     ss.str(std::string());
1190 
1191     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1192         ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1193            << field_accessor.GetFieldId();
1194 
1195         record_info->fields_info.push_back(ss.str());
1196 
1197         ss.str(std::string());
1198     });
1199 }
1200 
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1201 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1202 {
1203     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1204 
1205     panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1206     std::stringstream ss;
1207 
1208     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1209        << method_accessor.GetMethodId();
1210 
1211     if (method_accessor.GetCodeId().has_value()) {
1212         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1213            << method_accessor.GetCodeId().value();
1214 
1215         GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1216     } else {
1217         ss << ", <no code>";
1218     }
1219 
1220     method_info->method_info = ss.str();
1221 
1222     if (method_accessor.GetCodeId()) {
1223         ASSERT(debug_info_extractor_ != nullptr);
1224         method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1225         method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1226         method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1227 
1228         // Add information about parameters into the table
1229         panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1230         auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1231         uint32_t code_size = codeda.GetCodeSize();
1232         for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1233             panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1234             method_info->local_variable_table.emplace_back(arg_info);
1235         }
1236     }
1237 }
1238 
IsArray(const panda_file::LiteralTag & tag)1239 static bool IsArray(const panda_file::LiteralTag &tag)
1240 {
1241     switch (tag) {
1242         case panda_file::LiteralTag::ARRAY_U1:
1243         case panda_file::LiteralTag::ARRAY_U8:
1244         case panda_file::LiteralTag::ARRAY_I8:
1245         case panda_file::LiteralTag::ARRAY_U16:
1246         case panda_file::LiteralTag::ARRAY_I16:
1247         case panda_file::LiteralTag::ARRAY_U32:
1248         case panda_file::LiteralTag::ARRAY_I32:
1249         case panda_file::LiteralTag::ARRAY_U64:
1250         case panda_file::LiteralTag::ARRAY_I64:
1251         case panda_file::LiteralTag::ARRAY_F32:
1252         case panda_file::LiteralTag::ARRAY_F64:
1253         case panda_file::LiteralTag::ARRAY_STRING:
1254             return true;
1255         default:
1256             return false;
1257     }
1258 }
1259 
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1260 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1261 {
1262     std::stringstream ret;
1263     if (lit_array.literals_.empty()) {
1264         return "";
1265     }
1266 
1267     std::stringstream ss;
1268     ss << "{ ";
1269     const auto &tag = lit_array.literals_[0].tag_;
1270     if (IsArray(tag)) {
1271         ss << LiteralTagToString(tag);
1272     }
1273     ss << lit_array.literals_.size();
1274     ss << " [ ";
1275     SerializeValues(lit_array, ss);
1276     ss << "]}";
1277     return ss.str();
1278 }
1279 
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1280 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1281 {
1282     os << key << " ";
1283     os << SerializeLiteralArray(lit_array);
1284     os << "\n";
1285 }
1286 
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1287 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1288                              std::ostream &os) const
1289 {
1290     os << module_offset << " ";
1291     os << SerializeModuleLiteralArray(module_array);
1292     os << "\n";
1293 }
1294 
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1295 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1296 {
1297     if (module_array.empty()) {
1298         return "";
1299     }
1300 
1301     std::stringstream ss;
1302     ss << "{ ";
1303     ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array
1304     ss << " [\n";
1305     for (size_t index = 0; index < module_array.size(); index++) {
1306         ss << module_array[index] << ";\n";
1307     }
1308     ss << "]}";
1309     return ss.str();
1310 }
1311 
LiteralTagToString(const panda_file::LiteralTag & tag) const1312 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1313 {
1314     switch (tag) {
1315         case panda_file::LiteralTag::BOOL:
1316         case panda_file::LiteralTag::ARRAY_U1:
1317             return "u1";
1318         case panda_file::LiteralTag::ARRAY_U8:
1319             return "u8";
1320         case panda_file::LiteralTag::ARRAY_I8:
1321             return "i8";
1322         case panda_file::LiteralTag::ARRAY_U16:
1323             return "u16";
1324         case panda_file::LiteralTag::ARRAY_I16:
1325             return "i16";
1326         case panda_file::LiteralTag::ARRAY_U32:
1327             return "u32";
1328         case panda_file::LiteralTag::INTEGER:
1329         case panda_file::LiteralTag::ARRAY_I32:
1330             return "i32";
1331         case panda_file::LiteralTag::ARRAY_U64:
1332             return "u64";
1333         case panda_file::LiteralTag::ARRAY_I64:
1334             return "i64";
1335         case panda_file::LiteralTag::ARRAY_F32:
1336             return "f32";
1337         case panda_file::LiteralTag::DOUBLE:
1338         case panda_file::LiteralTag::ARRAY_F64:
1339             return "f64";
1340         case panda_file::LiteralTag::STRING:
1341         case panda_file::LiteralTag::ARRAY_STRING:
1342             return "string";
1343         case panda_file::LiteralTag::METHOD:
1344             return "method";
1345         case panda_file::LiteralTag::GETTER:
1346             return "getter";
1347         case panda_file::LiteralTag::SETTER:
1348             return "setter";
1349         case panda_file::LiteralTag::GENERATORMETHOD:
1350             return "generator_method";
1351         case panda_file::LiteralTag::ACCESSOR:
1352             return "accessor";
1353         case panda_file::LiteralTag::METHODAFFILIATE:
1354             return "method_affiliate";
1355         case panda_file::LiteralTag::NULLVALUE:
1356             return "null_value";
1357         case panda_file::LiteralTag::TAGVALUE:
1358             return "tagvalue";
1359         case panda_file::LiteralTag::LITERALBUFFERINDEX:
1360             return "lit_index";
1361         case panda_file::LiteralTag::LITERALARRAY:
1362             return "lit_offset";
1363         case panda_file::LiteralTag::BUILTINTYPEINDEX:
1364             return "builtin_type";
1365         default:
1366             UNREACHABLE();
1367     }
1368 }
1369 
1370 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1371 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1372 {
1373     switch (lit_array.literals_[0].tag_) {
1374         case panda_file::LiteralTag::ARRAY_U1: {
1375             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1376                 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1377             }
1378             break;
1379         }
1380         case panda_file::LiteralTag::ARRAY_U8: {
1381             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1382                 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1383             }
1384             break;
1385         }
1386         case panda_file::LiteralTag::ARRAY_I8: {
1387             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1388                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1389             }
1390             break;
1391         }
1392         case panda_file::LiteralTag::ARRAY_U16: {
1393             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1394                 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1395             }
1396             break;
1397         }
1398         case panda_file::LiteralTag::ARRAY_I16: {
1399             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1400                 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1401             }
1402             break;
1403         }
1404         case panda_file::LiteralTag::ARRAY_U32: {
1405             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1406                 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1407             }
1408             break;
1409         }
1410         case panda_file::LiteralTag::ARRAY_I32: {
1411             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1412                 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1413             }
1414             break;
1415         }
1416         case panda_file::LiteralTag::ARRAY_U64: {
1417             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1418                 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1419             }
1420             break;
1421         }
1422         case panda_file::LiteralTag::ARRAY_I64: {
1423             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1424                 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1425             }
1426             break;
1427         }
1428         case panda_file::LiteralTag::ARRAY_F32: {
1429             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1430                 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1431             }
1432             break;
1433         }
1434         case panda_file::LiteralTag::ARRAY_F64: {
1435             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1436                 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1437             }
1438             break;
1439         }
1440         case panda_file::LiteralTag::ARRAY_STRING: {
1441             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1442                 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1443             }
1444             break;
1445         }
1446         default:
1447             SerializeLiterals(lit_array, os);
1448     }
1449 }
1450 
1451 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1452 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1453 {
1454     for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1455         const auto &tag = lit_array.literals_[i].tag_;
1456         os << LiteralTagToString(tag) << ":";
1457         const auto &val = lit_array.literals_[i].value_;
1458         switch (lit_array.literals_[i].tag_) {
1459             case panda_file::LiteralTag::BOOL: {
1460                 os << std::get<bool>(val);
1461                 break;
1462             }
1463             case panda_file::LiteralTag::LITERALBUFFERINDEX:
1464             case panda_file::LiteralTag::INTEGER: {
1465                 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1466                 break;
1467             }
1468             case panda_file::LiteralTag::DOUBLE: {
1469                 os << std::get<double>(val);
1470                 break;
1471             }
1472             case panda_file::LiteralTag::STRING: {
1473                 os << "\"" << std::get<std::string>(val) << "\"";
1474                 break;
1475             }
1476             case panda_file::LiteralTag::METHOD:
1477             case panda_file::LiteralTag::GETTER:
1478             case panda_file::LiteralTag::SETTER:
1479             case panda_file::LiteralTag::GENERATORMETHOD: {
1480                 os << std::get<std::string>(val);
1481                 break;
1482             }
1483             case panda_file::LiteralTag::NULLVALUE:
1484             case panda_file::LiteralTag::ACCESSOR: {
1485                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1486                 break;
1487             }
1488             case panda_file::LiteralTag::METHODAFFILIATE: {
1489                 os << std::get<uint16_t>(val);
1490                 break;
1491             }
1492             case panda_file::LiteralTag::LITERALARRAY: {
1493                 os << std::get<std::string>(val);
1494                 break;
1495             }
1496             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1497                 os << static_cast<int16_t>(std::get<uint8_t>(val));
1498                 break;
1499             }
1500             default:
1501                 UNREACHABLE();
1502         }
1503         os << ", ";
1504     }
1505 }
1506 
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1507 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1508 {
1509     if (IsSystemType(record.name)) {
1510         return;
1511     }
1512 
1513     os << ".record " << record.name;
1514 
1515     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1516     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1517     if (record_in_table) {
1518         Serialize(*record.metadata, record_iter->second.ann_list, os);
1519     } else {
1520         Serialize(*record.metadata, {}, os);
1521     }
1522 
1523     if (record.metadata->IsForeign()) {
1524         os << "\n\n";
1525         return;
1526     }
1527 
1528     os << " {";
1529 
1530     if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1531         os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1532         SerializeFields(record, os, true);
1533     } else {
1534         os << "\n";
1535         SerializeFields(record, os, false);
1536     }
1537 
1538     os << "}\n\n";
1539 }
1540 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1541 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1542 {
1543     constexpr size_t INFO_OFFSET = 80;
1544 
1545     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1546     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1547 
1548     const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1549 
1550     size_t field_idx = 0;
1551 
1552     std::stringstream ss;
1553     for (const auto &f : record.field_list) {
1554         std::string file = GetFileNameByPath(f.name);
1555         ss << "\t" << f.type.GetPandasmName() << " " << file;
1556         if (f.metadata->GetValue().has_value()) {
1557             if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1558                 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1559             }
1560             if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1561                 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1562             }
1563         }
1564         if (record_in_table) {
1565             const auto field_iter = record_iter->second.field_annotations.find(f.name);
1566             if (field_iter != record_iter->second.field_annotations.end()) {
1567                 Serialize(*f.metadata, field_iter->second, ss);
1568             } else {
1569                 Serialize(*f.metadata, {}, ss);
1570             }
1571         } else {
1572             Serialize(*f.metadata, {}, ss);
1573         }
1574 
1575         if (print_information) {
1576             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1577         } else {
1578             os << ss.str() << "\n";
1579         }
1580 
1581         ss.str(std::string());
1582         ss.clear();
1583 
1584         field_idx++;
1585     }
1586 }
1587 
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1588 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1589 {
1590     const auto annotations = method.metadata->GetAnnotations();
1591     if (annotations.empty()) {
1592         return;
1593     }
1594 
1595     for (const auto &ann : annotations) {
1596         os << ann.GetName() << ":\n";
1597         std::stringstream ss;
1598         std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1599         if (elements.empty()) {
1600             continue;
1601         }
1602         uint32_t idx = elements.size() - 1;
1603         ss << "\t" << "u32" << " " << elements.back().GetName() << " { ";
1604         for (const auto &elem : elements) {
1605             ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>();
1606             if (idx > 0) {
1607                 ss << ", ";
1608             }
1609             --idx;
1610         }
1611         ss << " }";
1612         os << ss.str() << "\n";
1613     }
1614 }
1615 
SerializeInstructions(const pandasm::Function & method,std::ostream & os,const std::map<std::string,MethodInfo>::const_iterator & method_info_it,bool print_method_info) const1616 void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os,
1617                                          const std::map<std::string, MethodInfo>::const_iterator &method_info_it,
1618                                          bool print_method_info) const
1619 {
1620     std::string delim = ": ";
1621     size_t width = 0;
1622     if (print_method_info) {
1623         for (const auto &i : method.ins) {
1624             size_t ins_size = i.ToString().size();
1625             if (i.set_label) {
1626                 ins_size = ins_size - i.label.size() - delim.length();
1627             }
1628 
1629             if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) {
1630                 width = i.ToString().size();
1631             }
1632         }
1633     }
1634 
1635     for (size_t i = 0; i < method.ins.size(); i++) {
1636         std::string ins = method.ins[i].ToString("", true, method.regs_num);
1637         if (method.ins[i].set_label) {
1638             size_t pos = ins.find(delim);
1639             std::string label = ins.substr(0, pos);
1640             ins.erase(0, pos + delim.length());
1641             os << label << ":\n";
1642         }
1643 
1644         if (ins != "") {
1645             os << "\t" << std::setw(width) << std::left << ins;
1646             if (print_method_info && i < method_info_it->second.instructions_info.size()) {
1647                 os << " # " << method_info_it->second.instructions_info.at(i);
1648             }
1649             os << "\n";
1650         }
1651     }
1652 }
1653 
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1654 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1655 {
1656     SerializeMethodAnnotations(method, os);
1657     os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1658 
1659     if (method.params.size() > 0) {
1660         os << method.params[0].type.GetPandasmName() << " a0";
1661 
1662         for (uint8_t i = 1; i < method.params.size(); i++) {
1663             os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1664         }
1665     }
1666     os << ")";
1667 
1668     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1669 
1670     const auto method_iter = prog_ann_.method_annotations.find(signature);
1671     if (method_iter != prog_ann_.method_annotations.end()) {
1672         Serialize(*method.metadata, method_iter->second, os);
1673     } else {
1674         Serialize(*method.metadata, {}, os);
1675     }
1676 
1677     auto method_info_it = prog_info_.methods_info.find(signature);
1678     bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1679     if (print_method_info) {
1680         os << " { # " << method_info_it->second.method_info << "\n#   CODE:\n";
1681     } else {
1682         os << " {\n";
1683     }
1684     SerializeInstructions(method, os, method_info_it, print_method_info);
1685 
1686     if (method.catch_blocks.size() != 0) {
1687         os << "\n";
1688 
1689         for (const auto &catch_block : method.catch_blocks) {
1690             Serialize(catch_block, os);
1691 
1692             os << "\n";
1693         }
1694     }
1695 
1696     if (print_method_info) {
1697         const MethodInfo &method_info = method_info_it->second;
1698         SerializeLineNumberTable(method_info.line_number_table, os);
1699         SerializeColumnNumberTable(method_info.column_number_table, os);
1700         SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1701     }
1702 
1703     os << "}\n\n";
1704 }
1705 
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1706 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1707                                     std::ostream &os) const
1708 {
1709     os << "[offset:0x" << std::hex <<offset<< ", name_value:" << name_value<< "]" <<std::endl;
1710 }
1711 
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1712 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1713 {
1714     if (catch_block.exception_record == "") {
1715         os << ".catchall ";
1716     } else {
1717         os << ".catch " << catch_block.exception_record << ", ";
1718     }
1719 
1720     os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1721 
1722     if (catch_block.catch_end_label != "") {
1723         os << ", " << catch_block.catch_end_label;
1724     }
1725 }
1726 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1727 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1728 {
1729     auto bool_attributes = meta.GetBoolAttributes();
1730     auto attributes = meta.GetAttributes();
1731     if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1732         return;
1733     }
1734 
1735     os << " <";
1736 
1737     size_t size = bool_attributes.size();
1738     size_t idx = 0;
1739     for (const auto &attr : bool_attributes) {
1740         os << attr;
1741         ++idx;
1742 
1743         if (!attributes.empty() || !ann_list.empty() || idx < size) {
1744             os << ", ";
1745         }
1746     }
1747 
1748     size = attributes.size();
1749     idx = 0;
1750     for (const auto &[key, values] : attributes) {
1751         for (size_t i = 0; i < values.size(); i++) {
1752             os << key << "=" << values[i];
1753 
1754             if (i < values.size() - 1) {
1755                 os << ", ";
1756             }
1757         }
1758 
1759         ++idx;
1760 
1761         if (!ann_list.empty() || idx < size) {
1762             os << ", ";
1763         }
1764     }
1765 
1766     size = ann_list.size();
1767     idx = 0;
1768     for (const auto &[key, value] : ann_list) {
1769         os << key << "=" << value;
1770 
1771         ++idx;
1772 
1773         if (idx < size) {
1774             os << ", ";
1775         }
1776     }
1777 
1778     os << ">";
1779 }
1780 
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const1781 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
1782                                             std::ostream &os) const
1783 {
1784     if (line_number_table.empty()) {
1785         return;
1786     }
1787 
1788     os << "\n#   LINE_NUMBER_TABLE:\n";
1789     for (const auto &line_info : line_number_table) {
1790         os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
1791     }
1792 }
1793 
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const1794 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
1795                                               std::ostream &os) const
1796 {
1797     if (column_number_table.empty()) {
1798         return;
1799     }
1800 
1801     os << "\n#   COLUMN_NUMBER_TABLE:\n";
1802     for (const auto &column_info : column_number_table) {
1803         os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
1804     }
1805 }
1806 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const1807 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
1808                                                const pandasm::Function &method, std::ostream &os) const
1809 {
1810     if (local_variable_table.empty()) {
1811         return;
1812     }
1813 
1814     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
1815     os << "#\t Start   End  Register           Name   Signature\n";
1816     const int START_WIDTH = 5;
1817     const int END_WIDTH = 4;
1818     const int REG_WIDTH = 8;
1819     const int NAME_WIDTH = 14;
1820     for (const auto &variable_info : local_variable_table) {
1821         std::ostringstream reg_stream;
1822         reg_stream << variable_info.reg_number << '(';
1823         if (variable_info.reg_number < 0) {
1824             reg_stream << "acc";
1825         } else {
1826             uint32_t vreg = variable_info.reg_number;
1827             uint32_t first_arg_reg = method.GetTotalRegs();
1828             if (vreg < first_arg_reg) {
1829                 reg_stream << 'v' << vreg;
1830             } else {
1831                 reg_stream << 'a' << vreg - first_arg_reg;
1832             }
1833         }
1834         reg_stream << ')';
1835 
1836         os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << "  ";
1837         os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << "  ";
1838         os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
1839         os << std::setw(NAME_WIDTH) << std::right << variable_info.name << "   " << variable_info.type;
1840         if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
1841             os << " (" << variable_info.type_signature << ")";
1842         }
1843         os << "\n";
1844     }
1845 }
1846 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1847 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1848 {
1849     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1850 }
1851 
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const1852 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1853                                      size_t idx) const
1854 {
1855     std::stringstream name;
1856     const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
1857     std::string str_data = StringDataToString(file_->GetStringData(offset));
1858     if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
1859         name << GetMethodSignature(offset);
1860     } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
1861         name << '\"';
1862         name << str_data;
1863         name << '\"';
1864         string_offset_to_name_.emplace(offset, str_data);
1865     } else {
1866         ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
1867         pandasm::LiteralArray lit_array;
1868         GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
1869         name << SerializeLiteralArray(lit_array);
1870     }
1871 
1872     return name.str();
1873 }
1874 
GetRecordLanguage(panda_file::File::EntityId class_id) const1875 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
1876 {
1877     if (file_->IsExternal(class_id)) {
1878         return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1879     }
1880 
1881     panda_file::ClassDataAccessor cda(*file_, class_id);
1882     return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1883 }
1884 
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)1885 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
1886                                 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
1887                                 panda_file::File::EntityId code_id)
1888 {
1889     const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
1890     const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1891     if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1892         size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1893         if (idx != std::numeric_limits<size_t>::max()) {
1894             if (label_table->find(idx) == label_table->end()) {
1895                 std::stringstream ss {};
1896                 ss << "jump_label_" << label_table->size();
1897                 (*label_table)[idx] = ss.str();
1898             }
1899 
1900             pa_ins->imms.clear();
1901             pa_ins->ids.push_back(label_table->at(idx));
1902         } else {
1903             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1904                                      << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1905                                      << ": invalid jump offset 0x" << jmp_offset
1906                                      << " - jumping in the middle of another instruction!";
1907         }
1908     } else {
1909         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1910                                  << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1911                                  << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
1912     }
1913 }
1914 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1915 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1916                                      panda_file::File::EntityId code_id) const
1917 {
1918     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1919 
1920     const auto ins_sz = code_accessor.GetCodeSize();
1921     const auto ins_arr = code_accessor.GetInstructions();
1922 
1923     method->regs_num = code_accessor.GetNumVregs();
1924 
1925     auto bc_ins = BytecodeInstruction(ins_arr);
1926     const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1927 
1928     LabelTable label_table = GetExceptions(method, method_id, code_id);
1929 
1930     IdList unknown_external_methods {};
1931 
1932     while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1933         if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1934             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1935                                      << "). bytecode instructions sequence corrupted for method " << method->name
1936                                      << "! went out of bounds";
1937 
1938             break;
1939         }
1940 
1941         auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
1942         if (pa_ins.IsJump()) {
1943             translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
1944         }
1945 
1946         // check if method id is unknown external method. if so, emplace it in table
1947         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1948             const auto arg_method_idx = bc_ins.GetId().AsIndex();
1949             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1950 
1951             const auto arg_method_signature = GetMethodSignature(arg_method_id);
1952 
1953             const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
1954             const bool is_external = file_->IsExternal(arg_method_id);
1955             if (is_external && !is_present) {
1956                 unknown_external_methods.push_back(arg_method_id);
1957             }
1958         }
1959 
1960         method->AddInstruction(pa_ins);
1961         bc_ins = bc_ins.GetNext();
1962     }
1963 
1964     size_t instruction_count = method->ins.size();
1965     for (const auto &pair : label_table) {
1966         if (pair.first > instruction_count) {
1967             LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count
1968                                      << ", but the label index is " << pair.first;
1969             continue;
1970         }
1971 
1972         // In some case, the end label can be after the last instruction
1973         // Creating an invalid instruction for the label to make sure it can be serialized
1974         if (pair.first == instruction_count) {
1975             pandasm::Ins ins{};
1976             ins.opcode = pandasm::Opcode::INVALID;
1977             method->AddInstruction(ins);
1978         }
1979 
1980         method->ins[pair.first].label = pair.second;
1981         method->ins[pair.first].set_label = true;
1982     }
1983 
1984     return unknown_external_methods;
1985 }
1986 
GetColumnNumber()1987 std::vector<size_t> Disassembler::GetColumnNumber()
1988 {
1989     std::vector<size_t> columnNumber;
1990     for (const auto &method_info : prog_info_.methods_info) {
1991         for (const auto &column_number : method_info.second.column_number_table) {
1992             columnNumber.push_back(column_number.column);
1993         }
1994     }
1995     return columnNumber;
1996 }
1997 
GetLineNumber()1998 std::vector<size_t> Disassembler::GetLineNumber()
1999 {
2000     std::vector<size_t> lineNumber;
2001     for (const auto &method_info : prog_info_.methods_info) {
2002         for (const auto &line_number : method_info.second.line_number_table) {
2003             lineNumber.push_back(line_number.line);
2004         }
2005     }
2006     return lineNumber;
2007 }
2008 
2009 }  // namespace panda::disasm
2010