• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "libpandafile/util/collect_util.h"
18 #include "mangling.h"
19 #include "utils/logger.h"
20 #include "utils/const_value.h"
21 
22 #include <iomanip>
23 #include <type_traits>
24 
25 #include "get_language_specific_metadata.inc"
26 
27 namespace panda::disasm {
28 
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)29 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
30 {
31     auto file_new = panda_file::File::Open(filename_in);
32     file_.swap(file_new);
33 
34     if (file_ != nullptr) {
35         prog_ = pandasm::Program {};
36 
37         record_name_to_id_.clear();
38         method_name_to_id_.clear();
39         string_offset_to_name_.clear();
40         skip_strings_ = skip_strings;
41         quiet_ = quiet;
42 
43         prog_info_ = ProgInfo {};
44 
45         prog_ann_ = ProgAnnotations {};
46 
47         GetRecords();
48         GetLiteralArrays();
49 
50         GetLanguageSpecificMetadata();
51     } else {
52         LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
53     }
54 }
55 
CollectInfo()56 void Disassembler::CollectInfo()
57 {
58     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
59 
60     debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
61 
62     for (const auto &pair : record_name_to_id_) {
63         GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
64     }
65 
66     for (const auto &pair : method_name_to_id_) {
67         GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
68     }
69 }
70 
Serialize(std::ostream & os,bool add_separators,bool print_information) const71 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
72 {
73     if (os.bad()) {
74         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
75         return;
76     }
77 
78     if (file_ != nullptr) {
79         std::string abc_file = GetFileNameByPath(file_->GetFilename());
80         os << "# source binary: " << abc_file << "\n\n";
81     }
82 
83     if (add_separators) {
84         os << "# ====================\n"
85               "# LITERALS\n\n";
86     }
87 
88     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
89 
90     for (const auto &[key, lit_arr] : prog_.literalarray_table) {
91         Serialize(key, lit_arr, os);
92     }
93 
94     for (const auto &[module_offset, array_table] : modulearray_table_) {
95         Serialize(module_offset, array_table, os);
96     }
97 
98     os << "\n";
99 
100     if (add_separators) {
101         os << "# ====================\n"
102               "# RECORDS\n\n";
103     }
104 
105     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
106 
107     for (const auto &r : prog_.record_table) {
108         Serialize(r.second, os, print_information);
109     }
110 
111     if (add_separators) {
112         os << "# ====================\n"
113               "# METHODS\n\n";
114     }
115 
116     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
117 
118     for (const auto &m : prog_.function_table) {
119         Serialize(m.second, os, print_information);
120     }
121 
122     if (add_separators) {
123         os << "# ====================\n"
124               "# STRING\n\n";
125     }
126 
127     LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
128 
129     for (const auto &[offset, name_value] : string_offset_to_name_) {
130         SerializeStrings(offset, name_value, os);
131     }
132 }
133 
IsSystemType(const std::string & type_name)134 inline bool Disassembler::IsSystemType(const std::string &type_name)
135 {
136     bool is_array_type = type_name.find('[') != std::string::npos;
137     bool is_global = type_name == "_GLOBAL";
138 
139     return is_array_type || is_global;
140 }
141 
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)142 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
143 {
144     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
145 
146     if (record == nullptr) {
147         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
148 
149         return;
150     }
151 
152     record->name = GetFullRecordName(record_id);
153 
154     LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
155 
156     GetMetaData(record, record_id);
157 
158     if (!file_->IsExternal(record_id)) {
159         GetMethods(record_id);
160         GetFields(record, record_id);
161     }
162 }
163 
AddMethodToTables(const panda_file::File::EntityId & method_id)164 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
165 {
166     pandasm::Function new_method("", GetMethodLanguage(method_id));
167     GetMethod(&new_method, method_id);
168 
169     const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
170     if (prog_.function_table.find(signature) != prog_.function_table.end()) {
171         return;
172     }
173 
174     GetMethodAnnotations(new_method, method_id);
175     method_name_to_id_.emplace(signature, method_id);
176     prog_.function_synonyms[new_method.name].push_back(signature);
177     prog_.function_table.emplace(signature, std::move(new_method));
178 }
179 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)180 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
181 {
182     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
183 
184     if (method == nullptr) {
185         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
186 
187         return;
188     }
189 
190     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
191 
192     method->name = GetFullMethodName(method_id);
193 
194     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
195 
196     GetMetaData(method, method_id);
197 
198     if (method_accessor.GetCodeId().has_value()) {
199         auto code_id = method_accessor.GetCodeId().value();
200         GetParams(method, code_id);
201         const IdList id_list = GetInstructions(method, method_id, code_id);
202 
203         for (const auto &id : id_list) {
204             AddMethodToTables(id);
205         }
206     } else {
207         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
208                                  << "). implementation of method expected, but no \'CODE\' tag was found!";
209 
210         return;
211     }
212 }
213 
214 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const215 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
216                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
217 {
218     panda_file::File::EntityId id(std::get<uint32_t>(value));
219     auto sp = file_->GetSpanFromId(id);
220     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
221     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
222         for (size_t i = 0; i < len; i++) {
223             pandasm::LiteralArray::Literal lit;
224             lit.tag_ = tag;
225             lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
226             lit_array->literals_.push_back(lit);
227         }
228         return;
229     }
230     for (size_t i = 0; i < len; i++) {
231         auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
232         pandasm::LiteralArray::Literal lit;
233         lit.tag_ = tag;
234         lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
235         lit_array->literals_.push_back(lit);
236     }
237 }
238 
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const239 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
240                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
241                                    const panda_file::LiteralTag &tag) const
242 {
243     pandasm::LiteralArray::Literal lit;
244     lit.tag_ = tag;
245     switch (tag) {
246         case panda_file::LiteralTag::BOOL: {
247             lit.value_ = std::get<bool>(value);
248             break;
249         }
250         case panda_file::LiteralTag::ACCESSOR:
251         case panda_file::LiteralTag::NULLVALUE:
252         case panda_file::LiteralTag::BUILTINTYPEINDEX: {
253             lit.value_ = std::get<uint8_t>(value);
254             break;
255         }
256         case panda_file::LiteralTag::METHODAFFILIATE: {
257             lit.value_ = std::get<uint16_t>(value);
258             break;
259         }
260         case panda_file::LiteralTag::LITERALBUFFERINDEX:
261         case panda_file::LiteralTag::INTEGER: {
262             lit.value_ = std::get<uint32_t>(value);
263             break;
264         }
265         case panda_file::LiteralTag::DOUBLE: {
266             lit.value_ = std::get<double>(value);
267             break;
268         }
269         case panda_file::LiteralTag::STRING:
270         case panda_file::LiteralTag::ETS_IMPLEMENTS: {
271             auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
272             lit.value_ = StringDataToString(str_data);
273             break;
274         }
275         case panda_file::LiteralTag::METHOD:
276         case panda_file::LiteralTag::GETTER:
277         case panda_file::LiteralTag::SETTER:
278         case panda_file::LiteralTag::GENERATORMETHOD: {
279             panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
280             lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
281             break;
282         }
283         case panda_file::LiteralTag::LITERALARRAY: {
284             std::stringstream ss;
285             ss << "0x" << std::hex << std::get<uint32_t>(value);
286             lit.value_ = ss.str();
287             break;
288         }
289         case panda_file::LiteralTag::TAGVALUE: {
290             return;
291         }
292         default: {
293             UNREACHABLE();
294         }
295     }
296     lit_array->literals_.push_back(lit);
297 }
298 
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const299 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
300 {
301     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
302     lit_array_accessor.EnumerateLiteralVals(
303         offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
304                                   const panda_file::LiteralTag &tag) {
305             switch (tag) {
306                 case panda_file::LiteralTag::ARRAY_U1: {
307                     FillLiteralArrayData<bool>(lit_array, tag, value);
308                     break;
309                 }
310                 case panda_file::LiteralTag::ARRAY_I8:
311                 case panda_file::LiteralTag::ARRAY_U8: {
312                     FillLiteralArrayData<uint8_t>(lit_array, tag, value);
313                     break;
314                 }
315                 case panda_file::LiteralTag::ARRAY_I16:
316                 case panda_file::LiteralTag::ARRAY_U16: {
317                     FillLiteralArrayData<uint16_t>(lit_array, tag, value);
318                     break;
319                 }
320                 case panda_file::LiteralTag::ARRAY_I32:
321                 case panda_file::LiteralTag::ARRAY_U32: {
322                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
323                     break;
324                 }
325                 case panda_file::LiteralTag::ARRAY_I64:
326                 case panda_file::LiteralTag::ARRAY_U64: {
327                     FillLiteralArrayData<uint64_t>(lit_array, tag, value);
328                     break;
329                 }
330                 case panda_file::LiteralTag::ARRAY_F32: {
331                     FillLiteralArrayData<float>(lit_array, tag, value);
332                     break;
333                 }
334                 case panda_file::LiteralTag::ARRAY_F64: {
335                     FillLiteralArrayData<double>(lit_array, tag, value);
336                     break;
337                 }
338                 case panda_file::LiteralTag::ARRAY_STRING: {
339                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
340                     break;
341                 }
342                 default: {
343                     FillLiteralData(lit_array, value, tag);
344                     break;
345                 }
346             }
347         });
348 }
349 
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const350 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
351 {
352     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
353     GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
354 }
355 
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const356 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
357 {
358     return module_literals_.find(id.GetOffset()) != module_literals_.end();
359 }
360 
GetLiteralArrays()361 void Disassembler::GetLiteralArrays()
362 {
363     if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
364         const auto lit_arrays_id = file_->GetLiteralArraysId();
365         LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
366                                  << lit_arrays_id << ")";
367 
368         panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
369         size_t num_litarrays = lda.GetLiteralNum();
370         for (size_t index = 0; index < num_litarrays; index++) {
371             auto id = lda.GetLiteralArrayId(index);
372             if (module_request_phase_literals_.count(id.GetOffset())) {
373                 continue;
374             }
375             FillLiteralArrayTable(id, index);
376         }
377     } else {
378         panda::libpandafile::CollectUtil collect_util;
379         std::unordered_set<uint32_t> literal_array_ids;
380         collect_util.CollectLiteralArray(*file_, literal_array_ids);
381         size_t index = 0;
382         for (uint32_t literal_array_id : literal_array_ids) {
383             panda_file::File::EntityId id {literal_array_id};
384             FillLiteralArrayTable(id, index);
385             index++;
386         }
387     }
388 }
389 
FillLiteralArrayTable(panda_file::File::EntityId & id,size_t index)390 void Disassembler::FillLiteralArrayTable(panda_file::File::EntityId &id, size_t index)
391 {
392     if (IsModuleLiteralOffset(id)) {
393         std::stringstream ss;
394         ss << index << " 0x" << std::hex << id.GetOffset();
395         modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
396         return;
397     }
398     std::stringstream ss;
399     ss << index << " 0x" << std::hex << id.GetOffset();
400     panda::pandasm::LiteralArray lit_arr;
401     GetLiteralArrayByOffset(&lit_arr, id);
402     prog_.literalarray_table.emplace(ss.str(), lit_arr);
403 }
404 
ModuleTagToString(panda_file::ModuleTag & tag) const405 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
406 {
407     switch (tag) {
408         case panda_file::ModuleTag::REGULAR_IMPORT:
409             return "REGULAR_IMPORT";
410         case panda_file::ModuleTag::NAMESPACE_IMPORT:
411             return "NAMESPACE_IMPORT";
412         case panda_file::ModuleTag::LOCAL_EXPORT:
413             return "LOCAL_EXPORT";
414         case panda_file::ModuleTag::INDIRECT_EXPORT:
415             return "INDIRECT_EXPORT";
416         case panda_file::ModuleTag::STAR_EXPORT:
417             return "STAR_EXPORT";
418         default: {
419             UNREACHABLE();
420             break;
421         }
422     }
423     return "";
424 }
425 
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const426 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
427 {
428     panda_file::ModuleDataAccessor mda(*file_, module_id);
429     const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
430     std::vector<std::string> module_literal_array;
431     std::stringstream module_requests_stringstream;
432     module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n";
433     for (size_t index = 0; index < request_modules_offset.size(); ++index) {
434         module_requests_stringstream << "\t\t" << index <<
435             " : " << GetStringByOffset(request_modules_offset[index]) << ",\n";
436     }
437     module_requests_stringstream << "\t}";
438     module_literal_array.push_back(module_requests_stringstream.str());
439     mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset, uint32_t request_module_idx,
440                                   uint32_t import_name_offset, uint32_t local_name_offset) {
441         std::stringstream ss;
442         ss << "\tModuleTag: " << ModuleTagToString(tag);
443         if (tag == panda_file::ModuleTag::REGULAR_IMPORT ||
444             tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) {
445             if (!IsValidOffset(local_name_offset)) {
446                 LOG(ERROR, DISASSEMBLER) << "Get invalid local name offset!" << std::endl;
447                 return;
448             }
449             ss << ", local_name: " << GetStringByOffset(local_name_offset);
450         }
451         if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
452             if (!IsValidOffset(export_name_offset)) {
453                 LOG(ERROR, DISASSEMBLER) << "Get invalid export name offset!" << std::endl;
454                 return;
455             }
456             ss << ", export_name: " << GetStringByOffset(export_name_offset);
457         }
458         if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
459             if (!IsValidOffset(import_name_offset)) {
460                 LOG(ERROR, DISASSEMBLER) << "Get invalid import name offset!" << std::endl;
461                 return;
462             }
463             ss << ", import_name: " << GetStringByOffset(import_name_offset);
464         }
465         if (tag != panda_file::ModuleTag::LOCAL_EXPORT) {
466             if (request_module_idx >= request_modules_offset.size() ||
467                 !IsValidOffset(request_modules_offset[request_module_idx])) {
468                 LOG(ERROR, DISASSEMBLER) << "Get invalid request module offset!" << std::endl;
469                 return;
470             }
471             ss << ", module_request: " << GetStringByOffset(request_modules_offset[request_module_idx]);
472         }
473         module_literal_array.push_back(ss.str());
474     });
475 
476     return module_literal_array;
477 }
478 
GetRecords()479 void Disassembler::GetRecords()
480 {
481     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
482 
483     const auto class_idx = file_->GetClasses();
484 
485     for (size_t i = 0; i < class_idx.size(); i++) {
486         uint32_t class_id = class_idx[i];
487         auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
488 
489         if (class_id > file_->GetHeader()->file_size) {
490             LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
491                                      << class_off << "). binary file corrupted. record offset (0x" << class_id
492                                      << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
493             break;
494         }
495 
496         const panda_file::File::EntityId record_id {class_id};
497         auto language = GetRecordLanguage(record_id);
498 
499         pandasm::Record record("", language);
500         GetRecord(&record, record_id);
501 
502         if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
503             record_name_to_id_.emplace(record.name, record_id);
504             prog_.record_table.emplace(record.name, std::move(record));
505         }
506     }
507 }
508 
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)509 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
510 {
511     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
512 
513     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
514         pandasm::Field field(record->language);
515 
516         panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
517         field.name = StringDataToString(file_->GetStringData(field_name_id));
518 
519         uint32_t field_type = field_accessor.GetType();
520         field.type = FieldTypeToPandasmType(field_type);
521 
522         GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD);
523 
524         record->field_list.push_back(std::move(field));
525     });
526 }
527 
GetMethods(const panda_file::File::EntityId & record_id)528 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
529 {
530     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
531 
532     class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
533         AddMethodToTables(method_accessor.GetMethodId());
534     });
535 }
536 
GetAnnotationElements(pandasm::Function & method,const panda_file::AnnotationDataAccessor & ada,const std::string & annotation_name)537 void Disassembler::GetAnnotationElements(pandasm::Function &method, const panda_file::AnnotationDataAccessor &ada,
538                                          const std::string &annotation_name)
539 {
540     uint32_t elem_count = ada.GetCount();
541     for (uint32_t i = 0; i < elem_count; i++) {
542         panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
543         const auto &elem_name =
544             std::string {reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data)};
545         panda_file::AnnotationDataAccessor::Tag tag = ada.GetTag(i);
546         auto value_type = pandasm::Value::GetCharAsType(tag.GetItem());
547         switch (value_type) {
548             case pandasm::Value::Type::U1: {
549                 bool ann_elem_value = adae.GetScalarValue().Get<bool>();
550                 AddAnnotationElement<bool>(method, annotation_name, elem_name, ann_elem_value);
551                 break;
552             }
553             case pandasm::Value::Type::U32: {
554                 uint32_t ann_elem_value = adae.GetScalarValue().Get<uint32_t>();
555                 AddAnnotationElement<uint32_t>(method, annotation_name, elem_name, ann_elem_value);
556                 break;
557             }
558             case pandasm::Value::Type::F64: {
559                 double ann_elem_value = adae.GetScalarValue().Get<double>();
560                 AddAnnotationElement<double>(method, annotation_name, elem_name, ann_elem_value);
561                 break;
562             }
563             case pandasm::Value::Type::STRING: {
564                 uint32_t string_id = adae.GetScalarValue().Get<uint32_t>();
565                 std::string_view ann_elem_value {
566                     reinterpret_cast<const char *>(file_->GetStringData(panda_file::File::EntityId(string_id)).data)};
567                 AddAnnotationElement<std::string_view>(method, annotation_name, elem_name, ann_elem_value);
568                 break;
569             }
570             case pandasm::Value::Type::LITERALARRAY: {
571                 uint32_t literalArray_offset = adae.GetScalarValue().Get<uint32_t>();
572                 AddAnnotationElement<panda::pandasm::LiteralArray, std::string_view>(
573                     method, annotation_name, elem_name, std::string_view {std::to_string(literalArray_offset)});
574                 break;
575             }
576             default:
577                 UNREACHABLE();
578         }
579     }
580 }
581 
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)582 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
583 {
584     panda_file::MethodDataAccessor mda(*file_, method_id);
585     mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
586         panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
587         auto annotation_name =
588             std::string {reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data)};
589         annotation_name.pop_back(); // remove ; from annotation name
590 
591         if (annotation_name.empty()) {
592             return;
593         }
594 
595         std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
596         std::vector<pandasm::AnnotationElement> elements;
597         pandasm::AnnotationData ann_data(annotation_name, elements);
598         std::vector<pandasm::AnnotationData> annotations;
599         annotations.push_back(std::move(ann_data));
600         method.metadata->AddAnnotations(annotations);
601 
602         GetAnnotationElements(method, ada, annotation_name);
603     });
604 }
605 
606 template <typename T, typename U = T>
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const U & value)607 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
608                                         const std::string &key, const U &value)
609 {
610     if (key.empty()) {
611         return;
612     }
613 
614     std::unique_ptr<pandasm::Value> pandasmValue;
615     if constexpr (std::is_same<T, uint32_t>::value) {
616         pandasmValue = std::move(
617             std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
618     } else if constexpr (std::is_same<T, double>::value) {
619         pandasmValue = std::move(
620             std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(value)));
621     } else if constexpr (std::is_same<T, bool>::value) {
622         pandasmValue = std::move(
623             std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(value)));
624     } else if constexpr (std::is_same<T, std::string_view>::value) {
625         pandasmValue = std::move(
626             std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(value)));
627     } else if constexpr (std::is_same<T, panda::pandasm::LiteralArray>::value) {
628         static_assert(std::is_same<U, std::string_view>::value);
629         pandasmValue = std::move(std::make_unique<pandasm::ScalarValue>(
630             pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(value)));
631     } else {
632         UNREACHABLE();
633     }
634 
635     std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
636     const auto ann_iter =
637         std::find_if(method_annotation.begin(), method_annotation.end(),
638                      [&](pandasm::AnnotationData &ann) -> bool { return ann.GetName() == annotation_name; });
639 
640     pandasm::AnnotationElement annotation_element(key, std::move(pandasmValue));
641     ann_iter->AddElement(std::move(annotation_element));
642     method.metadata->SetAnnotations(std::move(method_annotation));
643 }
644 
GetAnnotationByMethodName(const std::string & method_name) const645 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
646 {
647     const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
648     bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
649     if (!is_signature) {
650         return std::nullopt;
651     }
652 
653     const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
654     bool is_method = method_iter != prog_.function_table.end();
655     const auto annotations = method_iter->second.metadata->GetAnnotations();
656     if (!is_method || annotations.empty()) {
657         return std::nullopt;
658     }
659 
660     std::vector<std::string> ann;
661     for (const auto &ann_data : annotations) {
662         ann.emplace_back(ann_data.GetName());
663     }
664     return ann;
665 }
666 
GetSerializedMethodAnnotation(const std::string & method_name,const std::string & anno_name) const667 std::optional<std::string> Disassembler::GetSerializedMethodAnnotation(const std::string &method_name,
668                                                                        const std::string &anno_name) const
669 {
670     const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
671     if (method_synonyms_iter == prog_.function_synonyms.end()) {
672         return std::nullopt;
673     }
674 
675     const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
676     if (method_iter == prog_.function_table.end()) {
677         return std::nullopt;
678     }
679 
680     const auto annotations = method_iter->second.metadata->GetAnnotations();
681     if (annotations.empty()) {
682         return std::nullopt;
683     }
684 
685     const auto annotation_iter =
686         std::find_if(annotations.begin(), annotations.end(),
687                      [&](const pandasm::AnnotationData &ann) -> bool { return ann.GetName() == anno_name; });
688     if (annotation_iter == annotations.end()) {
689         return std::nullopt;
690     }
691 
692     std::ostringstream os;
693     SerializeMethodAnnotation(*annotation_iter, os);
694     return os.str();
695 }
696 
GetSerializedRecord(const std::string & record_name) const697 std::optional<std::string> Disassembler::GetSerializedRecord(const std::string &record_name) const
698 {
699     const auto record_iter = prog_.record_table.find(record_name);
700     if (record_iter == prog_.record_table.end()) {
701         return std::nullopt;
702     }
703     std::ostringstream os;
704     Serialize(record_iter->second, os, false);
705     return os.str();
706 }
707 
GetStrings() const708 std::vector<std::string> Disassembler::GetStrings() const
709 {
710     std::vector<std::string> strings;
711     for (auto &str_info : string_offset_to_name_) {
712         strings.emplace_back(str_info.second);
713     }
714 
715     return strings;
716 }
717 
GetModuleLiterals() const718 std::vector<std::string> Disassembler::GetModuleLiterals() const
719 {
720     std::vector<std::string> module_literals;
721     for (auto &module_array : modulearray_table_) {
722         for (auto &module : module_array.second) {
723             module_literals.emplace_back(module);
724         }
725     }
726 
727     return module_literals;
728 }
729 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & code_id) const730 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const
731 {
732     /**
733      * frame size - 2^16 - 1
734      */
735     static const uint32_t MAX_ARG_NUM = 0xFFFF;
736 
737     LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
738 
739     if (method == nullptr) {
740         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
741 
742         return;
743     }
744 
745     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
746 
747     auto params_num = code_accessor.GetNumArgs();
748     if (params_num > MAX_ARG_NUM) {
749         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
750                                  << "). number of function's arguments (" << std::dec << params_num
751                                  << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
752 
753         return;
754     }
755 
756     method->return_type = pandasm::Type("any", 0);
757 
758     for (uint8_t i = 0; i < params_num; i++) {
759         method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), method->language));
760     }
761 }
762 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const763 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
764                                        panda_file::File::EntityId code_id) const
765 {
766     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
767 
768     if (method == nullptr) {
769         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
770         return LabelTable {};
771     }
772 
773     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
774 
775     const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
776     const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
777 
778     size_t try_idx = 0;
779     LabelTable label_table {};
780     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
781         pandasm::Function::CatchBlock catch_block_pa {};
782         if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
783             return false;
784         }
785         size_t catch_idx = 0;
786         try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
787             auto class_idx = catch_block.GetTypeIdx();
788             if (class_idx == panda_file::INVALID_INDEX) {
789                 catch_block_pa.exception_record = "";
790             } else {
791                 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
792                 catch_block_pa.exception_record = GetFullRecordName(class_id);
793             }
794             if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
795                                   catch_idx)) {
796                 return false;
797             }
798 
799             method->catch_blocks.push_back(catch_block_pa);
800             catch_block_pa.catch_begin_label = "";
801             catch_block_pa.catch_end_label = "";
802             catch_idx++;
803 
804             return true;
805         });
806         try_idx++;
807 
808         return true;
809     });
810 
811     return label_table;
812 }
813 
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)814 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
815 {
816     size_t count = 0;
817 
818     while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
819         count++;
820         bc_ins_first = bc_ins_first.GetNext();
821         if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
822             return std::numeric_limits<size_t>::max();
823         }
824     }
825 
826     return count;
827 }
828 
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const829 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
830                                   const panda_file::CodeDataAccessor::TryBlock &try_block,
831                                   pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
832                                   size_t try_idx) const
833 {
834     const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
835     const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
836 
837     const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
838     const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
839 
840     const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
841     const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
842     const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
843     const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
844 
845     if (!try_begin_offset_in_range || !try_begin_offset_valid) {
846         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
847                                  << try_begin_bc_ins.GetAddress();
848         return false;
849     } else {
850         std::stringstream ss {};
851         ss << "try_begin_label_" << try_idx;
852 
853         LabelTable::iterator it = label_table->find(try_begin_idx);
854         if (it == label_table->end()) {
855             catch_block_pa->try_begin_label = ss.str();
856             label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
857         } else {
858             catch_block_pa->try_begin_label = it->second;
859         }
860     }
861 
862     if (!try_end_offset_in_range || !try_end_offset_valid) {
863         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
864                                  << try_end_bc_ins.GetAddress();
865         return false;
866     } else {
867         std::stringstream ss {};
868         ss << "try_end_label_" << try_idx;
869 
870         LabelTable::iterator it = label_table->find(try_end_idx);
871         if (it == label_table->end()) {
872             catch_block_pa->try_end_label = ss.str();
873             label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
874         } else {
875             catch_block_pa->try_end_label = it->second;
876         }
877     }
878 
879     return true;
880 }
881 
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const882 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
883                                     const panda_file::CodeDataAccessor::CatchBlock &catch_block,
884                                     pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
885                                     size_t try_idx, size_t catch_idx) const
886 {
887     const auto handler_begin_offset = catch_block.GetHandlerPc();
888     const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
889 
890     const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
891     const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
892 
893     const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
894     const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
895 
896     const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
897     const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
898     const bool handler_end_present = catch_block.GetCodeSize() != 0;
899     const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
900     const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
901 
902     if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
903         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
904                                  << handler_begin_bc_ins.GetAddress();
905         return false;
906     } else {
907         std::stringstream ss {};
908         ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
909 
910         LabelTable::iterator it = label_table->find(handler_begin_idx);
911         if (it == label_table->end()) {
912             catch_block_pa->catch_begin_label = ss.str();
913             label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
914         } else {
915             catch_block_pa->catch_begin_label = it->second;
916         }
917     }
918 
919     if (!handler_end_offset_in_range || !handler_end_offset_valid) {
920         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
921                                  << handler_end_bc_ins.GetAddress();
922         return false;
923     } else if (handler_end_present) {
924         std::stringstream ss {};
925         ss << "handler_end_label_" << try_idx << "_" << catch_idx;
926 
927         LabelTable::iterator it = label_table->find(handler_end_idx);
928         if (it == label_table->end()) {
929             catch_block_pa->catch_end_label = ss.str();
930             label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
931         } else {
932             catch_block_pa->catch_end_label = it->second;
933         }
934     }
935 
936     return true;
937 }
938 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const939 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
940 {
941     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
942                              << ")";
943 
944     if (method == nullptr) {
945         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
946 
947         return;
948     }
949 
950     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
951 
952     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
953 
954     if (!method_accessor.IsStatic()) {
955         const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
956         auto this_type = pandasm::Type::FromDescriptor(class_name);
957 
958         LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
959                                  << "\') is not static. emplacing self-argument of type " << this_type.GetName();
960 
961         method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, method->language));
962     } else {
963         method->metadata->SetAttribute("static");
964     }
965 
966     if (file_->IsExternal(method_accessor.GetMethodId())) {
967         method->metadata->SetAttribute("external");
968     }
969 
970     std::string ctor_name = panda::panda_file::GetCtorName(method->language);
971     std::string cctor_name = panda::panda_file::GetCctorName(method->language);
972 
973     const bool is_ctor = (method_name_raw == ctor_name);
974     const bool is_cctor = (method_name_raw == cctor_name);
975 
976     if (is_ctor) {
977         method->metadata->SetAttribute("ctor");
978         method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
979     } else if (is_cctor) {
980         method->metadata->SetAttribute("cctor");
981         method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
982     }
983 }
984 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const985 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
986 {
987     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
988                              << ")";
989 
990     if (record == nullptr) {
991         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
992 
993         return;
994     }
995 
996     if (file_->IsExternal(record_id)) {
997         record->metadata->SetAttribute("external");
998     }
999 }
1000 
GetMetadataFieldValue(panda_file::FieldDataAccessor & field_accessor,pandasm::Field * field,bool isScopeNamesRecord)1001 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &field_accessor, pandasm::Field *field,
1002                                          bool isScopeNamesRecord)
1003 {
1004     if (field->type.GetId() == panda_file::Type::TypeId::U32) {
1005         const auto offset = field_accessor.GetValue<uint32_t>().value();
1006         bool isScopeNameField = isScopeNamesRecord || field->name == ark::SCOPE_NAMES;
1007         if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) {
1008             module_request_phase_literals_.insert(offset);
1009         } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !isScopeNameField) {
1010             LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
1011                                      << " is excluded";
1012             module_literals_.insert(offset);
1013         }
1014         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
1015     } else if (field->type.GetId() == panda_file::Type::TypeId::U8) {
1016         const uint8_t val = field_accessor.GetValue<uint8_t>().value();
1017         field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
1018     } else if (field->type.GetId() == panda_file::Type::TypeId::F64) {
1019         std::optional<double> val = field_accessor.GetValue<double>();
1020         if (val.has_value()) {
1021             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(val.value()));
1022         }
1023     } else if (field->type.GetId() == panda_file::Type::TypeId::U1) {
1024         std::optional<bool> val = field_accessor.GetValue<bool>();
1025         if (val.has_value()) {
1026             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(val.value()));
1027         }
1028     } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE && field->type.GetName() == "panda.String") {
1029         std::optional<uint32_t> string_offset_val = field_accessor.GetValue<uint32_t>();
1030         if (string_offset_val.has_value()) {
1031             std::string_view val {reinterpret_cast<const char *>(
1032                 file_->GetStringData(panda_file::File::EntityId(string_offset_val.value())).data)};
1033             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1034         }
1035     } else if (field->type.GetRank() > 0) {
1036         std::optional<uint32_t> litarray_offset_val = field_accessor.GetValue<uint32_t>();
1037         if (litarray_offset_val.has_value()) {
1038             field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1039                 std::string_view {std::to_string(litarray_offset_val.value())}));
1040         }
1041     } else {
1042         UNREACHABLE();
1043     }
1044 }
1045 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id,bool is_scope_names_record)1046 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id,
1047                                bool is_scope_names_record)
1048 {
1049     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
1050 
1051     if (field == nullptr) {
1052         LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1053 
1054         return;
1055     }
1056 
1057     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
1058 
1059     if (field_accessor.IsExternal()) {
1060         field->metadata->SetAttribute("external");
1061     }
1062 
1063     if (field_accessor.IsStatic()) {
1064         field->metadata->SetAttribute("static");
1065     }
1066 
1067     GetMetadataFieldValue(field_accessor, field, is_scope_names_record);
1068 }
1069 
AnnotationTagToString(const char tag) const1070 std::string Disassembler::AnnotationTagToString(const char tag) const
1071 {
1072     switch (tag) {
1073         case '1':
1074             return "u1";
1075         case '2':
1076             return "i8";
1077         case '3':
1078             return "u8";
1079         case '4':
1080             return "i16";
1081         case '5':
1082             return "u16";
1083         case '6':
1084             return "i32";
1085         case '7':
1086             return "u32";
1087         case '8':
1088             return "i64";
1089         case '9':
1090             return "u64";
1091         case 'A':
1092             return "f32";
1093         case 'B':
1094             return "f64";
1095         case 'C':
1096             return "string";
1097         case 'D':
1098             return "record";
1099         case 'E':
1100             return "method";
1101         case 'F':
1102             return "enum";
1103         case 'G':
1104             return "annotation";
1105         case 'I':
1106             return "void";
1107         case 'J':
1108             return "method_handle";
1109         case 'K':
1110             return "u1[]";
1111         case 'L':
1112             return "i8[]";
1113         case 'M':
1114             return "u8[]";
1115         case 'N':
1116             return "i16[]";
1117         case 'O':
1118             return "u16[]";
1119         case 'P':
1120             return "i32[]";
1121         case 'Q':
1122             return "u32[]";
1123         case 'R':
1124             return "i64[]";
1125         case 'S':
1126             return "u64[]";
1127         case 'T':
1128             return "f32[]";
1129         case 'U':
1130             return "f64[]";
1131         case 'V':
1132             return "string[]";
1133         case 'W':
1134             return "record[]";
1135         case 'X':
1136             return "method[]";
1137         case 'Y':
1138             return "enum[]";
1139         case 'Z':
1140             return "annotation[]";
1141         case '@':
1142             return "method_handle[]";
1143         case '*':
1144             return "nullptr string";
1145         default:
1146             return std::string();
1147     }
1148 }
1149 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1150 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1151 {
1152     std::stringstream ss;
1153 
1154     if (type == "i8") {
1155         int8_t res = value.Get<int8_t>();
1156         ss << static_cast<int>(res);
1157     } else if (type == "u1" || type == "u8") {
1158         uint8_t res = value.Get<uint8_t>();
1159         ss << static_cast<unsigned int>(res);
1160     } else if (type == "i16") {
1161         ss << value.Get<int16_t>();
1162     } else if (type == "u16") {
1163         ss << value.Get<uint16_t>();
1164     } else if (type == "i32") {
1165         ss << value.Get<int32_t>();
1166     } else if (type == "u32") {
1167         ss << value.Get<uint32_t>();
1168     } else if (type == "i64") {
1169         ss << value.Get<int64_t>();
1170     } else if (type == "u64") {
1171         ss << value.Get<uint64_t>();
1172     } else if (type == "f32") {
1173         ss << value.Get<float>();
1174     } else if (type == "f64") {
1175         ss << value.Get<double>();
1176     } else if (type == "string") {
1177         const auto id = value.Get<panda_file::File::EntityId>();
1178         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1179     } else if (type == "record") {
1180         const auto id = value.Get<panda_file::File::EntityId>();
1181         ss << GetFullRecordName(id);
1182     } else if (type == "method") {
1183         const auto id = value.Get<panda_file::File::EntityId>();
1184         AddMethodToTables(id);
1185         ss << GetMethodSignature(id);
1186     } else if (type == "enum") {
1187         const auto id = value.Get<panda_file::File::EntityId>();
1188         panda_file::FieldDataAccessor field_accessor(*file_, id);
1189         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1190            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1191     } else if (type == "annotation") {
1192         const auto id = value.Get<panda_file::File::EntityId>();
1193         ss << "id_" << id;
1194     } else if (type == "void") {
1195         return std::string();
1196     } else if (type == "method_handle") {
1197     }
1198 
1199     return ss.str();
1200 }
1201 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1202 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1203                                              const size_t idx)
1204 {
1205     std::stringstream ss;
1206 
1207     if (type == "i8") {
1208         int8_t res = value.Get<int8_t>(idx);
1209         ss << static_cast<int>(res);
1210     } else if (type == "u1" || type == "u8") {
1211         uint8_t res = value.Get<uint8_t>(idx);
1212         ss << static_cast<unsigned int>(res);
1213     } else if (type == "i16") {
1214         ss << value.Get<int16_t>(idx);
1215     } else if (type == "u16") {
1216         ss << value.Get<uint16_t>(idx);
1217     } else if (type == "i32") {
1218         ss << value.Get<int32_t>(idx);
1219     } else if (type == "u32") {
1220         ss << value.Get<uint32_t>(idx);
1221     } else if (type == "i64") {
1222         ss << value.Get<int64_t>(idx);
1223     } else if (type == "u64") {
1224         ss << value.Get<uint64_t>(idx);
1225     } else if (type == "f32") {
1226         ss << value.Get<float>(idx);
1227     } else if (type == "f64") {
1228         ss << value.Get<double>(idx);
1229     } else if (type == "string") {
1230         const auto id = value.Get<panda_file::File::EntityId>(idx);
1231         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1232     } else if (type == "record") {
1233         const auto id = value.Get<panda_file::File::EntityId>(idx);
1234         ss << GetFullRecordName(id);
1235     } else if (type == "method") {
1236         const auto id = value.Get<panda_file::File::EntityId>(idx);
1237         AddMethodToTables(id);
1238         ss << GetMethodSignature(id);
1239     } else if (type == "enum") {
1240         const auto id = value.Get<panda_file::File::EntityId>(idx);
1241         panda_file::FieldDataAccessor field_accessor(*file_, id);
1242         ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1243            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1244     } else if (type == "annotation") {
1245         const auto id = value.Get<panda_file::File::EntityId>(idx);
1246         ss << "id_" << id;
1247     } else if (type == "method_handle") {
1248     } else if (type == "nullptr string") {
1249     }
1250 
1251     return ss.str();
1252 }
1253 
GetFullMethodName(const panda_file::File::EntityId & method_id) const1254 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1255 {
1256     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1257 
1258     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1259 
1260     std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1261     if (IsSystemType(class_name)) {
1262         class_name = "";
1263     } else {
1264         class_name += ".";
1265     }
1266 
1267     return class_name + method_name_raw;
1268 }
1269 
GetMethodSignature(const panda_file::File::EntityId & method_id) const1270 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1271 {
1272     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1273 
1274     pandasm::Function method(GetFullMethodName(method_id), GetMethodLanguage(method_id));
1275     if (method_accessor.GetCodeId().has_value()) {
1276         GetParams(&method, method_accessor.GetCodeId().value());
1277     }
1278     GetMetaData(&method, method_id);
1279 
1280     return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1281 }
1282 
GetFullRecordName(const panda_file::File::EntityId & class_id) const1283 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1284 {
1285     std::string name = StringDataToString(file_->GetStringData(class_id));
1286 
1287     auto type = pandasm::Type::FromDescriptor(name);
1288     type = pandasm::Type(type.GetComponentName(), type.GetRank());
1289 
1290     return type.GetPandasmName();
1291 }
1292 
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1293 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1294 {
1295     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1296 
1297     if (file_->IsExternal(record_id)) {
1298         return;
1299     }
1300 
1301     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1302     std::stringstream ss;
1303 
1304     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1305        << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1306        << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1307 
1308     record_info->record_info = ss.str();
1309     ss.str(std::string());
1310 
1311     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1312         ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1313            << field_accessor.GetFieldId();
1314 
1315         record_info->fields_info.push_back(ss.str());
1316 
1317         ss.str(std::string());
1318     });
1319 }
1320 
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1321 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1322 {
1323     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1324 
1325     panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1326     std::stringstream ss;
1327 
1328     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1329        << method_accessor.GetMethodId();
1330 
1331     if (method_accessor.GetCodeId().has_value()) {
1332         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1333            << method_accessor.GetCodeId().value();
1334 
1335         GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1336     } else {
1337         ss << ", <no code>";
1338     }
1339 
1340     method_info->method_info = ss.str();
1341 
1342     if (method_accessor.GetCodeId()) {
1343         ASSERT(debug_info_extractor_ != nullptr);
1344         method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1345         method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1346         method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1347 
1348         // Add information about parameters into the table
1349         panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1350         auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1351         uint32_t code_size = codeda.GetCodeSize();
1352         for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1353             panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1354             method_info->local_variable_table.emplace_back(arg_info);
1355         }
1356     }
1357 }
1358 
IsArray(const panda_file::LiteralTag & tag)1359 static bool IsArray(const panda_file::LiteralTag &tag)
1360 {
1361     switch (tag) {
1362         case panda_file::LiteralTag::ARRAY_U1:
1363         case panda_file::LiteralTag::ARRAY_U8:
1364         case panda_file::LiteralTag::ARRAY_I8:
1365         case panda_file::LiteralTag::ARRAY_U16:
1366         case panda_file::LiteralTag::ARRAY_I16:
1367         case panda_file::LiteralTag::ARRAY_U32:
1368         case panda_file::LiteralTag::ARRAY_I32:
1369         case panda_file::LiteralTag::ARRAY_U64:
1370         case panda_file::LiteralTag::ARRAY_I64:
1371         case panda_file::LiteralTag::ARRAY_F32:
1372         case panda_file::LiteralTag::ARRAY_F64:
1373         case panda_file::LiteralTag::ARRAY_STRING:
1374             return true;
1375         default:
1376             return false;
1377     }
1378 }
1379 
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1380 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1381 {
1382     std::stringstream ret;
1383     if (lit_array.literals_.empty()) {
1384         return "";
1385     }
1386 
1387     std::stringstream ss;
1388     ss << "{ ";
1389     const auto &tag = lit_array.literals_[0].tag_;
1390     if (IsArray(tag)) {
1391         ss << LiteralTagToString(tag);
1392     }
1393     ss << lit_array.literals_.size();
1394     ss << " [ ";
1395     SerializeValues(lit_array, ss);
1396     ss << "]}";
1397     return ss.str();
1398 }
1399 
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1400 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1401 {
1402     os << key << " ";
1403     os << SerializeLiteralArray(lit_array);
1404     os << "\n";
1405 }
1406 
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1407 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1408                              std::ostream &os) const
1409 {
1410     os << module_offset << " ";
1411     os << SerializeModuleLiteralArray(module_array);
1412     os << "\n";
1413 }
1414 
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1415 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1416 {
1417     if (module_array.empty()) {
1418         return "";
1419     }
1420 
1421     std::stringstream ss;
1422     ss << "{ ";
1423     ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array
1424     ss << " [\n";
1425     for (size_t index = 0; index < module_array.size(); index++) {
1426         ss << module_array[index] << ";\n";
1427     }
1428     ss << "]}";
1429     return ss.str();
1430 }
1431 
LiteralTagToString(const panda_file::LiteralTag & tag) const1432 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1433 {
1434     switch (tag) {
1435         case panda_file::LiteralTag::BOOL:
1436         case panda_file::LiteralTag::ARRAY_U1:
1437             return "u1";
1438         case panda_file::LiteralTag::ARRAY_U8:
1439             return "u8";
1440         case panda_file::LiteralTag::ARRAY_I8:
1441             return "i8";
1442         case panda_file::LiteralTag::ARRAY_U16:
1443             return "u16";
1444         case panda_file::LiteralTag::ARRAY_I16:
1445             return "i16";
1446         case panda_file::LiteralTag::ARRAY_U32:
1447             return "u32";
1448         case panda_file::LiteralTag::INTEGER:
1449         case panda_file::LiteralTag::ARRAY_I32:
1450             return "i32";
1451         case panda_file::LiteralTag::ARRAY_U64:
1452             return "u64";
1453         case panda_file::LiteralTag::ARRAY_I64:
1454             return "i64";
1455         case panda_file::LiteralTag::ARRAY_F32:
1456             return "f32";
1457         case panda_file::LiteralTag::DOUBLE:
1458         case panda_file::LiteralTag::ARRAY_F64:
1459             return "f64";
1460         case panda_file::LiteralTag::STRING:
1461         case panda_file::LiteralTag::ARRAY_STRING:
1462             return "string";
1463         case panda_file::LiteralTag::METHOD:
1464             return "method";
1465         case panda_file::LiteralTag::GETTER:
1466             return "getter";
1467         case panda_file::LiteralTag::SETTER:
1468             return "setter";
1469         case panda_file::LiteralTag::GENERATORMETHOD:
1470             return "generator_method";
1471         case panda_file::LiteralTag::ETS_IMPLEMENTS:
1472             return "ets_implements";
1473         case panda_file::LiteralTag::ACCESSOR:
1474             return "accessor";
1475         case panda_file::LiteralTag::METHODAFFILIATE:
1476             return "method_affiliate";
1477         case panda_file::LiteralTag::NULLVALUE:
1478             return "null_value";
1479         case panda_file::LiteralTag::TAGVALUE:
1480             return "tagvalue";
1481         case panda_file::LiteralTag::LITERALBUFFERINDEX:
1482             return "lit_index";
1483         case panda_file::LiteralTag::LITERALARRAY:
1484             return "lit_offset";
1485         case panda_file::LiteralTag::BUILTINTYPEINDEX:
1486             return "builtin_type";
1487         default:
1488             UNREACHABLE();
1489     }
1490 }
1491 
1492 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1493 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1494 {
1495     switch (lit_array.literals_[0].tag_) {
1496         case panda_file::LiteralTag::ARRAY_U1: {
1497             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1498                 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1499             }
1500             break;
1501         }
1502         case panda_file::LiteralTag::ARRAY_U8: {
1503             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1504                 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1505             }
1506             break;
1507         }
1508         case panda_file::LiteralTag::ARRAY_I8: {
1509             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1510                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1511             }
1512             break;
1513         }
1514         case panda_file::LiteralTag::ARRAY_U16: {
1515             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1516                 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1517             }
1518             break;
1519         }
1520         case panda_file::LiteralTag::ARRAY_I16: {
1521             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1522                 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1523             }
1524             break;
1525         }
1526         case panda_file::LiteralTag::ARRAY_U32: {
1527             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1528                 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1529             }
1530             break;
1531         }
1532         case panda_file::LiteralTag::ARRAY_I32: {
1533             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1534                 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1535             }
1536             break;
1537         }
1538         case panda_file::LiteralTag::ARRAY_U64: {
1539             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1540                 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1541             }
1542             break;
1543         }
1544         case panda_file::LiteralTag::ARRAY_I64: {
1545             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1546                 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1547             }
1548             break;
1549         }
1550         case panda_file::LiteralTag::ARRAY_F32: {
1551             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1552                 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1553             }
1554             break;
1555         }
1556         case panda_file::LiteralTag::ARRAY_F64: {
1557             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1558                 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1559             }
1560             break;
1561         }
1562         case panda_file::LiteralTag::ARRAY_STRING: {
1563             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1564                 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1565             }
1566             break;
1567         }
1568         default:
1569             SerializeLiterals(lit_array, os);
1570     }
1571 }
1572 
1573 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1574 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1575 {
1576     for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1577         const auto &tag = lit_array.literals_[i].tag_;
1578         os << LiteralTagToString(tag) << ":";
1579         const auto &val = lit_array.literals_[i].value_;
1580         switch (lit_array.literals_[i].tag_) {
1581             case panda_file::LiteralTag::BOOL: {
1582                 os << std::get<bool>(val);
1583                 break;
1584             }
1585             case panda_file::LiteralTag::LITERALBUFFERINDEX:
1586             case panda_file::LiteralTag::INTEGER: {
1587                 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1588                 break;
1589             }
1590             case panda_file::LiteralTag::DOUBLE: {
1591                 os << std::get<double>(val);
1592                 break;
1593             }
1594             case panda_file::LiteralTag::STRING:
1595             case panda_file::LiteralTag::ETS_IMPLEMENTS: {
1596                 os << "\"" << std::get<std::string>(val) << "\"";
1597                 break;
1598             }
1599             case panda_file::LiteralTag::METHOD:
1600             case panda_file::LiteralTag::GETTER:
1601             case panda_file::LiteralTag::SETTER:
1602             case panda_file::LiteralTag::GENERATORMETHOD: {
1603                 os << std::get<std::string>(val);
1604                 break;
1605             }
1606             case panda_file::LiteralTag::NULLVALUE:
1607             case panda_file::LiteralTag::ACCESSOR: {
1608                 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1609                 break;
1610             }
1611             case panda_file::LiteralTag::METHODAFFILIATE: {
1612                 os << std::get<uint16_t>(val);
1613                 break;
1614             }
1615             case panda_file::LiteralTag::LITERALARRAY: {
1616                 os << std::get<std::string>(val);
1617                 break;
1618             }
1619             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1620                 os << static_cast<int16_t>(std::get<uint8_t>(val));
1621                 break;
1622             }
1623             default:
1624                 UNREACHABLE();
1625         }
1626         os << ", ";
1627     }
1628 }
1629 
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1630 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1631 {
1632     if (IsSystemType(record.name)) {
1633         return;
1634     }
1635     os << ".language " << panda::panda_file::LanguageToString(record.language) << std::endl;
1636     os << ".record " << record.name;
1637 
1638     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1639     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1640     if (record_in_table) {
1641         Serialize(*record.metadata, record_iter->second.ann_list, os);
1642     } else {
1643         Serialize(*record.metadata, {}, os);
1644     }
1645 
1646     if (record.metadata->IsForeign()) {
1647         os << "\n\n";
1648         return;
1649     }
1650 
1651     os << " {";
1652 
1653     if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1654         os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1655         SerializeFields(record, os, true);
1656     } else {
1657         os << "\n";
1658         SerializeFields(record, os, false);
1659     }
1660 
1661     os << "}\n\n";
1662 }
1663 
DumpLiteralArray(const pandasm::LiteralArray & literal_array,std::stringstream & ss) const1664 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literal_array, std::stringstream &ss) const
1665 {
1666     ss << "[";
1667     bool firstItem = true;
1668     for (const auto &item : literal_array.literals_) {
1669         if (!firstItem) {
1670             ss << ", ";
1671         } else {
1672             firstItem = false;
1673         }
1674 
1675         switch (item.tag_) {
1676             case panda_file::LiteralTag::DOUBLE: {
1677                 ss << std::get<double>(item.value_);
1678                 break;
1679             }
1680             case panda_file::LiteralTag::BOOL: {
1681                 ss << std::get<bool>(item.value_);
1682                 break;
1683             }
1684             case panda_file::LiteralTag::STRING: {
1685                 ss << "\"" << std::get<std::string>(item.value_) << "\"";
1686                 break;
1687             }
1688             case panda_file::LiteralTag::LITERALARRAY: {
1689                 std::string offset_str = std::get<std::string>(item.value_);
1690                 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1691                 pandasm::LiteralArray lit_array;
1692                 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1693                 DumpLiteralArray(lit_array, ss);
1694                 break;
1695             }
1696             case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1697                 // By convention, BUILTINTYPEINDEX is used to store type of empty arrays,
1698                 // therefore it has no value
1699                 break;
1700             }
1701             default: {
1702                 UNREACHABLE();
1703                 break;
1704             }
1705         }
1706     }
1707     ss << "]";
1708 }
1709 
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1710 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1711 {
1712     if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1713         ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1714     } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1715         ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1716     } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1717         ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1718     } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1719         ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1720     } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "panda.String") {
1721         ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1722     } else if (f.type.GetRank() > 0) {
1723         uint32_t lit_array_fffset =
1724             std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()));
1725         pandasm::LiteralArray lit_array;
1726         GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1727         ss << " = ";
1728         DumpLiteralArray(lit_array, ss);
1729     }
1730 }
1731 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1732 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1733 {
1734     constexpr size_t INFO_OFFSET = 80;
1735 
1736     const auto record_iter = prog_ann_.record_annotations.find(record.name);
1737     const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1738 
1739     const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1740 
1741     size_t field_idx = 0;
1742 
1743     std::stringstream ss;
1744     for (const auto &f : record.field_list) {
1745         std::string file = GetFileNameByPath(f.name);
1746         ss << "\t" << f.type.GetPandasmName() << " " << file;
1747         if (f.metadata->GetValue().has_value()) {
1748             SerializeFieldValue(f, ss);
1749         }
1750         if (record_in_table) {
1751             const auto field_iter = record_iter->second.field_annotations.find(f.name);
1752             if (field_iter != record_iter->second.field_annotations.end()) {
1753                 Serialize(*f.metadata, field_iter->second, ss);
1754             } else {
1755                 Serialize(*f.metadata, {}, ss);
1756             }
1757         } else {
1758             Serialize(*f.metadata, {}, ss);
1759         }
1760 
1761         if (print_information) {
1762             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1763         } else {
1764             os << ss.str() << "\n";
1765         }
1766 
1767         ss.str(std::string());
1768         ss.clear();
1769 
1770         field_idx++;
1771     }
1772 }
1773 
getLiteralArrayTypeFromValue(const pandasm::LiteralArray & literal_array) const1774 std::string Disassembler::getLiteralArrayTypeFromValue(const pandasm::LiteralArray &literal_array) const
1775 {
1776     [[maybe_unused]] auto size = literal_array.literals_.size();
1777     ASSERT(size > 0);
1778     switch (literal_array.literals_[0].tag_) {
1779         case panda_file::LiteralTag::DOUBLE: {
1780             return "f64[]";
1781         }
1782         case panda_file::LiteralTag::BOOL: {
1783             return "u1[]";
1784         }
1785         case panda_file::LiteralTag::STRING: {
1786             return "panda.String[]";
1787         }
1788         case panda_file::LiteralTag::LITERALARRAY: {
1789             std::string offset_str = std::get<std::string>(literal_array.literals_[0].value_);
1790             uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1791             pandasm::LiteralArray lit_array;
1792             GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1793             return getLiteralArrayTypeFromValue(lit_array) + "[]";
1794         }
1795         case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1796             uint8_t typeIndex = std::get<uint8_t>(literal_array.literals_[0].value_);
1797             static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE = 0;
1798             static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE = 1;
1799             static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE = 2;
1800             switch (typeIndex) {
1801                 case EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE:
1802                     return "f64[]";
1803                 case EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE:
1804                     return "u1[]";
1805                 case EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE:
1806                     return "panda.String[]";
1807                 default:
1808                     UNREACHABLE();
1809                     break;
1810             }
1811         }
1812         default: {
1813             UNREACHABLE();
1814             break;
1815         }
1816     }
1817 }
1818 
SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> & elements,std::stringstream & ss,uint32_t idx) const1819 void Disassembler::SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> &elements,
1820                                               std::stringstream &ss, uint32_t idx) const
1821 {
1822     for (const auto &elem : elements) {
1823         auto type = elem.GetValue()->GetType();
1824         if (type == pandasm::Value::Type::U32) {
1825             ss << "\t"
1826                << "u32"
1827                << " " << elem.GetName() << " { ";
1828             ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>() << " }";
1829         } else if (type == pandasm::Value::Type::F64) {
1830             ss << "\t"
1831                << "f64"
1832                << " " << elem.GetName() << " { ";
1833             ss << elem.GetValue()->GetAsScalar()->GetValue<double>() << " }";
1834         } else if (type == pandasm::Value::Type::U1) {
1835             ss << "\t"
1836                << "u1"
1837                << " " << elem.GetName() << " { ";
1838             ss << elem.GetValue()->GetAsScalar()->GetValue<bool>() << " }";
1839         } else if (type == pandasm::Value::Type::STRING) {
1840             ss << "\t"
1841                << "panda.String"
1842                << " " << elem.GetName() << " { \"";
1843             ss << elem.GetValue()->GetAsScalar()->GetValue<std::string>() << "\" }";
1844         } else if (type == pandasm::Value::Type::LITERALARRAY) {
1845             uint32_t lit_array_fffset = std::stoi(elem.GetValue()->GetAsScalar()->GetValue<std::string>());
1846             pandasm::LiteralArray lit_array;
1847             GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1848             std::string typeName = getLiteralArrayTypeFromValue(lit_array);
1849             ss << "\t" << typeName << " " << elem.GetName() << " { ";
1850             DumpLiteralArray(lit_array, ss);
1851             ss << " }";
1852         } else {
1853             UNREACHABLE();
1854         }
1855         if (idx > 0) {
1856             ss << "\n";
1857         }
1858         --idx;
1859     }
1860 }
1861 
SerializeMethodAnnotation(const pandasm::AnnotationData & ann,std::ostream & os) const1862 void Disassembler::SerializeMethodAnnotation(const pandasm::AnnotationData &ann, std::ostream &os) const
1863 {
1864     os << ann.GetName() << ":\n";
1865     std::stringstream ss;
1866     std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1867     if (elements.empty()) {
1868         return;
1869     }
1870     uint32_t idx = elements.size() - 1;
1871     SerializeAnnotationElement(elements, ss, idx);
1872     os << ss.str() << "\n";
1873 }
1874 
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1875 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1876 {
1877     const auto annotations = method.metadata->GetAnnotations();
1878     if (annotations.empty()) {
1879         return;
1880     }
1881 
1882     for (const auto &ann : annotations) {
1883         SerializeMethodAnnotation(ann, os);
1884     }
1885 }
1886 
SerializeInstructions(const pandasm::Function & method,std::ostream & os,const std::map<std::string,MethodInfo>::const_iterator & method_info_it,bool print_method_info) const1887 void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os,
1888                                          const std::map<std::string, MethodInfo>::const_iterator &method_info_it,
1889                                          bool print_method_info) const
1890 {
1891     std::string delim = ": ";
1892     size_t width = 0;
1893     if (print_method_info) {
1894         for (const auto &i : method.ins) {
1895             size_t ins_size = i->ToString().size();
1896             if (i->IsLabel()) {
1897                 ins_size = i->Label().size() - delim.length();
1898             }
1899 
1900             if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) {
1901                 width = i->ToString().size();
1902             }
1903         }
1904     }
1905 
1906     size_t noLabelIdx = 0;
1907     for (size_t i = 0; i < method.ins.size(); i++) {
1908         std::string ins = method.ins[i]->ToString("", true, method.regs_num);
1909         if (method.ins[i]->IsLabel()) {
1910             size_t pos = ins.find(delim);
1911             std::string label = ins.substr(0, pos);
1912             ins.erase(0, pos + delim.length());
1913             os << label << ":\n";
1914         }
1915 
1916         if (ins != "") {
1917             os << "\t" << std::setw(width) << std::left << ins;
1918             if (print_method_info && noLabelIdx < method_info_it->second.instructions_info.size()) {
1919                 os << " # " << method_info_it->second.instructions_info.at(noLabelIdx);
1920             }
1921             os << "\n";
1922         }
1923 
1924         if (!method.ins[i]->IsLabel()) {
1925             noLabelIdx++;
1926         }
1927     }
1928 }
1929 
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1930 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1931 {
1932     SerializeMethodAnnotations(method, os);
1933     os << ".language " << panda::panda_file::LanguageToString(method.language) << std::endl;
1934     os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1935 
1936     if (method.params.size() > 0) {
1937         os << method.params[0].type.GetPandasmName() << " a0";
1938 
1939         for (uint8_t i = 1; i < method.params.size(); i++) {
1940             os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1941         }
1942     }
1943     os << ")";
1944 
1945     const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1946 
1947     const auto method_iter = prog_ann_.method_annotations.find(signature);
1948     if (method_iter != prog_ann_.method_annotations.end()) {
1949         Serialize(*method.metadata, method_iter->second, os);
1950     } else {
1951         Serialize(*method.metadata, {}, os);
1952     }
1953 
1954     auto method_info_it = prog_info_.methods_info.find(signature);
1955     bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1956     if (print_method_info) {
1957         os << " { # " << method_info_it->second.method_info << "\n#   CODE:\n";
1958     } else {
1959         os << " {\n";
1960     }
1961     SerializeInstructions(method, os, method_info_it, print_method_info);
1962 
1963     if (method.catch_blocks.size() != 0) {
1964         os << "\n";
1965 
1966         for (const auto &catch_block : method.catch_blocks) {
1967             Serialize(catch_block, os);
1968 
1969             os << "\n";
1970         }
1971     }
1972 
1973     if (print_method_info) {
1974         const MethodInfo &method_info = method_info_it->second;
1975         SerializeLineNumberTable(method_info.line_number_table, os);
1976         SerializeColumnNumberTable(method_info.column_number_table, os);
1977         SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1978     }
1979 
1980     os << "}\n\n";
1981 }
1982 
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1983 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1984                                     std::ostream &os) const
1985 {
1986     os << "[offset:0x" << std::hex << offset << ", name_value:" << name_value << "]" << std::endl;
1987 }
1988 
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1989 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1990 {
1991     if (catch_block.exception_record == "") {
1992         os << ".catchall ";
1993     } else {
1994         os << ".catch " << catch_block.exception_record << ", ";
1995     }
1996 
1997     os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1998 
1999     if (catch_block.catch_end_label != "") {
2000         os << ", " << catch_block.catch_end_label;
2001     }
2002 }
2003 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const2004 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
2005 {
2006     auto bool_attributes = meta.GetBoolAttributes();
2007     auto attributes = meta.GetAttributes();
2008     if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
2009         return;
2010     }
2011 
2012     os << " <";
2013 
2014     size_t size = bool_attributes.size();
2015     size_t idx = 0;
2016     for (const auto &attr : bool_attributes) {
2017         os << attr;
2018         ++idx;
2019 
2020         if (!attributes.empty() || !ann_list.empty() || idx < size) {
2021             os << ", ";
2022         }
2023     }
2024 
2025     size = attributes.size();
2026     idx = 0;
2027     for (const auto &[key, values] : attributes) {
2028         for (size_t i = 0; i < values.size(); i++) {
2029             os << key << "=" << values[i];
2030 
2031             if (i < values.size() - 1) {
2032                 os << ", ";
2033             }
2034         }
2035 
2036         ++idx;
2037 
2038         if (!ann_list.empty() || idx < size) {
2039             os << ", ";
2040         }
2041     }
2042 
2043     size = ann_list.size();
2044     idx = 0;
2045     for (const auto &[key, value] : ann_list) {
2046         os << key << "=" << value;
2047 
2048         ++idx;
2049 
2050         if (idx < size) {
2051             os << ", ";
2052         }
2053     }
2054 
2055     os << ">";
2056 }
2057 
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const2058 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
2059                                             std::ostream &os) const
2060 {
2061     if (line_number_table.empty()) {
2062         return;
2063     }
2064 
2065     os << "\n#   LINE_NUMBER_TABLE:\n";
2066     for (const auto &line_info : line_number_table) {
2067         os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
2068     }
2069 }
2070 
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const2071 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
2072                                               std::ostream &os) const
2073 {
2074     if (column_number_table.empty()) {
2075         return;
2076     }
2077 
2078     os << "\n#   COLUMN_NUMBER_TABLE:\n";
2079     for (const auto &column_info : column_number_table) {
2080         os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
2081     }
2082 }
2083 
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const2084 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
2085                                                const pandasm::Function &method, std::ostream &os) const
2086 {
2087     if (local_variable_table.empty()) {
2088         return;
2089     }
2090 
2091     os << "\n#   LOCAL_VARIABLE_TABLE:\n";
2092     os << "#\t Start   End  Register           Name   Signature\n";
2093     const int START_WIDTH = 5;
2094     const int END_WIDTH = 4;
2095     const int REG_WIDTH = 8;
2096     const int NAME_WIDTH = 14;
2097     for (const auto &variable_info : local_variable_table) {
2098         std::ostringstream reg_stream;
2099         reg_stream << variable_info.reg_number << '(';
2100         if (variable_info.reg_number < 0) {
2101             reg_stream << "acc";
2102         } else {
2103             uint32_t vreg = variable_info.reg_number;
2104             uint32_t first_arg_reg = method.GetTotalRegs();
2105             if (vreg < first_arg_reg) {
2106                 reg_stream << 'v' << vreg;
2107             } else {
2108                 reg_stream << 'a' << vreg - first_arg_reg;
2109             }
2110         }
2111         reg_stream << ')';
2112 
2113         os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << "  ";
2114         os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << "  ";
2115         os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
2116         os << std::setw(NAME_WIDTH) << std::right << variable_info.name << "   " << variable_info.type;
2117         if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
2118             os << " (" << variable_info.type_signature << ")";
2119         }
2120         os << "\n";
2121     }
2122 }
2123 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const2124 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
2125 {
2126     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
2127 }
2128 
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const2129 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id, size_t idx) const
2130 {
2131     std::stringstream name;
2132     const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
2133     std::string str_data = StringDataToString(file_->GetStringData(offset));
2134     if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
2135         name << GetMethodSignature(offset);
2136     } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
2137         name << '\"';
2138         name << str_data;
2139         name << '\"';
2140         string_offset_to_name_.emplace(offset, str_data);
2141     } else {
2142         ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
2143         pandasm::LiteralArray lit_array;
2144         GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
2145         name << SerializeLiteralArray(lit_array);
2146     }
2147 
2148     return name.str();
2149 }
2150 
GetRecordLanguage(panda_file::File::EntityId class_id) const2151 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
2152 {
2153     if (file_->IsExternal(class_id)) {
2154         // Keep the same behavior with abc2program
2155         return panda_file::DEFUALT_SOURCE_LANG;
2156     }
2157 
2158     panda_file::ClassDataAccessor cda(*file_, class_id);
2159     return cda.GetSourceLang().value_or(panda_file::DEFUALT_SOURCE_LANG);
2160 }
2161 
GetMethodLanguage(panda_file::File::EntityId method_id) const2162 panda::panda_file::SourceLang Disassembler::GetMethodLanguage(panda_file::File::EntityId method_id) const
2163 {
2164     if (file_->IsExternal(method_id)) {
2165         // Keep the same behavior with abc2program
2166         return panda_file::DEFUALT_SOURCE_LANG;
2167     }
2168 
2169     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
2170     return method_accessor.GetSourceLang().value_or(panda_file::DEFUALT_SOURCE_LANG);
2171 }
2172 
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)2173 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
2174                                 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
2175                                 panda_file::File::EntityId code_id)
2176 {
2177     const int32_t jmp_offset = std::stoi(pa_ins->Ids().at(0));
2178     const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
2179     if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
2180         size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
2181         if (idx != std::numeric_limits<size_t>::max()) {
2182             if (label_table->find(idx) == label_table->end()) {
2183                 std::stringstream ss {};
2184                 ss << "jump_label_" << label_table->size();
2185                 (*label_table)[idx] = ss.str();
2186             }
2187 
2188             pa_ins->SetId(0, label_table->at(idx));
2189         } else {
2190             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2191                                      << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2192                                      << ": invalid jump offset 0x" << jmp_offset
2193                                      << " - jumping in the middle of another instruction!";
2194         }
2195     } else {
2196         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2197                                  << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2198                                  << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
2199     }
2200 }
2201 
AddLabels(pandasm::Function * func,LabelTable & label_table)2202 static void AddLabels(pandasm::Function *func, LabelTable &label_table)
2203 {
2204     std::vector<pandasm::InsPtr> new_ins;
2205     new_ins.reserve(func->ins.size() + label_table.size());
2206 
2207     for (size_t i = 0; i < func->ins.size(); i++) {
2208         if (label_table.find(i) != label_table.end()) {
2209             new_ins.emplace_back(new pandasm::LabelIns(label_table[i]));
2210         }
2211         new_ins.emplace_back(std::move(func->ins[i]));
2212     }
2213 
2214     // In some case, the end label can be after the last instruction
2215     // Creating an invalid instruction for the label to make sure it can be serialized
2216     if (label_table.find(func->ins.size()) != label_table.end()) {
2217         new_ins.emplace_back(new pandasm::LabelIns(""));
2218     }
2219 
2220     func->ins.swap(new_ins);
2221 }
2222 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const2223 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
2224                                      panda_file::File::EntityId code_id) const
2225 {
2226     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
2227 
2228     const auto ins_sz = code_accessor.GetCodeSize();
2229     const auto ins_arr = code_accessor.GetInstructions();
2230 
2231     method->regs_num = code_accessor.GetNumVregs();
2232 
2233     auto bc_ins = BytecodeInstruction(ins_arr);
2234     const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
2235 
2236     LabelTable label_table = GetExceptions(method, method_id, code_id);
2237 
2238     IdList unknown_external_methods {};
2239 
2240     while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
2241         if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
2242             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2243                                      << "). bytecode instructions sequence corrupted for method " << method->name
2244                                      << "! went out of bounds";
2245 
2246             break;
2247         }
2248 
2249         auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
2250         if (pa_ins->IsJump()) {
2251             translateImmToLabel(pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
2252         }
2253 
2254         // check if method id is unknown external method. if so, emplace it in table
2255         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
2256             const auto arg_method_idx = bc_ins.GetId().AsIndex();
2257             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
2258 
2259             const auto arg_method_signature = GetMethodSignature(arg_method_id);
2260 
2261             const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
2262             const bool is_external = file_->IsExternal(arg_method_id);
2263             if (is_external && !is_present) {
2264                 unknown_external_methods.push_back(arg_method_id);
2265             }
2266         }
2267 
2268         method->ins.emplace_back(pa_ins);
2269         bc_ins = bc_ins.GetNext();
2270     }
2271 
2272     size_t instruction_count = method->ins.size();
2273     for (const auto &pair : label_table) {
2274         if (pair.first > instruction_count) {
2275             LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count
2276                                      << ", but the label index is " << pair.first;
2277         }
2278     }
2279 
2280     AddLabels(method, label_table);
2281 
2282     return unknown_external_methods;
2283 }
2284 
GetColumnNumber()2285 std::vector<size_t> Disassembler::GetColumnNumber()
2286 {
2287     std::vector<size_t> columnNumber;
2288     for (const auto &method_info : prog_info_.methods_info) {
2289         for (const auto &column_number : method_info.second.column_number_table) {
2290             columnNumber.push_back(column_number.column);
2291         }
2292     }
2293     return columnNumber;
2294 }
2295 
GetLineNumber()2296 std::vector<size_t> Disassembler::GetLineNumber()
2297 {
2298     std::vector<size_t> lineNumber;
2299     for (const auto &method_info : prog_info_.methods_info) {
2300         for (const auto &line_number : method_info.second.line_number_table) {
2301             lineNumber.push_back(line_number.line);
2302         }
2303     }
2304     return lineNumber;
2305 }
2306 
2307 }  // namespace panda::disasm
2308