1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19
20 #include <iomanip>
21
22 #include "get_language_specific_metadata.inc"
23
24 namespace panda::disasm {
25
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)26 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
27 {
28 auto file_new = panda_file::File::Open(filename_in);
29 file_.swap(file_new);
30
31 if (file_ != nullptr) {
32 prog_ = pandasm::Program {};
33
34 record_name_to_id_.clear();
35 method_name_to_id_.clear();
36 string_offset_to_name_.clear();
37 skip_strings_ = skip_strings;
38 quiet_ = quiet;
39
40 prog_info_ = ProgInfo {};
41
42 prog_ann_ = ProgAnnotations {};
43
44 GetRecords();
45 GetLiteralArrays();
46
47 GetLanguageSpecificMetadata();
48 } else {
49 LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
50 }
51 }
52
CollectInfo()53 void Disassembler::CollectInfo()
54 {
55 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
56
57 debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
58
59 for (const auto &pair : record_name_to_id_) {
60 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
61 }
62
63 for (const auto &pair : method_name_to_id_) {
64 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
65 }
66 }
67
Serialize(std::ostream & os,bool add_separators,bool print_information) const68 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
69 {
70 if (os.bad()) {
71 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
72
73 return;
74 }
75
76 if (file_ != nullptr) {
77 std::string abc_file = GetFileNameByAbsolutePath(file_->GetFilename());
78 os << "# source binary: " << abc_file << "\n\n";
79 }
80
81 SerializeLanguage(os);
82
83 if (add_separators) {
84 os << "# ====================\n"
85 "# LITERALS\n\n";
86 }
87
88 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
89
90 for (const auto &[key, lit_arr] : prog_.literalarray_table) {
91 Serialize(key, lit_arr, os);
92 }
93
94 for (const auto &[module_offset, array_table] : modulearray_table_) {
95 Serialize(module_offset, array_table, os);
96 }
97
98 os << "\n";
99
100 if (add_separators) {
101 os << "# ====================\n"
102 "# RECORDS\n\n";
103 }
104
105 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
106
107 for (const auto &r : prog_.record_table) {
108 Serialize(r.second, os, print_information);
109 }
110
111 if (add_separators) {
112 os << "# ====================\n"
113 "# METHODS\n\n";
114 }
115
116 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
117
118 for (const auto &m : prog_.function_table) {
119 Serialize(m.second, os, print_information);
120 }
121
122 if (add_separators) {
123 os << "# ====================\n"
124 "# STRING\n\n";
125 }
126
127 LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
128
129 for (const auto &[offset, name_value] : string_offset_to_name_) {
130 SerializeStrings(offset, name_value, os);
131 }
132 }
133
IsSystemType(const std::string & type_name)134 inline bool Disassembler::IsSystemType(const std::string &type_name)
135 {
136 bool is_array_type = type_name.find('[') != std::string::npos;
137 bool is_global = type_name == "_GLOBAL";
138
139 return is_array_type || is_global;
140 }
141
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)142 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
143 {
144 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
145
146 if (record == nullptr) {
147 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
148
149 return;
150 }
151
152 record->name = GetFullRecordName(record_id);
153
154 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
155
156 GetMetaData(record, record_id);
157
158 if (!file_->IsExternal(record_id)) {
159 GetMethods(record_id);
160 GetFields(record, record_id);
161 }
162 }
163
AddMethodToTables(const panda_file::File::EntityId & method_id)164 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
165 {
166 pandasm::Function new_method("", file_language_);
167 GetMethod(&new_method, method_id);
168
169 const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
170 if (prog_.function_table.find(signature) != prog_.function_table.end()) {
171 return;
172 }
173
174 GetMethodAnnotations(new_method, method_id);
175 method_name_to_id_.emplace(signature, method_id);
176 prog_.function_synonyms[new_method.name].push_back(signature);
177 prog_.function_table.emplace(signature, std::move(new_method));
178 }
179
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)180 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
181 {
182 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
183
184 if (method == nullptr) {
185 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
186
187 return;
188 }
189
190 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
191
192 method->name = GetFullMethodName(method_id);
193
194 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
195
196 GetParams(method, method_accessor.GetProtoId());
197 GetMetaData(method, method_id);
198
199 if (method_accessor.GetCodeId().has_value()) {
200 const IdList id_list = GetInstructions(method, method_id, method_accessor.GetCodeId().value());
201
202 for (const auto &id : id_list) {
203 AddMethodToTables(id);
204 }
205 } else {
206 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
207 << "). implementation of method expected, but no \'CODE\' tag was found!";
208
209 return;
210 }
211 }
212
213 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const214 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
215 const panda_file::LiteralDataAccessor::LiteralValue &value) const
216 {
217 panda_file::File::EntityId id(std::get<uint32_t>(value));
218 auto sp = file_->GetSpanFromId(id);
219 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
220 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
221 for (size_t i = 0; i < len; i++) {
222 pandasm::LiteralArray::Literal lit;
223 lit.tag_ = tag;
224 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
225 lit_array->literals_.push_back(lit);
226 }
227 return;
228 }
229 for (size_t i = 0; i < len; i++) {
230 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
231 pandasm::LiteralArray::Literal lit;
232 lit.tag_ = tag;
233 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
234 lit_array->literals_.push_back(lit);
235 }
236 }
237
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const238 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
239 const panda_file::LiteralDataAccessor::LiteralValue &value,
240 const panda_file::LiteralTag &tag) const
241 {
242 pandasm::LiteralArray::Literal lit;
243 lit.tag_ = tag;
244 switch (tag) {
245 case panda_file::LiteralTag::BOOL: {
246 lit.value_ = std::get<bool>(value);
247 break;
248 }
249 case panda_file::LiteralTag::ACCESSOR:
250 case panda_file::LiteralTag::NULLVALUE:
251 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
252 lit.value_ = std::get<uint8_t>(value);
253 break;
254 }
255 case panda_file::LiteralTag::METHODAFFILIATE: {
256 lit.value_ = std::get<uint16_t>(value);
257 break;
258 }
259 case panda_file::LiteralTag::LITERALBUFFERINDEX:
260 case panda_file::LiteralTag::INTEGER: {
261 lit.value_ = std::get<uint32_t>(value);
262 break;
263 }
264 case panda_file::LiteralTag::DOUBLE: {
265 lit.value_ = std::get<double>(value);
266 break;
267 }
268 case panda_file::LiteralTag::STRING: {
269 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
270 lit.value_ = StringDataToString(str_data);
271 break;
272 }
273 case panda_file::LiteralTag::METHOD:
274 case panda_file::LiteralTag::GETTER:
275 case panda_file::LiteralTag::SETTER:
276 case panda_file::LiteralTag::GENERATORMETHOD: {
277 panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
278 lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
279 break;
280 }
281 case panda_file::LiteralTag::LITERALARRAY: {
282 std::stringstream ss;
283 ss << "0x" << std::hex << std::get<uint32_t>(value);
284 lit.value_ = ss.str();
285 break;
286 }
287 case panda_file::LiteralTag::TAGVALUE: {
288 return;
289 }
290 default: {
291 UNREACHABLE();
292 }
293 }
294 lit_array->literals_.push_back(lit);
295 }
296
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const297 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
298 {
299 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
300 lit_array_accessor.EnumerateLiteralVals(
301 offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
302 const panda_file::LiteralTag &tag) {
303 switch (tag) {
304 case panda_file::LiteralTag::ARRAY_U1: {
305 FillLiteralArrayData<bool>(lit_array, tag, value);
306 break;
307 }
308 case panda_file::LiteralTag::ARRAY_I8:
309 case panda_file::LiteralTag::ARRAY_U8: {
310 FillLiteralArrayData<uint8_t>(lit_array, tag, value);
311 break;
312 }
313 case panda_file::LiteralTag::ARRAY_I16:
314 case panda_file::LiteralTag::ARRAY_U16: {
315 FillLiteralArrayData<uint16_t>(lit_array, tag, value);
316 break;
317 }
318 case panda_file::LiteralTag::ARRAY_I32:
319 case panda_file::LiteralTag::ARRAY_U32: {
320 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
321 break;
322 }
323 case panda_file::LiteralTag::ARRAY_I64:
324 case panda_file::LiteralTag::ARRAY_U64: {
325 FillLiteralArrayData<uint64_t>(lit_array, tag, value);
326 break;
327 }
328 case panda_file::LiteralTag::ARRAY_F32: {
329 FillLiteralArrayData<float>(lit_array, tag, value);
330 break;
331 }
332 case panda_file::LiteralTag::ARRAY_F64: {
333 FillLiteralArrayData<double>(lit_array, tag, value);
334 break;
335 }
336 case panda_file::LiteralTag::ARRAY_STRING: {
337 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
338 break;
339 }
340 default: {
341 FillLiteralData(lit_array, value, tag);
342 break;
343 }
344 }
345 });
346 }
347
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const348 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
349 {
350 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
351 GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
352 }
353
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const354 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
355 {
356 return module_literals_.find(id.GetOffset()) != module_literals_.end();
357 }
358
GetLiteralArrays()359 void Disassembler::GetLiteralArrays()
360 {
361 const auto lit_arrays_id = file_->GetLiteralArraysId();
362
363 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
364 << lit_arrays_id << ")";
365
366 panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
367 size_t num_litarrays = lda.GetLiteralNum();
368 for (size_t index = 0; index < num_litarrays; index++) {
369 auto id = lda.GetLiteralArrayId(index);
370 if (IsModuleLiteralOffset(id)) {
371 std::stringstream ss;
372 ss << index << " 0x" << std::hex << id.GetOffset();
373 modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
374 continue;
375 }
376 std::stringstream ss;
377 ss << index << " 0x" << std::hex << id.GetOffset();
378 panda::pandasm::LiteralArray lit_arr;
379 GetLiteralArray(&lit_arr, index);
380 prog_.literalarray_table.emplace(ss.str(), lit_arr);
381 }
382 }
383
ModuleTagToString(panda_file::ModuleTag & tag) const384 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
385 {
386 switch (tag) {
387 case panda_file::ModuleTag::REGULAR_IMPORT:
388 return "REGULAR_IMPORT";
389 case panda_file::ModuleTag::NAMESPACE_IMPORT:
390 return "NAMESPACE_IMPORT";
391 case panda_file::ModuleTag::LOCAL_EXPORT:
392 return "LOCAL_EXPORT";
393 case panda_file::ModuleTag::INDIRECT_EXPORT:
394 return "INDIRECT_EXPORT";
395 case panda_file::ModuleTag::STAR_EXPORT:
396 return "STAR_EXPORT";
397 default: {
398 UNREACHABLE();
399 break;
400 }
401 }
402 return "";
403 }
404
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const405 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
406 {
407 panda_file::ModuleDataAccessor mda(*file_, module_id);
408 const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
409 std::vector<std::string> module_literal_array;
410 mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset,
411 uint32_t request_module_idx, uint32_t import_name_offset,
412 uint32_t local_name_offset) {
413 std::stringstream ss;
414 ss << "ModuleTag: " << ModuleTagToString(tag);
415 if (IsValidOffset(local_name_offset)) {
416 ss << ", local_name: " << GetStringByOffset(local_name_offset);
417 }
418 if (IsValidOffset(export_name_offset)) {
419 ss << ", export_name: " << GetStringByOffset(export_name_offset);
420 }
421 if (IsValidOffset(import_name_offset)) {
422 ss << ", import_name: " << GetStringByOffset(import_name_offset);
423 }
424 if (request_module_idx < request_modules_offset.size()) {
425 auto request_module_offset = request_modules_offset[request_module_idx];
426 ASSERT(IsValidOffset(request_module_offset));
427 ss << ", module_request: " << GetStringByOffset(request_module_offset);
428 }
429 module_literal_array.push_back(ss.str());
430 });
431
432 return module_literal_array;
433 }
434
GetRecords()435 void Disassembler::GetRecords()
436 {
437 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
438
439 const auto class_idx = file_->GetClasses();
440
441 for (size_t i = 0; i < class_idx.size(); i++) {
442 uint32_t class_id = class_idx[i];
443 auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
444
445 if (class_id > file_->GetHeader()->file_size) {
446 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
447 << class_off << "). binary file corrupted. record offset (0x" << class_id
448 << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
449 break;
450 }
451
452 const panda_file::File::EntityId record_id {class_id};
453 auto language = GetRecordLanguage(record_id);
454 if (language != file_language_) {
455 if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
456 file_language_ = language;
457 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
458 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
459 << std::hex << class_off << "). record's language ("
460 << panda_file::LanguageToString(language)
461 << ") differs from file's language ("
462 << panda_file::LanguageToString(file_language_) << ")!";
463 }
464 }
465
466 pandasm::Record record("", file_language_);
467 GetRecord(&record, record_id);
468
469 if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
470 record_name_to_id_.emplace(record.name, record_id);
471 prog_.record_table.emplace(record.name, std::move(record));
472 }
473 }
474 }
475
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)476 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
477 {
478 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
479
480 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
481 pandasm::Field field(file_language_);
482
483 panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
484 field.name = StringDataToString(file_->GetStringData(field_name_id));
485
486 uint32_t field_type = field_accessor.GetType();
487 field.type = FieldTypeToPandasmType(field_type);
488
489 GetMetaData(&field, field_accessor.GetFieldId());
490
491 record->field_list.push_back(std::move(field));
492 });
493 }
494
GetMethods(const panda_file::File::EntityId & record_id)495 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
496 {
497 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
498
499 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
500 AddMethodToTables(method_accessor.GetMethodId());
501 });
502 }
503
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)504 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
505 {
506 static const std::string MODULE_REQUEST_ANN_NAME = "L_ESConcurrentModuleRequestsAnnotation";
507 static const std::string SLOT_NUMBER_ANN_NAME = "L_ESSlotNumberAnnotation";
508
509 panda_file::MethodDataAccessor mda(*file_, method_id);
510 mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
511 panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
512 auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
513 if (std::strcmp("L_ESConcurrentModuleRequestsAnnotation;", annotation_name) == 0) {
514 CreateAnnotationElement(ada, method, MODULE_REQUEST_ANN_NAME,
515 "ConcurrentModuleRequest", "concurrentModuleRequestIdx");
516 } else if (std::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
517 CreateAnnotationElement(ada, method, SLOT_NUMBER_ANN_NAME, "SlotNumber", "slotNumberIdx");
518 }
519 });
520 }
521
CreateAnnotationElement(panda_file::AnnotationDataAccessor & ada,pandasm::Function & method,const std::string & ann_name,const std::string & ann_elem_name,const std::string & ann_elem_index)522 void Disassembler::CreateAnnotationElement(panda_file::AnnotationDataAccessor &ada, pandasm::Function &method,
523 const std::string &ann_name, const std::string &ann_elem_name,
524 const std::string &ann_elem_index)
525 {
526 if (ann_elem_name.empty() || ann_elem_index.empty()) {
527 return;
528 }
529
530 uint32_t elem_count = ada.GetCount();
531 for (uint32_t i = 0; i < elem_count; i++) {
532 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
533 auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
534 if (ann_elem_name == elem_name) {
535 uint32_t ann_elem_value = adae.GetScalarValue().GetValue();
536 AddAnnotationElement(method, ann_name, ann_elem_index, ann_elem_value);
537 }
538 }
539 }
540
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const uint32_t & value)541 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
542 const std::string &key, const uint32_t &value)
543 {
544 if (annotation_name.empty() || key.empty()) {
545 return;
546 }
547
548 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
549 const auto ann_iter = std::find_if(method_annotation.begin(), method_annotation.end(),
550 [&](pandasm::AnnotationData &ann) -> bool {
551 return ann.GetName() == annotation_name;
552 });
553
554 pandasm::AnnotationElement annotation_element(key,
555 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
556 const bool is_annotation = ann_iter != method_annotation.end();
557 if (is_annotation) {
558 ann_iter->AddElement(std::move(annotation_element));
559 method.metadata->SetAnnotations(std::move(method_annotation));
560 } else {
561 std::vector<pandasm::AnnotationElement> elements;
562 pandasm::AnnotationData ann_data(annotation_name, elements);
563 ann_data.AddElement(std::move(annotation_element));
564 std::vector<pandasm::AnnotationData> annotations;
565 annotations.push_back(std::move(ann_data));
566 method.metadata->AddAnnotations(annotations);
567 }
568 }
569
GetAnnotationByMethodName(const std::string & method_name) const570 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
571 {
572 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
573 bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
574 if (!is_signature) {
575 return std::nullopt;
576 }
577
578 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
579 bool is_method = method_iter != prog_.function_table.end();
580 const auto annotations = method_iter->second.metadata->GetAnnotations();
581 if (!is_method || annotations.empty()) {
582 return std::nullopt;
583 }
584
585 std::vector<std::string> ann;
586 for (const auto &ann_data : annotations) {
587 ann.emplace_back(ann_data.GetName());
588 }
589 return ann;
590 }
591
GetStrings() const592 std::vector<std::string> Disassembler::GetStrings() const
593 {
594 std::vector<std::string> strings;
595 for (auto &str_info : string_offset_to_name_) {
596 strings.emplace_back(str_info.second);
597 }
598
599 return strings;
600 }
601
GetModuleLiterals() const602 std::vector<std::string> Disassembler::GetModuleLiterals() const
603 {
604 std::vector<std::string> module_literals;
605 for (auto &module_array : modulearray_table_) {
606 for (auto &module : module_array.second) {
607 module_literals.emplace_back(module);
608 }
609 }
610
611 return module_literals;
612 }
613
GetParams(pandasm::Function * method,const panda_file::File::EntityId & proto_id) const614 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &proto_id) const
615 {
616 /**
617 * frame size - 2^16 - 1
618 */
619 static const uint32_t MAX_ARG_NUM = 0xFFFF;
620
621 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << proto_id << " (0x" << std::hex << proto_id << ")";
622
623 if (method == nullptr) {
624 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
625
626 return;
627 }
628
629 panda_file::ProtoDataAccessor proto_accessor(*file_, proto_id);
630
631 auto params_num = proto_accessor.GetNumArgs();
632 if (params_num > MAX_ARG_NUM) {
633 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << proto_id << " (0x" << std::hex << proto_id
634 << "). number of function's arguments (" << std::dec << params_num
635 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
636
637 return;
638 }
639
640 size_t ref_idx = 0;
641 method->return_type = PFTypeToPandasmType(proto_accessor.GetReturnType(), proto_accessor, ref_idx);
642
643 for (uint8_t i = 0; i < params_num; i++) {
644 auto arg_type = PFTypeToPandasmType(proto_accessor.GetArgType(i), proto_accessor, ref_idx);
645 method->params.push_back(pandasm::Function::Parameter(arg_type, file_language_));
646 }
647 }
648
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const649 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
650 panda_file::File::EntityId code_id) const
651 {
652 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
653
654 if (method == nullptr) {
655 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
656 return LabelTable {};
657 }
658
659 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
660
661 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
662 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
663
664 size_t try_idx = 0;
665 LabelTable label_table {};
666 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
667 pandasm::Function::CatchBlock catch_block_pa {};
668 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
669 return false;
670 }
671 size_t catch_idx = 0;
672 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
673 auto class_idx = catch_block.GetTypeIdx();
674 if (class_idx == panda_file::INVALID_INDEX) {
675 catch_block_pa.exception_record = "";
676 } else {
677 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
678 catch_block_pa.exception_record = GetFullRecordName(class_id);
679 }
680 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
681 catch_idx)) {
682 return false;
683 }
684
685 method->catch_blocks.push_back(catch_block_pa);
686 catch_block_pa.catch_begin_label = "";
687 catch_block_pa.catch_end_label = "";
688 catch_idx++;
689
690 return true;
691 });
692 try_idx++;
693
694 return true;
695 });
696
697 return label_table;
698 }
699
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)700 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
701 {
702 size_t count = 0;
703
704 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
705 count++;
706 bc_ins_first = bc_ins_first.GetNext();
707 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
708 return std::numeric_limits<size_t>::max();
709 }
710 }
711
712 return count;
713 }
714
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const715 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
716 const panda_file::CodeDataAccessor::TryBlock &try_block,
717 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
718 size_t try_idx) const
719 {
720 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
721 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
722
723 const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
724 const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
725
726 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
727 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
728 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
729 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
730
731 if (!try_begin_offset_in_range || !try_begin_offset_valid) {
732 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
733 << try_begin_bc_ins.GetAddress();
734 return false;
735 } else {
736 std::stringstream ss {};
737 ss << "try_begin_label_" << try_idx;
738
739 LabelTable::iterator it = label_table->find(try_begin_idx);
740 if (it == label_table->end()) {
741 catch_block_pa->try_begin_label = ss.str();
742 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
743 } else {
744 catch_block_pa->try_begin_label = it->second;
745 }
746 }
747
748 if (!try_end_offset_in_range || !try_end_offset_valid) {
749 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
750 << try_end_bc_ins.GetAddress();
751 return false;
752 } else {
753 std::stringstream ss {};
754 ss << "try_end_label_" << try_idx;
755
756 LabelTable::iterator it = label_table->find(try_end_idx);
757 if (it == label_table->end()) {
758 catch_block_pa->try_end_label = ss.str();
759 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
760 } else {
761 catch_block_pa->try_end_label = it->second;
762 }
763 }
764
765 return true;
766 }
767
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const768 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
769 const panda_file::CodeDataAccessor::CatchBlock &catch_block,
770 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
771 size_t try_idx, size_t catch_idx) const
772 {
773 const auto handler_begin_offset = catch_block.GetHandlerPc();
774 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
775
776 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
777 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
778
779 const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
780 const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
781
782 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
783 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
784 const bool handler_end_present = catch_block.GetCodeSize() != 0;
785 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
786 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
787
788 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
789 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
790 << handler_begin_bc_ins.GetAddress();
791 return false;
792 } else {
793 std::stringstream ss {};
794 ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
795
796 LabelTable::iterator it = label_table->find(handler_begin_idx);
797 if (it == label_table->end()) {
798 catch_block_pa->catch_begin_label = ss.str();
799 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
800 } else {
801 catch_block_pa->catch_begin_label = it->second;
802 }
803 }
804
805 if (!handler_end_offset_in_range || !handler_end_offset_valid) {
806 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
807 << handler_end_bc_ins.GetAddress();
808 return false;
809 } else if (handler_end_present) {
810 std::stringstream ss {};
811 ss << "handler_end_label_" << try_idx << "_" << catch_idx;
812
813 LabelTable::iterator it = label_table->find(handler_end_idx);
814 if (it == label_table->end()) {
815 catch_block_pa->catch_end_label = ss.str();
816 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
817 } else {
818 catch_block_pa->catch_end_label = it->second;
819 }
820 }
821
822 return true;
823 }
824
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const825 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
826 {
827 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
828 << ")";
829
830 if (method == nullptr) {
831 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
832
833 return;
834 }
835
836 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
837
838 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
839
840 if (!method_accessor.IsStatic()) {
841 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
842 auto this_type = pandasm::Type::FromDescriptor(class_name);
843
844 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
845 << "\') is not static. emplacing self-argument of type " << this_type.GetName();
846
847 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
848 } else {
849 method->metadata->SetAttribute("static");
850 }
851
852 if (file_->IsExternal(method_accessor.GetMethodId())) {
853 method->metadata->SetAttribute("external");
854 }
855
856 std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
857 std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
858
859 const bool is_ctor = (method_name_raw == ctor_name);
860 const bool is_cctor = (method_name_raw == cctor_name);
861
862 if (is_ctor) {
863 method->metadata->SetAttribute("ctor");
864 method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
865 } else if (is_cctor) {
866 method->metadata->SetAttribute("cctor");
867 method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
868 }
869 }
870
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const871 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
872 {
873 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
874 << ")";
875
876 if (record == nullptr) {
877 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
878
879 return;
880 }
881
882 if (file_->IsExternal(record_id)) {
883 record->metadata->SetAttribute("external");
884 }
885 }
886
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id)887 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id)
888 {
889 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
890
891 if (field == nullptr) {
892 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
893
894 return;
895 }
896
897 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
898
899 if (field_accessor.IsExternal()) {
900 field->metadata->SetAttribute("external");
901 }
902
903 if (field_accessor.IsStatic()) {
904 field->metadata->SetAttribute("static");
905 }
906
907 if (field->type.GetId() == panda_file::Type::TypeId::U32) {
908 const auto offset = field_accessor.GetValue<uint32_t>().value();
909 static const std::string TYPE_SUMMARY_FIELD_NAME = "typeSummaryOffset";
910 if (field->name != TYPE_SUMMARY_FIELD_NAME) {
911 LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
912 << " is excluded";
913 module_literals_.insert(offset);
914 }
915 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
916 }
917 if (field->type.GetId() == panda_file::Type::TypeId::U8) {
918 const auto val = field_accessor.GetValue<uint8_t>().value();
919 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
920 }
921 }
922
AnnotationTagToString(const char tag) const923 std::string Disassembler::AnnotationTagToString(const char tag) const
924 {
925 switch (tag) {
926 case '1':
927 return "u1";
928 case '2':
929 return "i8";
930 case '3':
931 return "u8";
932 case '4':
933 return "i16";
934 case '5':
935 return "u16";
936 case '6':
937 return "i32";
938 case '7':
939 return "u32";
940 case '8':
941 return "i64";
942 case '9':
943 return "u64";
944 case 'A':
945 return "f32";
946 case 'B':
947 return "f64";
948 case 'C':
949 return "string";
950 case 'D':
951 return "record";
952 case 'E':
953 return "method";
954 case 'F':
955 return "enum";
956 case 'G':
957 return "annotation";
958 case 'I':
959 return "void";
960 case 'J':
961 return "method_handle";
962 case 'K':
963 return "u1[]";
964 case 'L':
965 return "i8[]";
966 case 'M':
967 return "u8[]";
968 case 'N':
969 return "i16[]";
970 case 'O':
971 return "u16[]";
972 case 'P':
973 return "i32[]";
974 case 'Q':
975 return "u32[]";
976 case 'R':
977 return "i64[]";
978 case 'S':
979 return "u64[]";
980 case 'T':
981 return "f32[]";
982 case 'U':
983 return "f64[]";
984 case 'V':
985 return "string[]";
986 case 'W':
987 return "record[]";
988 case 'X':
989 return "method[]";
990 case 'Y':
991 return "enum[]";
992 case 'Z':
993 return "annotation[]";
994 case '@':
995 return "method_handle[]";
996 case '*':
997 return "nullptr string";
998 default:
999 return std::string();
1000 }
1001 }
1002
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1003 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1004 {
1005 std::stringstream ss;
1006
1007 if (type == "i8") {
1008 int8_t res = value.Get<int8_t>();
1009 ss << static_cast<int>(res);
1010 } else if (type == "u1" || type == "u8") {
1011 uint8_t res = value.Get<uint8_t>();
1012 ss << static_cast<unsigned int>(res);
1013 } else if (type == "i16") {
1014 ss << value.Get<int16_t>();
1015 } else if (type == "u16") {
1016 ss << value.Get<uint16_t>();
1017 } else if (type == "i32") {
1018 ss << value.Get<int32_t>();
1019 } else if (type == "u32") {
1020 ss << value.Get<uint32_t>();
1021 } else if (type == "i64") {
1022 ss << value.Get<int64_t>();
1023 } else if (type == "u64") {
1024 ss << value.Get<uint64_t>();
1025 } else if (type == "f32") {
1026 ss << value.Get<float>();
1027 } else if (type == "f64") {
1028 ss << value.Get<double>();
1029 } else if (type == "string") {
1030 const auto id = value.Get<panda_file::File::EntityId>();
1031 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1032 } else if (type == "record") {
1033 const auto id = value.Get<panda_file::File::EntityId>();
1034 ss << GetFullRecordName(id);
1035 } else if (type == "method") {
1036 const auto id = value.Get<panda_file::File::EntityId>();
1037 AddMethodToTables(id);
1038 ss << GetMethodSignature(id);
1039 } else if (type == "enum") {
1040 const auto id = value.Get<panda_file::File::EntityId>();
1041 panda_file::FieldDataAccessor field_accessor(*file_, id);
1042 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1043 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1044 } else if (type == "annotation") {
1045 const auto id = value.Get<panda_file::File::EntityId>();
1046 ss << "id_" << id;
1047 } else if (type == "void") {
1048 return std::string();
1049 } else if (type == "method_handle") {
1050 }
1051
1052 return ss.str();
1053 }
1054
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1055 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1056 const size_t idx)
1057 {
1058 std::stringstream ss;
1059
1060 if (type == "i8") {
1061 int8_t res = value.Get<int8_t>(idx);
1062 ss << static_cast<int>(res);
1063 } else if (type == "u1" || type == "u8") {
1064 uint8_t res = value.Get<uint8_t>(idx);
1065 ss << static_cast<unsigned int>(res);
1066 } else if (type == "i16") {
1067 ss << value.Get<int16_t>(idx);
1068 } else if (type == "u16") {
1069 ss << value.Get<uint16_t>(idx);
1070 } else if (type == "i32") {
1071 ss << value.Get<int32_t>(idx);
1072 } else if (type == "u32") {
1073 ss << value.Get<uint32_t>(idx);
1074 } else if (type == "i64") {
1075 ss << value.Get<int64_t>(idx);
1076 } else if (type == "u64") {
1077 ss << value.Get<uint64_t>(idx);
1078 } else if (type == "f32") {
1079 ss << value.Get<float>(idx);
1080 } else if (type == "f64") {
1081 ss << value.Get<double>(idx);
1082 } else if (type == "string") {
1083 const auto id = value.Get<panda_file::File::EntityId>(idx);
1084 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1085 } else if (type == "record") {
1086 const auto id = value.Get<panda_file::File::EntityId>(idx);
1087 ss << GetFullRecordName(id);
1088 } else if (type == "method") {
1089 const auto id = value.Get<panda_file::File::EntityId>(idx);
1090 AddMethodToTables(id);
1091 ss << GetMethodSignature(id);
1092 } else if (type == "enum") {
1093 const auto id = value.Get<panda_file::File::EntityId>(idx);
1094 panda_file::FieldDataAccessor field_accessor(*file_, id);
1095 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1096 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1097 } else if (type == "annotation") {
1098 const auto id = value.Get<panda_file::File::EntityId>(idx);
1099 ss << "id_" << id;
1100 } else if (type == "method_handle") {
1101 } else if (type == "nullptr string") {
1102 }
1103
1104 return ss.str();
1105 }
1106
GetFullMethodName(const panda_file::File::EntityId & method_id) const1107 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1108 {
1109 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1110
1111 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1112
1113 std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1114 if (IsSystemType(class_name)) {
1115 class_name = "";
1116 } else {
1117 class_name += ".";
1118 }
1119
1120 return class_name + method_name_raw;
1121 }
1122
GetMethodSignature(const panda_file::File::EntityId & method_id) const1123 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1124 {
1125 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1126
1127 pandasm::Function method(GetFullMethodName(method_id), file_language_);
1128 GetParams(&method, method_accessor.GetProtoId());
1129 GetMetaData(&method, method_id);
1130
1131 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1132 }
1133
GetFullRecordName(const panda_file::File::EntityId & class_id) const1134 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1135 {
1136 std::string name = StringDataToString(file_->GetStringData(class_id));
1137
1138 auto type = pandasm::Type::FromDescriptor(name);
1139 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1140
1141 return type.GetPandasmName();
1142 }
1143
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1144 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1145 {
1146 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1147
1148 if (file_->IsExternal(record_id)) {
1149 return;
1150 }
1151
1152 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1153 std::stringstream ss;
1154
1155 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1156 << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1157 << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1158
1159 record_info->record_info = ss.str();
1160 ss.str(std::string());
1161
1162 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1163 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1164 << field_accessor.GetFieldId();
1165
1166 record_info->fields_info.push_back(ss.str());
1167
1168 ss.str(std::string());
1169 });
1170 }
1171
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1172 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1173 {
1174 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1175
1176 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1177 std::stringstream ss;
1178
1179 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1180 << method_accessor.GetMethodId();
1181
1182 if (method_accessor.GetCodeId().has_value()) {
1183 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1184 << method_accessor.GetCodeId().value();
1185
1186 GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1187 } else {
1188 ss << ", <no code>";
1189 }
1190
1191 method_info->method_info = ss.str();
1192
1193 if (method_accessor.GetCodeId()) {
1194 ASSERT(debug_info_extractor_ != nullptr);
1195 method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1196 method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1197 method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1198
1199 // Add information about parameters into the table
1200 panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1201 auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1202 uint32_t code_size = codeda.GetCodeSize();
1203 for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1204 panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1205 method_info->local_variable_table.emplace_back(arg_info);
1206 }
1207 }
1208 }
1209
IsArray(const panda_file::LiteralTag & tag)1210 static bool IsArray(const panda_file::LiteralTag &tag)
1211 {
1212 switch (tag) {
1213 case panda_file::LiteralTag::ARRAY_U1:
1214 case panda_file::LiteralTag::ARRAY_U8:
1215 case panda_file::LiteralTag::ARRAY_I8:
1216 case panda_file::LiteralTag::ARRAY_U16:
1217 case panda_file::LiteralTag::ARRAY_I16:
1218 case panda_file::LiteralTag::ARRAY_U32:
1219 case panda_file::LiteralTag::ARRAY_I32:
1220 case panda_file::LiteralTag::ARRAY_U64:
1221 case panda_file::LiteralTag::ARRAY_I64:
1222 case panda_file::LiteralTag::ARRAY_F32:
1223 case panda_file::LiteralTag::ARRAY_F64:
1224 case panda_file::LiteralTag::ARRAY_STRING:
1225 return true;
1226 default:
1227 return false;
1228 }
1229 }
1230
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1231 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1232 {
1233 std::stringstream ret;
1234 if (lit_array.literals_.empty()) {
1235 return "";
1236 }
1237
1238 std::stringstream ss;
1239 ss << "{ ";
1240 const auto &tag = lit_array.literals_[0].tag_;
1241 if (IsArray(tag)) {
1242 ss << LiteralTagToString(tag);
1243 }
1244 ss << lit_array.literals_.size();
1245 ss << " [ ";
1246 SerializeValues(lit_array, ss);
1247 ss << "]}";
1248 return ss.str();
1249 }
1250
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1251 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1252 {
1253 os << key << " ";
1254 os << SerializeLiteralArray(lit_array);
1255 os << "\n";
1256 }
1257
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1258 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1259 std::ostream &os) const
1260 {
1261 os << module_offset << " ";
1262 os << SerializeModuleLiteralArray(module_array);
1263 os << "\n";
1264 }
1265
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1266 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1267 {
1268 if (module_array.empty()) {
1269 return "";
1270 }
1271
1272 std::stringstream ss;
1273 ss << "{ ";
1274 ss << module_array.size();
1275 ss << " [ ";
1276 for (size_t index = 0; index < module_array.size(); index++) {
1277 ss << module_array[index] << "; ";
1278 }
1279 ss << "]}";
1280 return ss.str();
1281 }
1282
LiteralTagToString(const panda_file::LiteralTag & tag) const1283 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1284 {
1285 switch (tag) {
1286 case panda_file::LiteralTag::BOOL:
1287 case panda_file::LiteralTag::ARRAY_U1:
1288 return "u1";
1289 case panda_file::LiteralTag::ARRAY_U8:
1290 return "u8";
1291 case panda_file::LiteralTag::ARRAY_I8:
1292 return "i8";
1293 case panda_file::LiteralTag::ARRAY_U16:
1294 return "u16";
1295 case panda_file::LiteralTag::ARRAY_I16:
1296 return "i16";
1297 case panda_file::LiteralTag::ARRAY_U32:
1298 return "u32";
1299 case panda_file::LiteralTag::INTEGER:
1300 case panda_file::LiteralTag::ARRAY_I32:
1301 return "i32";
1302 case panda_file::LiteralTag::ARRAY_U64:
1303 return "u64";
1304 case panda_file::LiteralTag::ARRAY_I64:
1305 return "i64";
1306 case panda_file::LiteralTag::ARRAY_F32:
1307 return "f32";
1308 case panda_file::LiteralTag::DOUBLE:
1309 case panda_file::LiteralTag::ARRAY_F64:
1310 return "f64";
1311 case panda_file::LiteralTag::STRING:
1312 case panda_file::LiteralTag::ARRAY_STRING:
1313 return "string";
1314 case panda_file::LiteralTag::METHOD:
1315 return "method";
1316 case panda_file::LiteralTag::GETTER:
1317 return "getter";
1318 case panda_file::LiteralTag::SETTER:
1319 return "setter";
1320 case panda_file::LiteralTag::GENERATORMETHOD:
1321 return "generator_method";
1322 case panda_file::LiteralTag::ACCESSOR:
1323 return "accessor";
1324 case panda_file::LiteralTag::METHODAFFILIATE:
1325 return "method_affiliate";
1326 case panda_file::LiteralTag::NULLVALUE:
1327 return "null_value";
1328 case panda_file::LiteralTag::TAGVALUE:
1329 return "tagvalue";
1330 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1331 return "lit_index";
1332 case panda_file::LiteralTag::LITERALARRAY:
1333 return "lit_offset";
1334 case panda_file::LiteralTag::BUILTINTYPEINDEX:
1335 return "builtin_type";
1336 default:
1337 UNREACHABLE();
1338 }
1339 }
1340
1341 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1342 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1343 {
1344 switch (lit_array.literals_[0].tag_) {
1345 case panda_file::LiteralTag::ARRAY_U1: {
1346 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1347 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1348 }
1349 break;
1350 }
1351 case panda_file::LiteralTag::ARRAY_U8: {
1352 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1353 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1354 }
1355 break;
1356 }
1357 case panda_file::LiteralTag::ARRAY_I8: {
1358 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1359 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1360 }
1361 break;
1362 }
1363 case panda_file::LiteralTag::ARRAY_U16: {
1364 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1365 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1366 }
1367 break;
1368 }
1369 case panda_file::LiteralTag::ARRAY_I16: {
1370 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1371 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1372 }
1373 break;
1374 }
1375 case panda_file::LiteralTag::ARRAY_U32: {
1376 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1377 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1378 }
1379 break;
1380 }
1381 case panda_file::LiteralTag::ARRAY_I32: {
1382 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1383 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1384 }
1385 break;
1386 }
1387 case panda_file::LiteralTag::ARRAY_U64: {
1388 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1389 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1390 }
1391 break;
1392 }
1393 case panda_file::LiteralTag::ARRAY_I64: {
1394 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1395 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1396 }
1397 break;
1398 }
1399 case panda_file::LiteralTag::ARRAY_F32: {
1400 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1401 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1402 }
1403 break;
1404 }
1405 case panda_file::LiteralTag::ARRAY_F64: {
1406 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1407 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1408 }
1409 break;
1410 }
1411 case panda_file::LiteralTag::ARRAY_STRING: {
1412 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1413 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1414 }
1415 break;
1416 }
1417 default:
1418 SerializeLiterals(lit_array, os);
1419 }
1420 }
1421
1422 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1423 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1424 {
1425 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1426 const auto &tag = lit_array.literals_[i].tag_;
1427 os << LiteralTagToString(tag) << ":";
1428 const auto &val = lit_array.literals_[i].value_;
1429 switch (lit_array.literals_[i].tag_) {
1430 case panda_file::LiteralTag::BOOL: {
1431 os << std::get<bool>(val);
1432 break;
1433 }
1434 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1435 case panda_file::LiteralTag::INTEGER: {
1436 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1437 break;
1438 }
1439 case panda_file::LiteralTag::DOUBLE: {
1440 os << std::get<double>(val);
1441 break;
1442 }
1443 case panda_file::LiteralTag::STRING: {
1444 os << "\"" << std::get<std::string>(val) << "\"";
1445 break;
1446 }
1447 case panda_file::LiteralTag::METHOD:
1448 case panda_file::LiteralTag::GETTER:
1449 case panda_file::LiteralTag::SETTER:
1450 case panda_file::LiteralTag::GENERATORMETHOD: {
1451 os << std::get<std::string>(val);
1452 break;
1453 }
1454 case panda_file::LiteralTag::NULLVALUE:
1455 case panda_file::LiteralTag::ACCESSOR: {
1456 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1457 break;
1458 }
1459 case panda_file::LiteralTag::METHODAFFILIATE: {
1460 os << std::get<uint16_t>(val);
1461 break;
1462 }
1463 case panda_file::LiteralTag::LITERALARRAY: {
1464 os << std::get<std::string>(val);
1465 break;
1466 }
1467 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1468 os << static_cast<int16_t>(std::get<uint8_t>(val));
1469 break;
1470 }
1471 default:
1472 UNREACHABLE();
1473 }
1474 os << ", ";
1475 }
1476 }
1477
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1478 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1479 {
1480 if (IsSystemType(record.name)) {
1481 return;
1482 }
1483
1484 os << ".record " << record.name;
1485
1486 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1487 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1488 if (record_in_table) {
1489 Serialize(*record.metadata, record_iter->second.ann_list, os);
1490 } else {
1491 Serialize(*record.metadata, {}, os);
1492 }
1493
1494 if (record.metadata->IsForeign()) {
1495 os << "\n\n";
1496 return;
1497 }
1498
1499 os << " {";
1500
1501 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1502 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1503 SerializeFields(record, os, true);
1504 } else {
1505 os << "\n";
1506 SerializeFields(record, os, false);
1507 }
1508
1509 os << "}\n\n";
1510 }
1511
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1512 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1513 {
1514 constexpr size_t INFO_OFFSET = 80;
1515
1516 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1517 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1518
1519 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1520
1521 size_t field_idx = 0;
1522
1523 std::stringstream ss;
1524 for (const auto &f : record.field_list) {
1525 std::string file = GetFileNameByAbsolutePath(f.name);
1526 ss << "\t" << f.type.GetPandasmName() << " " << file;
1527 if (f.metadata->GetValue().has_value()) {
1528 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1529 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1530 }
1531 if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1532 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1533 }
1534 }
1535 if (record_in_table) {
1536 const auto field_iter = record_iter->second.field_annotations.find(f.name);
1537 if (field_iter != record_iter->second.field_annotations.end()) {
1538 Serialize(*f.metadata, field_iter->second, ss);
1539 } else {
1540 Serialize(*f.metadata, {}, ss);
1541 }
1542 } else {
1543 Serialize(*f.metadata, {}, ss);
1544 }
1545
1546 if (print_information) {
1547 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1548 } else {
1549 os << ss.str() << "\n";
1550 }
1551
1552 ss.str(std::string());
1553 ss.clear();
1554
1555 field_idx++;
1556 }
1557 }
1558
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1559 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1560 {
1561 const auto annotations = method.metadata->GetAnnotations();
1562 if (annotations.empty()) {
1563 return;
1564 }
1565
1566 for (const auto &ann : annotations) {
1567 os << ann.GetName() << ":\n";
1568 std::stringstream ss;
1569 std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1570 if (elements.empty()) {
1571 continue;
1572 }
1573 uint32_t idx = elements.size() - 1;
1574 ss << "\t" << "u32" << " " << elements.back().GetName() << " { ";
1575 for (const auto &elem : elements) {
1576 ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>();
1577 if (idx > 0) {
1578 ss << ", ";
1579 }
1580 --idx;
1581 }
1582 ss << " }";
1583 os << ss.str() << "\n";
1584 }
1585 }
1586
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1587 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1588 {
1589 SerializeMethodAnnotations(method, os);
1590 os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1591
1592 if (method.params.size() > 0) {
1593 os << method.params[0].type.GetPandasmName() << " a0";
1594
1595 for (uint8_t i = 1; i < method.params.size(); i++) {
1596 os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1597 }
1598 }
1599 os << ")";
1600
1601 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1602
1603 const auto method_iter = prog_ann_.method_annotations.find(signature);
1604 if (method_iter != prog_ann_.method_annotations.end()) {
1605 Serialize(*method.metadata, method_iter->second, os);
1606 } else {
1607 Serialize(*method.metadata, {}, os);
1608 }
1609
1610 auto method_info_it = prog_info_.methods_info.find(signature);
1611 bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1612 if (print_method_info) {
1613 const MethodInfo &method_info = method_info_it->second;
1614
1615 size_t width = 0;
1616 for (const auto &i : method.ins) {
1617 if (i.ToString().size() > width) {
1618 width = i.ToString().size();
1619 }
1620 }
1621
1622 os << " { # " << method_info.method_info << "\n# CODE:\n";
1623
1624 for (size_t i = 0; i < method.ins.size(); i++) {
1625 os << "\t" << std::setw(width) << std::left << method.ins.at(i).ToString("", true, method.regs_num) << " # "
1626 << method_info.instructions_info.at(i) << "\n";
1627 }
1628 } else {
1629 os << " {\n";
1630
1631 for (const auto &i : method.ins) {
1632 if (i.set_label) {
1633 std::string ins = i.ToString("", true, method.regs_num);
1634 std::string delim = ": ";
1635 size_t pos = ins.find(delim);
1636 std::string label = ins.substr(0, pos);
1637 ins.erase(0, pos + delim.length());
1638 os << label << ":\n\t" << ins << "\n";
1639 } else {
1640 os << "\t" << i.ToString("", true, method.regs_num) << "\n";
1641 }
1642 }
1643 }
1644
1645 if (method.catch_blocks.size() != 0) {
1646 os << "\n";
1647
1648 for (const auto &catch_block : method.catch_blocks) {
1649 Serialize(catch_block, os);
1650
1651 os << "\n";
1652 }
1653 }
1654
1655 if (print_method_info) {
1656 const MethodInfo &method_info = method_info_it->second;
1657 SerializeLineNumberTable(method_info.line_number_table, os);
1658 SerializeColumnNumberTable(method_info.column_number_table, os);
1659 SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1660 }
1661
1662 os << "}\n\n";
1663 }
1664
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1665 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1666 std::ostream &os) const
1667 {
1668 os << "[offset:0x" << std::hex <<offset<< ", name_value:" << name_value<< "]" <<std::endl;
1669 }
1670
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1671 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1672 {
1673 if (catch_block.exception_record == "") {
1674 os << ".catchall ";
1675 } else {
1676 os << ".catch " << catch_block.exception_record << ", ";
1677 }
1678
1679 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1680
1681 if (catch_block.catch_end_label != "") {
1682 os << ", " << catch_block.catch_end_label;
1683 }
1684 }
1685
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1686 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1687 {
1688 auto bool_attributes = meta.GetBoolAttributes();
1689 auto attributes = meta.GetAttributes();
1690 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1691 return;
1692 }
1693
1694 os << " <";
1695
1696 size_t size = bool_attributes.size();
1697 size_t idx = 0;
1698 for (const auto &attr : bool_attributes) {
1699 os << attr;
1700 ++idx;
1701
1702 if (!attributes.empty() || !ann_list.empty() || idx < size) {
1703 os << ", ";
1704 }
1705 }
1706
1707 size = attributes.size();
1708 idx = 0;
1709 for (const auto &[key, values] : attributes) {
1710 for (size_t i = 0; i < values.size(); i++) {
1711 os << key << "=" << values[i];
1712
1713 if (i < values.size() - 1) {
1714 os << ", ";
1715 }
1716 }
1717
1718 ++idx;
1719
1720 if (!ann_list.empty() || idx < size) {
1721 os << ", ";
1722 }
1723 }
1724
1725 size = ann_list.size();
1726 idx = 0;
1727 for (const auto &[key, value] : ann_list) {
1728 os << key << "=" << value;
1729
1730 ++idx;
1731
1732 if (idx < size) {
1733 os << ", ";
1734 }
1735 }
1736
1737 os << ">";
1738 }
1739
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const1740 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
1741 std::ostream &os) const
1742 {
1743 if (line_number_table.empty()) {
1744 return;
1745 }
1746
1747 os << "\n# LINE_NUMBER_TABLE:\n";
1748 for (const auto &line_info : line_number_table) {
1749 os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
1750 }
1751 }
1752
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const1753 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
1754 std::ostream &os) const
1755 {
1756 if (column_number_table.empty()) {
1757 return;
1758 }
1759
1760 os << "\n# COLUMN_NUMBER_TABLE:\n";
1761 for (const auto &column_info : column_number_table) {
1762 os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
1763 }
1764 }
1765
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const1766 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
1767 const pandasm::Function &method, std::ostream &os) const
1768 {
1769 if (local_variable_table.empty()) {
1770 return;
1771 }
1772
1773 os << "\n# LOCAL_VARIABLE_TABLE:\n";
1774 os << "#\t Start End Register Name Signature\n";
1775 const int START_WIDTH = 5;
1776 const int END_WIDTH = 4;
1777 const int REG_WIDTH = 8;
1778 const int NAME_WIDTH = 14;
1779 for (const auto &variable_info : local_variable_table) {
1780 std::ostringstream reg_stream;
1781 reg_stream << variable_info.reg_number << '(';
1782 if (variable_info.reg_number < 0) {
1783 reg_stream << "acc";
1784 } else {
1785 uint32_t vreg = variable_info.reg_number;
1786 uint32_t first_arg_reg = method.GetTotalRegs();
1787 if (vreg < first_arg_reg) {
1788 reg_stream << 'v' << vreg;
1789 } else {
1790 reg_stream << 'a' << vreg - first_arg_reg;
1791 }
1792 }
1793 reg_stream << ')';
1794
1795 os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << " ";
1796 os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << " ";
1797 os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
1798 os << std::setw(NAME_WIDTH) << std::right << variable_info.name << " " << variable_info.type;
1799 if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
1800 os << " (" << variable_info.type_signature << ")";
1801 }
1802 os << "\n";
1803 }
1804 }
1805
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1806 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1807 {
1808 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1809 }
1810
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const1811 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1812 size_t idx) const
1813 {
1814 std::stringstream name;
1815 const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
1816 std::string str_data = StringDataToString(file_->GetStringData(offset));
1817
1818 if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
1819 name << GetMethodSignature(offset);
1820 } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
1821 name << '\"';
1822 name << str_data;
1823 name << '\"';
1824 string_offset_to_name_.emplace(offset, str_data);
1825 } else {
1826 ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
1827 pandasm::LiteralArray lit_array;
1828 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
1829 name << SerializeLiteralArray(lit_array);
1830 }
1831
1832 return name.str();
1833 }
1834
GetRecordLanguage(panda_file::File::EntityId class_id) const1835 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
1836 {
1837 if (file_->IsExternal(class_id)) {
1838 return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1839 }
1840
1841 panda_file::ClassDataAccessor cda(*file_, class_id);
1842 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1843 }
1844
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)1845 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
1846 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
1847 panda_file::File::EntityId code_id)
1848 {
1849 const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
1850 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1851 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1852 size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1853 if (idx != std::numeric_limits<size_t>::max()) {
1854 if (label_table->find(idx) == label_table->end()) {
1855 std::stringstream ss {};
1856 ss << "jump_label_" << label_table->size();
1857 (*label_table)[idx] = ss.str();
1858 }
1859
1860 pa_ins->imms.clear();
1861 pa_ins->ids.push_back(label_table->at(idx));
1862 } else {
1863 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1864 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1865 << ": invalid jump offset 0x" << jmp_offset
1866 << " - jumping in the middle of another instruction!";
1867 }
1868 } else {
1869 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1870 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1871 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
1872 }
1873 }
1874
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1875 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1876 panda_file::File::EntityId code_id) const
1877 {
1878 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1879
1880 const auto ins_sz = code_accessor.GetCodeSize();
1881 const auto ins_arr = code_accessor.GetInstructions();
1882
1883 method->regs_num = code_accessor.GetNumVregs();
1884
1885 auto bc_ins = BytecodeInstruction(ins_arr);
1886 const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1887
1888 LabelTable label_table = GetExceptions(method, method_id, code_id);
1889
1890 IdList unknown_external_methods {};
1891
1892 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1893 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1894 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1895 << "). bytecode instructions sequence corrupted for method " << method->name
1896 << "! went out of bounds";
1897
1898 break;
1899 }
1900
1901 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
1902 if (pa_ins.IsJump()) {
1903 translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
1904 }
1905
1906 // check if method id is unknown external method. if so, emplace it in table
1907 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1908 const auto arg_method_idx = bc_ins.GetId().AsIndex();
1909 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1910
1911 const auto arg_method_signature = GetMethodSignature(arg_method_id);
1912
1913 const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
1914 const bool is_external = file_->IsExternal(arg_method_id);
1915 if (is_external && !is_present) {
1916 unknown_external_methods.push_back(arg_method_id);
1917 }
1918 }
1919
1920 method->ins.push_back(pa_ins);
1921 bc_ins = bc_ins.GetNext();
1922 }
1923
1924 for (const auto &pair : label_table) {
1925 method->ins[pair.first].label = pair.second;
1926 method->ins[pair.first].set_label = true;
1927 }
1928
1929 return unknown_external_methods;
1930 }
1931
GetColumnNumber()1932 std::vector<size_t> Disassembler::GetColumnNumber()
1933 {
1934 std::vector<size_t> columnNumber;
1935 for (const auto &method_info : prog_info_.methods_info) {
1936 for (const auto &column_number : method_info.second.column_number_table) {
1937 columnNumber.push_back(column_number.column);
1938 }
1939 }
1940 return columnNumber;
1941 }
1942
GetLineNumber()1943 std::vector<size_t> Disassembler::GetLineNumber()
1944 {
1945 std::vector<size_t> lineNumber;
1946 for (const auto &method_info : prog_info_.methods_info) {
1947 for (const auto &line_number : method_info.second.line_number_table) {
1948 lineNumber.push_back(line_number.line);
1949 }
1950 }
1951 return lineNumber;
1952 }
1953
1954 } // namespace panda::disasm
1955