1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19
20 #include <iomanip>
21
22 #include "get_language_specific_metadata.inc"
23
24 namespace panda::disasm {
25
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)26 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
27 {
28 auto file_new = panda_file::File::Open(filename_in);
29 file_.swap(file_new);
30
31 if (file_ != nullptr) {
32 prog_ = pandasm::Program {};
33
34 record_name_to_id_.clear();
35 method_name_to_id_.clear();
36
37 skip_strings_ = skip_strings;
38 quiet_ = quiet;
39
40 prog_info_ = ProgInfo {};
41
42 prog_ann_ = ProgAnnotations {};
43
44 GetRecords();
45 GetLiteralArrays();
46
47 GetLanguageSpecificMetadata();
48 } else {
49 LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
50 }
51 }
52
CollectInfo()53 void Disassembler::CollectInfo()
54 {
55 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
56
57 debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
58
59 for (const auto &pair : record_name_to_id_) {
60 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
61 }
62
63 for (const auto &pair : method_name_to_id_) {
64 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
65 }
66 }
67
Serialize(std::ostream & os,bool add_separators,bool print_information) const68 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
69 {
70 if (os.bad()) {
71 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
72
73 return;
74 }
75
76 if (file_ != nullptr) {
77 os << "# source binary: " << file_->GetFilename() << "\n\n";
78 }
79
80 SerializeLanguage(os);
81
82 if (add_separators) {
83 os << "# ====================\n"
84 "# LITERALS\n\n";
85 }
86
87 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
88
89 for (const auto &[key, lit_arr] : prog_.literalarray_table) {
90 Serialize(key, lit_arr, os);
91 }
92
93 os << "\n";
94
95 if (add_separators) {
96 os << "# ====================\n"
97 "# RECORDS\n\n";
98 }
99
100 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
101
102 for (const auto &r : prog_.record_table) {
103 Serialize(r.second, os, print_information);
104 }
105
106 if (add_separators) {
107 os << "# ====================\n"
108 "# METHODS\n\n";
109 }
110
111 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
112
113 for (const auto &m : prog_.function_table) {
114 Serialize(m.second, os, print_information);
115 }
116 }
117
IsSystemType(const std::string & type_name)118 inline bool Disassembler::IsSystemType(const std::string &type_name)
119 {
120 bool is_array_type = type_name.find('[') != std::string::npos;
121 bool is_global = type_name == "_GLOBAL";
122
123 return is_array_type || is_global;
124 }
125
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)126 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
127 {
128 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
129
130 if (record == nullptr) {
131 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
132
133 return;
134 }
135
136 record->name = GetFullRecordName(record_id);
137
138 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
139
140 GetMetaData(record, record_id);
141
142 if (!file_->IsExternal(record_id)) {
143 GetMethods(record_id);
144 GetFields(record, record_id);
145 }
146 }
147
AddMethodToTables(const panda_file::File::EntityId & method_id)148 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
149 {
150 pandasm::Function new_method("", file_language_);
151 GetMethod(&new_method, method_id);
152
153 const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
154 if (prog_.function_table.find(signature) != prog_.function_table.end()) {
155 return;
156 }
157
158 method_name_to_id_.emplace(signature, method_id);
159 prog_.function_synonyms[new_method.name].push_back(signature);
160 prog_.function_table.emplace(signature, std::move(new_method));
161 }
162
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)163 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
164 {
165 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
166
167 if (method == nullptr) {
168 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
169
170 return;
171 }
172
173 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
174
175 method->name = GetFullMethodName(method_id);
176
177 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
178
179 GetParams(method, method_accessor.GetProtoId());
180 GetMetaData(method, method_id);
181
182 if (method_accessor.GetCodeId().has_value()) {
183 const IdList id_list = GetInstructions(method, method_id, method_accessor.GetCodeId().value());
184
185 for (const auto &id : id_list) {
186 AddMethodToTables(id);
187 }
188 } else {
189 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
190 << "). implementation of method expected, but no \'CODE\' tag was found!";
191
192 return;
193 }
194 }
195
196 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const197 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
198 const panda_file::LiteralDataAccessor::LiteralValue &value) const
199 {
200 panda_file::File::EntityId id(std::get<uint32_t>(value));
201 auto sp = file_->GetSpanFromId(id);
202 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
203 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
204 for (size_t i = 0; i < len; i++) {
205 pandasm::LiteralArray::Literal lit;
206 lit.tag_ = tag;
207 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
208 lit_array->literals_.push_back(lit);
209 }
210 return;
211 }
212 for (size_t i = 0; i < len; i++) {
213 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
214 pandasm::LiteralArray::Literal lit;
215 lit.tag_ = tag;
216 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
217 lit_array->literals_.push_back(lit);
218 }
219 }
220
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const221 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
222 const panda_file::LiteralDataAccessor::LiteralValue &value,
223 const panda_file::LiteralTag &tag) const
224 {
225 pandasm::LiteralArray::Literal lit;
226 lit.tag_ = tag;
227 switch (tag) {
228 case panda_file::LiteralTag::BOOL: {
229 lit.value_ = std::get<bool>(value);
230 break;
231 }
232 case panda_file::LiteralTag::ACCESSOR:
233 case panda_file::LiteralTag::NULLVALUE:
234 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
235 lit.value_ = std::get<uint8_t>(value);
236 break;
237 }
238 case panda_file::LiteralTag::METHODAFFILIATE: {
239 lit.value_ = std::get<uint16_t>(value);
240 break;
241 }
242 case panda_file::LiteralTag::LITERALBUFFERINDEX:
243 case panda_file::LiteralTag::INTEGER: {
244 lit.value_ = std::get<uint32_t>(value);
245 break;
246 }
247 case panda_file::LiteralTag::DOUBLE: {
248 lit.value_ = std::get<double>(value);
249 break;
250 }
251 case panda_file::LiteralTag::STRING: {
252 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
253 lit.value_ = StringDataToString(str_data);
254 break;
255 }
256 case panda_file::LiteralTag::METHOD:
257 case panda_file::LiteralTag::GENERATORMETHOD: {
258 panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
259 lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
260 break;
261 }
262 case panda_file::LiteralTag::LITERALARRAY: {
263 std::stringstream ss;
264 ss << "0x" << std::hex << std::get<uint32_t>(value);
265 lit.value_ = ss.str();
266 break;
267 }
268 case panda_file::LiteralTag::TAGVALUE: {
269 return;
270 }
271 default: {
272 UNREACHABLE();
273 }
274 }
275 lit_array->literals_.push_back(lit);
276 }
277
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const278 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
279 {
280 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
281 lit_array_accessor.EnumerateLiteralVals(
282 offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
283 const panda_file::LiteralTag &tag) {
284 switch (tag) {
285 case panda_file::LiteralTag::ARRAY_U1: {
286 FillLiteralArrayData<bool>(lit_array, tag, value);
287 break;
288 }
289 case panda_file::LiteralTag::ARRAY_I8:
290 case panda_file::LiteralTag::ARRAY_U8: {
291 FillLiteralArrayData<uint8_t>(lit_array, tag, value);
292 break;
293 }
294 case panda_file::LiteralTag::ARRAY_I16:
295 case panda_file::LiteralTag::ARRAY_U16: {
296 FillLiteralArrayData<uint16_t>(lit_array, tag, value);
297 break;
298 }
299 case panda_file::LiteralTag::ARRAY_I32:
300 case panda_file::LiteralTag::ARRAY_U32: {
301 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
302 break;
303 }
304 case panda_file::LiteralTag::ARRAY_I64:
305 case panda_file::LiteralTag::ARRAY_U64: {
306 FillLiteralArrayData<uint64_t>(lit_array, tag, value);
307 break;
308 }
309 case panda_file::LiteralTag::ARRAY_F32: {
310 FillLiteralArrayData<float>(lit_array, tag, value);
311 break;
312 }
313 case panda_file::LiteralTag::ARRAY_F64: {
314 FillLiteralArrayData<double>(lit_array, tag, value);
315 break;
316 }
317 case panda_file::LiteralTag::ARRAY_STRING: {
318 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
319 break;
320 }
321 default: {
322 FillLiteralData(lit_array, value, tag);
323 break;
324 }
325 }
326 });
327 }
328
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const329 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
330 {
331 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
332 GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
333 }
334
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const335 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
336 {
337 return module_literals_.find(id.GetOffset()) != module_literals_.end();
338 }
339
GetLiteralArrays()340 void Disassembler::GetLiteralArrays()
341 {
342 const auto lit_arrays_id = file_->GetLiteralArraysId();
343
344 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
345 << lit_arrays_id << ")";
346
347 panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
348 size_t num_litarrays = lda.GetLiteralNum();
349 for (size_t index = 0; index < num_litarrays; index++) {
350 auto id = lda.GetLiteralArrayId(index);
351 if (IsModuleLiteralOffset(id)) {
352 continue; // exclude module literals as they do not obey encoding rules of normal literals
353 }
354 std::stringstream ss;
355 ss << index << " 0x" << std::hex << id.GetOffset();
356 panda::pandasm::LiteralArray lit_arr;
357 GetLiteralArray(&lit_arr, index);
358 prog_.literalarray_table.emplace(ss.str(), lit_arr);
359 }
360 }
361
GetRecords()362 void Disassembler::GetRecords()
363 {
364 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
365
366 const auto class_idx = file_->GetClasses();
367
368 for (size_t i = 0; i < class_idx.size(); i++) {
369 uint32_t class_id = class_idx[i];
370 auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
371
372 if (class_id > file_->GetHeader()->file_size) {
373 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
374 << class_off << "). binary file corrupted. record offset (0x" << class_id
375 << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
376 break;
377 }
378
379 const panda_file::File::EntityId record_id {class_id};
380 auto language = GetRecordLanguage(record_id);
381
382 if (language != file_language_) {
383 if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
384 file_language_ = language;
385 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
386 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
387 << std::hex << class_off << "). record's language ("
388 << panda_file::LanguageToString(language)
389 << ") differs from file's language ("
390 << panda_file::LanguageToString(file_language_) << ")!";
391 }
392 }
393
394 pandasm::Record record("", file_language_);
395 GetRecord(&record, record_id);
396
397 if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
398 record_name_to_id_.emplace(record.name, record_id);
399 prog_.record_table.emplace(record.name, std::move(record));
400 }
401 }
402 }
403
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)404 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
405 {
406 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
407
408 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
409 pandasm::Field field(file_language_);
410
411 panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
412 field.name = StringDataToString(file_->GetStringData(field_name_id));
413
414 uint32_t field_type = field_accessor.GetType();
415 field.type = FieldTypeToPandasmType(field_type);
416
417 GetMetaData(&field, field_accessor.GetFieldId());
418
419 record->field_list.push_back(std::move(field));
420 });
421 }
422
GetMethods(const panda_file::File::EntityId & record_id)423 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
424 {
425 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
426
427 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
428 AddMethodToTables(method_accessor.GetMethodId());
429 });
430 }
431
GetParams(pandasm::Function * method,const panda_file::File::EntityId & proto_id) const432 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &proto_id) const
433 {
434 /**
435 * frame size - 2^16 - 1
436 */
437 static const uint32_t MAX_ARG_NUM = 0xFFFF;
438
439 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << proto_id << " (0x" << std::hex << proto_id << ")";
440
441 if (method == nullptr) {
442 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
443
444 return;
445 }
446
447 panda_file::ProtoDataAccessor proto_accessor(*file_, proto_id);
448
449 auto params_num = proto_accessor.GetNumArgs();
450
451 if (params_num > MAX_ARG_NUM) {
452 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << proto_id << " (0x" << std::hex << proto_id
453 << "). number of function's arguments (" << std::dec << params_num
454 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
455
456 return;
457 }
458
459 size_t ref_idx = 0;
460 method->return_type = PFTypeToPandasmType(proto_accessor.GetReturnType(), proto_accessor, ref_idx);
461
462 for (uint8_t i = 0; i < params_num; i++) {
463 auto arg_type = PFTypeToPandasmType(proto_accessor.GetArgType(i), proto_accessor, ref_idx);
464 method->params.push_back(pandasm::Function::Parameter(arg_type, file_language_));
465 }
466 }
467
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const468 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
469 panda_file::File::EntityId code_id) const
470 {
471 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
472
473 if (method == nullptr) {
474 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
475 return LabelTable {};
476 }
477
478 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
479
480 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
481 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
482
483 size_t try_idx = 0;
484 LabelTable label_table {};
485 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
486 pandasm::Function::CatchBlock catch_block_pa {};
487 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
488 return false;
489 }
490 size_t catch_idx = 0;
491 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
492 auto class_idx = catch_block.GetTypeIdx();
493
494 if (class_idx == panda_file::INVALID_INDEX) {
495 catch_block_pa.exception_record = "";
496 } else {
497 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
498 catch_block_pa.exception_record = GetFullRecordName(class_id);
499 }
500 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
501 catch_idx)) {
502 return false;
503 }
504
505 method->catch_blocks.push_back(catch_block_pa);
506 catch_block_pa.catch_begin_label = "";
507 catch_block_pa.catch_end_label = "";
508 catch_idx++;
509
510 return true;
511 });
512 try_idx++;
513
514 return true;
515 });
516
517 return label_table;
518 }
519
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)520 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
521 {
522 size_t count = 0;
523
524 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
525 count++;
526 bc_ins_first = bc_ins_first.GetNext();
527 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
528 return std::numeric_limits<size_t>::max();
529 }
530 }
531
532 return count;
533 }
534
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const535 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
536 const panda_file::CodeDataAccessor::TryBlock &try_block,
537 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
538 size_t try_idx) const
539 {
540 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
541 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
542
543 const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
544 const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
545
546 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
547 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
548 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
549 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
550
551 if (!try_begin_offset_in_range || !try_begin_offset_valid) {
552 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
553 << try_begin_bc_ins.GetAddress();
554 return false;
555 } else {
556 std::stringstream ss {};
557 ss << "try_begin_label_" << try_idx;
558
559 LabelTable::iterator it = label_table->find(try_begin_idx);
560 if (it == label_table->end()) {
561 catch_block_pa->try_begin_label = ss.str();
562 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
563 } else {
564 catch_block_pa->try_begin_label = it->second;
565 }
566 }
567
568 if (!try_end_offset_in_range || !try_end_offset_valid) {
569 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
570 << try_end_bc_ins.GetAddress();
571 return false;
572 } else {
573 std::stringstream ss {};
574 ss << "try_end_label_" << try_idx;
575
576 LabelTable::iterator it = label_table->find(try_end_idx);
577 if (it == label_table->end()) {
578 catch_block_pa->try_end_label = ss.str();
579 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
580 } else {
581 catch_block_pa->try_end_label = it->second;
582 }
583 }
584
585 return true;
586 }
587
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const588 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
589 const panda_file::CodeDataAccessor::CatchBlock &catch_block,
590 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
591 size_t try_idx, size_t catch_idx) const
592 {
593 const auto handler_begin_offset = catch_block.GetHandlerPc();
594 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
595
596 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
597 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
598
599 const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
600 const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
601
602 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
603 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() > handler_end_bc_ins.GetAddress();
604 const bool handler_end_present = catch_block.GetCodeSize() != 0;
605 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
606 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
607
608 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
609 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
610 << handler_begin_bc_ins.GetAddress();
611 return false;
612 } else {
613 std::stringstream ss {};
614 ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
615
616 LabelTable::iterator it = label_table->find(handler_begin_idx);
617 if (it == label_table->end()) {
618 catch_block_pa->catch_begin_label = ss.str();
619 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
620 } else {
621 catch_block_pa->catch_begin_label = it->second;
622 }
623 }
624
625 if (!handler_end_offset_in_range || !handler_end_offset_valid) {
626 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
627 << handler_end_bc_ins.GetAddress();
628 return false;
629 } else if (handler_end_present) {
630 std::stringstream ss {};
631 ss << "handler_end_label_" << try_idx << "_" << catch_idx;
632
633 LabelTable::iterator it = label_table->find(handler_end_idx);
634 if (it == label_table->end()) {
635 catch_block_pa->catch_end_label = ss.str();
636 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
637 } else {
638 catch_block_pa->catch_end_label = it->second;
639 }
640 }
641
642 return true;
643 }
644
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const645 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
646 {
647 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
648 << ")";
649
650 if (method == nullptr) {
651 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
652
653 return;
654 }
655
656 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
657
658 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
659
660 if (!method_accessor.IsStatic()) {
661 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
662 auto this_type = pandasm::Type::FromDescriptor(class_name);
663
664 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
665 << "\') is not static. emplacing self-argument of type " << this_type.GetName();
666
667 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
668 } else {
669 method->metadata->SetAttribute("static");
670 }
671
672 if (file_->IsExternal(method_accessor.GetMethodId())) {
673 method->metadata->SetAttribute("external");
674 }
675
676 std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
677 std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
678
679 const bool is_ctor = (method_name_raw == ctor_name);
680 const bool is_cctor = (method_name_raw == cctor_name);
681
682 if (is_ctor) {
683 method->metadata->SetAttribute("ctor");
684 method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
685 } else if (is_cctor) {
686 method->metadata->SetAttribute("cctor");
687 method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
688 }
689 }
690
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const691 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
692 {
693 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
694 << ")";
695
696 if (record == nullptr) {
697 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
698
699 return;
700 }
701
702 if (file_->IsExternal(record_id)) {
703 record->metadata->SetAttribute("external");
704 }
705 }
706
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id)707 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id)
708 {
709 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
710
711 if (field == nullptr) {
712 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
713
714 return;
715 }
716
717 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
718
719 if (field_accessor.IsExternal()) {
720 field->metadata->SetAttribute("external");
721 }
722
723 if (field_accessor.IsStatic()) {
724 field->metadata->SetAttribute("static");
725 }
726
727 if (field->type.GetId() == panda_file::Type::TypeId::U32) {
728 const auto offset = field_accessor.GetValue<uint32_t>().value();
729 static const std::string TYPE_SUMMARY_FIELD_NAME = "typeSummaryOffset";
730 if (field->name != TYPE_SUMMARY_FIELD_NAME) {
731 LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
732 << " is excluded";
733 module_literals_.insert(offset);
734 }
735 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
736 }
737 if (field->type.GetId() == panda_file::Type::TypeId::U8) {
738 const auto val = field_accessor.GetValue<uint8_t>().value();
739 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
740 }
741 }
742
AnnotationTagToString(const char tag) const743 std::string Disassembler::AnnotationTagToString(const char tag) const
744 {
745 switch (tag) {
746 case '1':
747 return "u1";
748 case '2':
749 return "i8";
750 case '3':
751 return "u8";
752 case '4':
753 return "i16";
754 case '5':
755 return "u16";
756 case '6':
757 return "i32";
758 case '7':
759 return "u32";
760 case '8':
761 return "i64";
762 case '9':
763 return "u64";
764 case 'A':
765 return "f32";
766 case 'B':
767 return "f64";
768 case 'C':
769 return "string";
770 case 'D':
771 return "record";
772 case 'E':
773 return "method";
774 case 'F':
775 return "enum";
776 case 'G':
777 return "annotation";
778 case 'I':
779 return "void";
780 case 'J':
781 return "method_handle";
782 case 'K':
783 return "u1[]";
784 case 'L':
785 return "i8[]";
786 case 'M':
787 return "u8[]";
788 case 'N':
789 return "i16[]";
790 case 'O':
791 return "u16[]";
792 case 'P':
793 return "i32[]";
794 case 'Q':
795 return "u32[]";
796 case 'R':
797 return "i64[]";
798 case 'S':
799 return "u64[]";
800 case 'T':
801 return "f32[]";
802 case 'U':
803 return "f64[]";
804 case 'V':
805 return "string[]";
806 case 'W':
807 return "record[]";
808 case 'X':
809 return "method[]";
810 case 'Y':
811 return "enum[]";
812 case 'Z':
813 return "annotation[]";
814 case '@':
815 return "method_handle[]";
816 case '*':
817 return "nullptr string";
818 default:
819 return std::string();
820 }
821 }
822
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)823 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
824 {
825 std::stringstream ss;
826
827 if (type == "i8") {
828 int8_t res = value.Get<int8_t>();
829 ss << static_cast<int>(res);
830 } else if (type == "u1" || type == "u8") {
831 uint8_t res = value.Get<uint8_t>();
832 ss << static_cast<unsigned int>(res);
833 } else if (type == "i16") {
834 ss << value.Get<int16_t>();
835 } else if (type == "u16") {
836 ss << value.Get<uint16_t>();
837 } else if (type == "i32") {
838 ss << value.Get<int32_t>();
839 } else if (type == "u32") {
840 ss << value.Get<uint32_t>();
841 } else if (type == "i64") {
842 ss << value.Get<int64_t>();
843 } else if (type == "u64") {
844 ss << value.Get<uint64_t>();
845 } else if (type == "f32") {
846 ss << value.Get<float>();
847 } else if (type == "f64") {
848 ss << value.Get<double>();
849 } else if (type == "string") {
850 const auto id = value.Get<panda_file::File::EntityId>();
851 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
852 } else if (type == "record") {
853 const auto id = value.Get<panda_file::File::EntityId>();
854 ss << GetFullRecordName(id);
855 } else if (type == "method") {
856 const auto id = value.Get<panda_file::File::EntityId>();
857 AddMethodToTables(id);
858 ss << GetMethodSignature(id);
859 } else if (type == "enum") {
860 const auto id = value.Get<panda_file::File::EntityId>();
861 panda_file::FieldDataAccessor field_accessor(*file_, id);
862 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
863 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
864 } else if (type == "annotation") {
865 const auto id = value.Get<panda_file::File::EntityId>();
866 ss << "id_" << id;
867 } else if (type == "void") {
868 return std::string();
869 } else if (type == "method_handle") {
870 }
871
872 return ss.str();
873 }
874
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)875 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
876 const size_t idx)
877 {
878 std::stringstream ss;
879
880 if (type == "i8") {
881 int8_t res = value.Get<int8_t>(idx);
882 ss << static_cast<int>(res);
883 } else if (type == "u1" || type == "u8") {
884 uint8_t res = value.Get<uint8_t>(idx);
885 ss << static_cast<unsigned int>(res);
886 } else if (type == "i16") {
887 ss << value.Get<int16_t>(idx);
888 } else if (type == "u16") {
889 ss << value.Get<uint16_t>(idx);
890 } else if (type == "i32") {
891 ss << value.Get<int32_t>(idx);
892 } else if (type == "u32") {
893 ss << value.Get<uint32_t>(idx);
894 } else if (type == "i64") {
895 ss << value.Get<int64_t>(idx);
896 } else if (type == "u64") {
897 ss << value.Get<uint64_t>(idx);
898 } else if (type == "f32") {
899 ss << value.Get<float>(idx);
900 } else if (type == "f64") {
901 ss << value.Get<double>(idx);
902 } else if (type == "string") {
903 const auto id = value.Get<panda_file::File::EntityId>(idx);
904 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
905 } else if (type == "record") {
906 const auto id = value.Get<panda_file::File::EntityId>(idx);
907 ss << GetFullRecordName(id);
908 } else if (type == "method") {
909 const auto id = value.Get<panda_file::File::EntityId>(idx);
910 AddMethodToTables(id);
911 ss << GetMethodSignature(id);
912 } else if (type == "enum") {
913 const auto id = value.Get<panda_file::File::EntityId>(idx);
914 panda_file::FieldDataAccessor field_accessor(*file_, id);
915 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
916 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
917 } else if (type == "annotation") {
918 const auto id = value.Get<panda_file::File::EntityId>(idx);
919 ss << "id_" << id;
920 } else if (type == "method_handle") {
921 } else if (type == "nullptr string") {
922 }
923
924 return ss.str();
925 }
926
GetFullMethodName(const panda_file::File::EntityId & method_id) const927 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
928 {
929 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
930
931 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
932
933 std::string class_name = GetFullRecordName(method_accessor.GetClassId());
934 if (IsSystemType(class_name)) {
935 class_name = "";
936 } else {
937 class_name += ".";
938 }
939
940 return class_name + method_name_raw;
941 }
942
GetMethodSignature(const panda_file::File::EntityId & method_id) const943 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
944 {
945 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
946
947 pandasm::Function method(GetFullMethodName(method_id), file_language_);
948 GetParams(&method, method_accessor.GetProtoId());
949 GetMetaData(&method, method_id);
950
951 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
952 }
953
GetFullRecordName(const panda_file::File::EntityId & class_id) const954 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
955 {
956 std::string name = StringDataToString(file_->GetStringData(class_id));
957
958 auto type = pandasm::Type::FromDescriptor(name);
959 type = pandasm::Type(type.GetComponentName(), type.GetRank());
960
961 return type.GetPandasmName();
962 }
963
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const964 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
965 {
966 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
967
968 if (file_->IsExternal(record_id)) {
969 return;
970 }
971
972 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
973 std::stringstream ss;
974
975 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
976 << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
977 << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
978
979 record_info->record_info = ss.str();
980 ss.str(std::string());
981
982 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
983 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
984 << field_accessor.GetFieldId();
985
986 record_info->fields_info.push_back(ss.str());
987
988 ss.str(std::string());
989 });
990 }
991
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const992 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
993 {
994 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
995
996 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
997 std::stringstream ss;
998
999 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1000 << method_accessor.GetMethodId();
1001
1002 if (method_accessor.GetCodeId().has_value()) {
1003 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1004 << method_accessor.GetCodeId().value();
1005
1006 GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1007 } else {
1008 ss << ", <no code>";
1009 }
1010
1011 method_info->method_info = ss.str();
1012
1013 if (method_accessor.GetCodeId()) {
1014 ASSERT(debug_info_extractor_ != nullptr);
1015 method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1016 method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1017
1018 // Add information about parameters into the table
1019 panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1020 auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1021 uint32_t code_size = codeda.GetCodeSize();
1022 for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1023 panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1024 method_info->local_variable_table.emplace_back(arg_info);
1025 }
1026 }
1027 }
1028
IsArray(const panda_file::LiteralTag & tag)1029 static bool IsArray(const panda_file::LiteralTag &tag)
1030 {
1031 switch (tag) {
1032 case panda_file::LiteralTag::ARRAY_U1:
1033 case panda_file::LiteralTag::ARRAY_U8:
1034 case panda_file::LiteralTag::ARRAY_I8:
1035 case panda_file::LiteralTag::ARRAY_U16:
1036 case panda_file::LiteralTag::ARRAY_I16:
1037 case panda_file::LiteralTag::ARRAY_U32:
1038 case panda_file::LiteralTag::ARRAY_I32:
1039 case panda_file::LiteralTag::ARRAY_U64:
1040 case panda_file::LiteralTag::ARRAY_I64:
1041 case panda_file::LiteralTag::ARRAY_F32:
1042 case panda_file::LiteralTag::ARRAY_F64:
1043 case panda_file::LiteralTag::ARRAY_STRING:
1044 return true;
1045 default:
1046 return false;
1047 }
1048 }
1049
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1050 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1051 {
1052 std::stringstream ret;
1053 if (lit_array.literals_.empty()) {
1054 return "";
1055 }
1056
1057 std::stringstream ss;
1058 ss << "{ ";
1059 const auto &tag = lit_array.literals_[0].tag_;
1060 if (IsArray(tag)) {
1061 ss << LiteralTagToString(tag);
1062 }
1063 ss << lit_array.literals_.size();
1064 ss << " [ ";
1065 SerializeValues(lit_array, ss);
1066 ss << "]}";
1067 return ss.str();
1068 }
1069
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1070 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1071 {
1072 os << key << " ";
1073 os << SerializeLiteralArray(lit_array);
1074 os << "\n";
1075 }
1076
LiteralTagToString(const panda_file::LiteralTag & tag) const1077 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1078 {
1079 switch (tag) {
1080 case panda_file::LiteralTag::BOOL:
1081 case panda_file::LiteralTag::ARRAY_U1:
1082 return "u1";
1083 case panda_file::LiteralTag::ARRAY_U8:
1084 return "u8";
1085 case panda_file::LiteralTag::ARRAY_I8:
1086 return "i8";
1087 case panda_file::LiteralTag::ARRAY_U16:
1088 return "u16";
1089 case panda_file::LiteralTag::ARRAY_I16:
1090 return "i16";
1091 case panda_file::LiteralTag::ARRAY_U32:
1092 return "u32";
1093 case panda_file::LiteralTag::INTEGER:
1094 case panda_file::LiteralTag::ARRAY_I32:
1095 return "i32";
1096 case panda_file::LiteralTag::ARRAY_U64:
1097 return "u64";
1098 case panda_file::LiteralTag::ARRAY_I64:
1099 return "i64";
1100 case panda_file::LiteralTag::ARRAY_F32:
1101 return "f32";
1102 case panda_file::LiteralTag::DOUBLE:
1103 case panda_file::LiteralTag::ARRAY_F64:
1104 return "f64";
1105 case panda_file::LiteralTag::STRING:
1106 case panda_file::LiteralTag::ARRAY_STRING:
1107 return "string";
1108 case panda_file::LiteralTag::METHOD:
1109 return "method";
1110 case panda_file::LiteralTag::GENERATORMETHOD:
1111 return "generator_method";
1112 case panda_file::LiteralTag::ACCESSOR:
1113 return "accessor";
1114 case panda_file::LiteralTag::METHODAFFILIATE:
1115 return "method_affiliate";
1116 case panda_file::LiteralTag::NULLVALUE:
1117 return "null_value";
1118 case panda_file::LiteralTag::TAGVALUE:
1119 return "tagvalue";
1120 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1121 return "lit_index";
1122 case panda_file::LiteralTag::LITERALARRAY:
1123 return "lit_offset";
1124 case panda_file::LiteralTag::BUILTINTYPEINDEX:
1125 return "builtin_type";
1126 default:
1127 UNREACHABLE();
1128 }
1129 }
1130
1131 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1132 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1133 {
1134 switch (lit_array.literals_[0].tag_) {
1135 case panda_file::LiteralTag::ARRAY_U1: {
1136 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1137 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1138 }
1139 break;
1140 }
1141 case panda_file::LiteralTag::ARRAY_U8: {
1142 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1143 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1144 }
1145 break;
1146 }
1147 case panda_file::LiteralTag::ARRAY_I8: {
1148 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1149 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1150 }
1151 break;
1152 }
1153 case panda_file::LiteralTag::ARRAY_U16: {
1154 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1155 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1156 }
1157 break;
1158 }
1159 case panda_file::LiteralTag::ARRAY_I16: {
1160 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1161 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1162 }
1163 break;
1164 }
1165 case panda_file::LiteralTag::ARRAY_U32: {
1166 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1167 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1168 }
1169 break;
1170 }
1171 case panda_file::LiteralTag::ARRAY_I32: {
1172 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1173 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1174 }
1175 break;
1176 }
1177 case panda_file::LiteralTag::ARRAY_U64: {
1178 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1179 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1180 }
1181 break;
1182 }
1183 case panda_file::LiteralTag::ARRAY_I64: {
1184 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1185 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1186 }
1187 break;
1188 }
1189 case panda_file::LiteralTag::ARRAY_F32: {
1190 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1191 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1192 }
1193 break;
1194 }
1195 case panda_file::LiteralTag::ARRAY_F64: {
1196 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1197 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1198 }
1199 break;
1200 }
1201 case panda_file::LiteralTag::ARRAY_STRING: {
1202 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1203 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1204 }
1205 break;
1206 }
1207 default:
1208 SerializeLiterals(lit_array, os);
1209 }
1210 }
1211
1212 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1213 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1214 {
1215 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1216 const auto &tag = lit_array.literals_[i].tag_;
1217 os << LiteralTagToString(tag) << ":";
1218 const auto &val = lit_array.literals_[i].value_;
1219 switch (lit_array.literals_[i].tag_) {
1220 case panda_file::LiteralTag::BOOL: {
1221 os << std::get<bool>(val);
1222 break;
1223 }
1224 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1225 case panda_file::LiteralTag::INTEGER: {
1226 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1227 break;
1228 }
1229 case panda_file::LiteralTag::DOUBLE: {
1230 os << std::get<double>(val);
1231 break;
1232 }
1233 case panda_file::LiteralTag::STRING: {
1234 os << "\"" << std::get<std::string>(val) << "\"";
1235 break;
1236 }
1237 case panda_file::LiteralTag::METHOD:
1238 case panda_file::LiteralTag::GENERATORMETHOD: {
1239 os << std::get<std::string>(val);
1240 break;
1241 }
1242 case panda_file::LiteralTag::NULLVALUE:
1243 case panda_file::LiteralTag::ACCESSOR: {
1244 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1245 break;
1246 }
1247 case panda_file::LiteralTag::METHODAFFILIATE: {
1248 os << std::get<uint16_t>(val);
1249 break;
1250 }
1251 case panda_file::LiteralTag::LITERALARRAY: {
1252 os << std::get<std::string>(val);
1253 break;
1254 }
1255 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1256 os << static_cast<int16_t>(std::get<uint8_t>(val));
1257 break;
1258 }
1259 default:
1260 UNREACHABLE();
1261 }
1262 os << ", ";
1263 }
1264 }
1265
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1266 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1267 {
1268 if (IsSystemType(record.name)) {
1269 return;
1270 }
1271
1272 os << ".record " << record.name;
1273
1274 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1275 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1276
1277 if (record_in_table) {
1278 Serialize(*record.metadata, record_iter->second.ann_list, os);
1279 } else {
1280 Serialize(*record.metadata, {}, os);
1281 }
1282
1283 if (record.metadata->IsForeign()) {
1284 os << "\n\n";
1285 return;
1286 }
1287
1288 os << " {";
1289
1290 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1291 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1292 SerializeFields(record, os, true);
1293 } else {
1294 os << "\n";
1295 SerializeFields(record, os, false);
1296 }
1297
1298 os << "}\n\n";
1299 }
1300
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1301 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1302 {
1303 constexpr size_t INFO_OFFSET = 80;
1304
1305 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1306 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1307
1308 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1309
1310 size_t field_idx = 0;
1311
1312 std::stringstream ss;
1313 for (const auto &f : record.field_list) {
1314 ss << "\t" << f.type.GetPandasmName() << " " << f.name;
1315 if (f.metadata->GetValue().has_value()) {
1316 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1317 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1318 }
1319 if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1320 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1321 }
1322 }
1323 if (record_in_table) {
1324 const auto field_iter = record_iter->second.field_annotations.find(f.name);
1325 if (field_iter != record_iter->second.field_annotations.end()) {
1326 Serialize(*f.metadata, field_iter->second, ss);
1327 } else {
1328 Serialize(*f.metadata, {}, ss);
1329 }
1330 } else {
1331 Serialize(*f.metadata, {}, ss);
1332 }
1333
1334 if (print_information) {
1335 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1336 } else {
1337 os << ss.str() << "\n";
1338 }
1339
1340 ss.str(std::string());
1341 ss.clear();
1342
1343 field_idx++;
1344 }
1345 }
1346
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1347 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1348 {
1349 os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1350
1351 if (method.params.size() > 0) {
1352 os << method.params[0].type.GetPandasmName() << " a0";
1353
1354 for (uint8_t i = 1; i < method.params.size(); i++) {
1355 os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1356 }
1357 }
1358 os << ")";
1359
1360 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1361
1362 const auto method_iter = prog_ann_.method_annotations.find(signature);
1363 if (method_iter != prog_ann_.method_annotations.end()) {
1364 Serialize(*method.metadata, method_iter->second, os);
1365 } else {
1366 Serialize(*method.metadata, {}, os);
1367 }
1368
1369 auto method_info_it = prog_info_.methods_info.find(signature);
1370 bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1371 if (print_method_info) {
1372 const MethodInfo &method_info = method_info_it->second;
1373
1374 size_t width = 0;
1375 for (const auto &i : method.ins) {
1376 if (i.ToString().size() > width) {
1377 width = i.ToString().size();
1378 }
1379 }
1380
1381 os << " { # " << method_info.method_info << "\n# CODE:\n";
1382
1383 for (size_t i = 0; i < method.ins.size(); i++) {
1384 os << "\t" << std::setw(width) << std::left << method.ins.at(i).ToString("", true, method.regs_num) << " # "
1385 << method_info.instructions_info.at(i) << "\n";
1386 }
1387 } else {
1388 os << " {\n";
1389
1390 for (const auto &i : method.ins) {
1391 if (i.set_label) {
1392 std::string ins = i.ToString("", true, method.regs_num);
1393 std::string delim = ": ";
1394 size_t pos = ins.find(delim);
1395 std::string label = ins.substr(0, pos);
1396 ins.erase(0, pos + delim.length());
1397 os << label << ":\n\t" << ins << "\n";
1398 } else {
1399 os << "\t" << i.ToString("", true, method.regs_num) << "\n";
1400 }
1401 }
1402 }
1403
1404 if (method.catch_blocks.size() != 0) {
1405 os << "\n";
1406
1407 for (const auto &catch_block : method.catch_blocks) {
1408 Serialize(catch_block, os);
1409
1410 os << "\n";
1411 }
1412 }
1413
1414 if (print_method_info) {
1415 const MethodInfo &method_info = method_info_it->second;
1416 SerializeLineNumberTable(method_info.line_number_table, os);
1417 SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1418 }
1419
1420 os << "}\n\n";
1421 }
1422
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1423 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1424 {
1425 if (catch_block.exception_record == "") {
1426 os << ".catchall ";
1427 } else {
1428 os << ".catch " << catch_block.exception_record << ", ";
1429 }
1430
1431 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1432
1433 if (catch_block.catch_end_label != "") {
1434 os << ", " << catch_block.catch_end_label;
1435 }
1436 }
1437
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1438 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1439 {
1440 auto bool_attributes = meta.GetBoolAttributes();
1441 auto attributes = meta.GetAttributes();
1442 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1443 return;
1444 }
1445
1446 os << " <";
1447
1448 size_t size = bool_attributes.size();
1449 size_t idx = 0;
1450 for (const auto &attr : bool_attributes) {
1451 os << attr;
1452 ++idx;
1453
1454 if (!attributes.empty() || !ann_list.empty() || idx < size) {
1455 os << ", ";
1456 }
1457 }
1458
1459 size = attributes.size();
1460 idx = 0;
1461 for (const auto &[key, values] : attributes) {
1462 for (size_t i = 0; i < values.size(); i++) {
1463 os << key << "=" << values[i];
1464
1465 if (i < values.size() - 1) {
1466 os << ", ";
1467 }
1468 }
1469
1470 ++idx;
1471
1472 if (!ann_list.empty() || idx < size) {
1473 os << ", ";
1474 }
1475 }
1476
1477 size = ann_list.size();
1478 idx = 0;
1479 for (const auto &[key, value] : ann_list) {
1480 os << key << "=" << value;
1481
1482 ++idx;
1483
1484 if (idx < size) {
1485 os << ", ";
1486 }
1487 }
1488
1489 os << ">";
1490 }
1491
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const1492 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
1493 std::ostream &os) const
1494 {
1495 if (line_number_table.empty()) {
1496 return;
1497 }
1498
1499 os << "\n# LINE_NUMBER_TABLE:\n";
1500 for (const auto &line_info : line_number_table) {
1501 os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
1502 }
1503 }
1504
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const1505 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
1506 const pandasm::Function &method, std::ostream &os) const
1507 {
1508 if (local_variable_table.empty()) {
1509 return;
1510 }
1511
1512 os << "\n# LOCAL_VARIABLE_TABLE:\n";
1513 os << "#\t Start End Register Name Signature\n";
1514 const int START_WIDTH = 5;
1515 const int END_WIDTH = 4;
1516 const int REG_WIDTH = 8;
1517 const int NAME_WIDTH = 14;
1518 for (const auto &variable_info : local_variable_table) {
1519 std::ostringstream reg_stream;
1520 reg_stream << variable_info.reg_number << '(';
1521 if (variable_info.reg_number < 0) {
1522 reg_stream << "acc";
1523 } else {
1524 uint32_t vreg = variable_info.reg_number;
1525 uint32_t first_arg_reg = method.GetTotalRegs();
1526 if (vreg < first_arg_reg) {
1527 reg_stream << 'v' << vreg;
1528 } else {
1529 reg_stream << 'a' << vreg - first_arg_reg;
1530 }
1531 }
1532 reg_stream << ')';
1533
1534 os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << " ";
1535 os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << " ";
1536 os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
1537 os << std::setw(NAME_WIDTH) << std::right << variable_info.name << " " << variable_info.type;
1538 if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
1539 os << " (" << variable_info.type_signature << ")";
1540 }
1541 os << "\n";
1542 }
1543 }
1544
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1545 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1546 {
1547 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1548 }
1549
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const1550 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1551 size_t idx) const
1552 {
1553 std::stringstream name;
1554 const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
1555
1556 if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
1557 name << GetMethodSignature(offset);
1558 } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
1559 name << '\"';
1560 name << StringDataToString(file_->GetStringData(offset));
1561 name << '\"';
1562 } else {
1563 ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
1564 pandasm::LiteralArray lit_array;
1565 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
1566 name << SerializeLiteralArray(lit_array);
1567 }
1568
1569 return name.str();
1570 }
1571
GetRecordLanguage(panda_file::File::EntityId class_id) const1572 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
1573 {
1574 if (file_->IsExternal(class_id)) {
1575 return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1576 }
1577
1578 panda_file::ClassDataAccessor cda(*file_, class_id);
1579 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1580 }
1581
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)1582 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
1583 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
1584 panda_file::File::EntityId code_id)
1585 {
1586 const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
1587 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1588 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1589 size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1590
1591 if (idx != std::numeric_limits<size_t>::max()) {
1592 if (label_table->find(idx) == label_table->end()) {
1593 std::stringstream ss {};
1594 ss << "jump_label_" << label_table->size();
1595 (*label_table)[idx] = ss.str();
1596 }
1597
1598 pa_ins->imms.clear();
1599 pa_ins->ids.push_back(label_table->at(idx));
1600 } else {
1601 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1602 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1603 << ": invalid jump offset 0x" << jmp_offset
1604 << " - jumping in the middle of another instruction!";
1605 }
1606 } else {
1607 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1608 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1609 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
1610 }
1611 }
1612
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1613 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1614 panda_file::File::EntityId code_id) const
1615 {
1616 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1617
1618 const auto ins_sz = code_accessor.GetCodeSize();
1619 const auto ins_arr = code_accessor.GetInstructions();
1620
1621 method->regs_num = code_accessor.GetNumVregs();
1622
1623 auto bc_ins = BytecodeInstruction(ins_arr);
1624 const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1625
1626 LabelTable label_table = GetExceptions(method, method_id, code_id);
1627
1628 IdList unknown_external_methods {};
1629
1630 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1631 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1632 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1633 << "). bytecode instructions sequence corrupted for method " << method->name
1634 << "! went out of bounds";
1635
1636 break;
1637 }
1638
1639 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
1640 if (pa_ins.IsJump()) {
1641 translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
1642 }
1643
1644 // check if method id is unknown external method. if so, emplace it in table
1645 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1646 const auto arg_method_idx = bc_ins.GetId().AsIndex();
1647 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1648
1649 const auto arg_method_signature = GetMethodSignature(arg_method_id);
1650
1651 const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
1652 const bool is_external = file_->IsExternal(arg_method_id);
1653
1654 if (is_external && !is_present) {
1655 unknown_external_methods.push_back(arg_method_id);
1656 }
1657 }
1658
1659 method->ins.push_back(pa_ins);
1660 bc_ins = bc_ins.GetNext();
1661 }
1662
1663 for (const auto &pair : label_table) {
1664 method->ins[pair.first].label = pair.second;
1665 method->ins[pair.first].set_label = true;
1666 }
1667
1668 return unknown_external_methods;
1669 }
1670
1671 } // namespace panda::disasm
1672