1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19
20 #include <iomanip>
21
22 namespace panda::disasm {
23
Disassembler(Disassembler && that)24 Disassembler::Disassembler(Disassembler &&that)
25 {
26 this->file_ = std::move(that.file_);
27
28 this->prog_ = std::move(that.prog_);
29
30 this->file_language_ = std::move(that.file_language_);
31
32 this->record_name_to_id_ = std::move(that.record_name_to_id_);
33 this->method_name_to_id_ = std::move(that.method_name_to_id_);
34
35 this->skip_strings_ = std::move(that.skip_strings_);
36 this->quiet_ = std::move(that.quiet_);
37
38 this->prog_info_ = std::move(that.prog_info_);
39 this->prog_j_ann_ = std::move(that.prog_j_ann_);
40 }
41
Disassemble(const std::string & filename_in,bool quiet,bool skip_strings)42 void Disassembler::Disassemble(const std::string &filename_in, bool quiet, bool skip_strings)
43 {
44 auto file_new = panda_file::File::Open(filename_in);
45 file_.swap(file_new);
46
47 if (file_ != nullptr) {
48 prog_ = pandasm::Program {};
49
50 record_name_to_id_.clear();
51 method_name_to_id_.clear();
52
53 skip_strings_ = skip_strings;
54 quiet_ = quiet;
55
56 prog_info_ = ProgInfo {};
57 prog_j_ann_ = ProgJavaAnnotations {};
58
59 GetLiteralArrays();
60 GetRecords();
61
62 GetLanguageSpecificMetadata();
63 } else {
64 LOG(ERROR, DISASSEMBLER) << "> Failed to open the specified pandafile: <" << filename_in << ">";
65 }
66 }
67
CollectInfo()68 void Disassembler::CollectInfo()
69 {
70 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
71
72 for (const auto &pair : record_name_to_id_) {
73 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
74 }
75
76 for (const auto &pair : method_name_to_id_) {
77 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
78 }
79 }
80
Serialize(std::ostream & os,bool add_separators,bool print_information) const81 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
82 {
83 if (os.bad()) {
84 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
85 return;
86 }
87
88 if (file_ != nullptr) {
89 os << "#\n# source binary: " << file_->GetFilename() << "\n#\n\n";
90 }
91
92 SerializeLanguage(os);
93
94 if (add_separators) {
95 os << "# ====================\n"
96 "# LITERALS\n\n";
97 }
98
99 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
100
101 size_t index = 0;
102 for (const auto &pair : prog_.literalarray_table) {
103 Serialize(index++, pair.second, os);
104 }
105
106 os << "\n";
107
108 if (add_separators) {
109 os << "# ====================\n"
110 "# RECORDS\n\n";
111 }
112
113 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
114
115 for (const auto &r : prog_.record_table) {
116 Serialize(r.second, os, print_information);
117 }
118
119 if (add_separators) {
120 os << "# ====================\n"
121 "# METHODS\n\n";
122 }
123
124 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
125
126 for (const auto &m : prog_.function_table) {
127 Serialize(m.second, os, print_information);
128 }
129 }
130
IsPandasmFriendly(const char c)131 inline bool Disassembler::IsPandasmFriendly(const char c)
132 {
133 return isalnum(c) || c == '_';
134 }
135
IsSystemType(const std::string & type_name)136 inline bool Disassembler::IsSystemType(const std::string &type_name)
137 {
138 bool is_array_type = (type_name.find('[') != std::string::npos);
139 bool is_global = (type_name == "_GLOBAL");
140
141 return is_array_type || is_global;
142 }
143
MakePandasmFriendly(const std::string & str)144 std::string Disassembler::MakePandasmFriendly(const std::string &str)
145 {
146 auto str_new = str;
147 std::replace_if(
148 str_new.begin(), str_new.end(), [](const char c) { return !IsPandasmFriendly(c); }, '_');
149
150 return str_new;
151 }
152
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)153 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
154 {
155 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id.GetOffset();
156
157 if (record == nullptr) {
158 LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
159 return;
160 }
161
162 auto language = GetClassLanguage(record_id);
163 record->name = GetFullRecordName(record_id, language);
164
165 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
166
167 GetMetaData(record, record_id);
168
169 if (!file_->IsExternal(record_id)) {
170 GetMethods(record_id);
171 GetFields(record, record_id);
172 }
173 }
174
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)175 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
176 {
177 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id.GetOffset();
178
179 if (method == nullptr) {
180 LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
181 return;
182 }
183
184 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
185 pandasm::extensions::Language language = PFLangToPandasmLang(method_accessor.GetSourceLang());
186
187 method->name = GetFullMethodName(method_id, language);
188
189 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
190
191 GetParams(method, method_accessor.GetProtoId());
192 GetMetaData(method, method_id);
193
194 if (method->HasImplementation()) {
195 if (method_accessor.GetCodeId().has_value()) {
196 const IdList id_list = GetInstructions(method, method_id, method_accessor.GetCodeId().value());
197
198 for (const auto &id : id_list) {
199 pandasm::Function new_method("", language);
200 GetMethod(&new_method, id);
201
202 method_name_to_id_.emplace(new_method.name, id);
203 prog_.function_table.emplace(new_method.name, std::move(new_method));
204 }
205 } else {
206 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << method_id << " ("
207 << "0x" << std::hex << method_id
208 << "). Implementation of method is expected, but no \'CODE\' tag was found";
209 }
210 }
211 }
212
213 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const214 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
215 const panda_file::LiteralDataAccessor::LiteralValue &value) const
216 {
217 panda_file::File::EntityId id(std::get<uint32_t>(value));
218 auto sp = file_->GetSpanFromId(id);
219 // CODECHECK-NOLINTNEXTLINE(C_RULE_ID_HORIZON_SPACE)
220 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
221 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
222 for (size_t i = 0; i < len; i++) {
223 pandasm::LiteralArray::Literal lit;
224 lit.tag_ = tag;
225 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
226 lit_array->literals_.push_back(lit);
227 }
228 return;
229 }
230 for (size_t i = 0; i < len; i++) {
231 // CODECHECK-NOLINTNEXTLINE(C_RULE_ID_HORIZON_SPACE)
232 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
233 pandasm::LiteralArray::Literal lit;
234 lit.tag_ = tag;
235 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
236 lit_array->literals_.push_back(lit);
237 }
238 }
239
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const240 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
241 const panda_file::LiteralDataAccessor::LiteralValue &value,
242 const panda_file::LiteralTag &tag) const
243 {
244 pandasm::LiteralArray::Literal lit;
245 lit.tag_ = tag;
246 switch (tag) {
247 case panda_file::LiteralTag::BOOL: {
248 lit.value_ = std::get<bool>(value);
249 break;
250 }
251 case panda_file::LiteralTag::ACCESSOR:
252 case panda_file::LiteralTag::NULLVALUE: {
253 lit.value_ = std::get<uint8_t>(value);
254 break;
255 }
256 case panda_file::LiteralTag::METHODAFFILIATE: {
257 lit.value_ = std::get<uint16_t>(value);
258 break;
259 }
260 case panda_file::LiteralTag::INTEGER: {
261 lit.value_ = std::get<uint32_t>(value);
262 break;
263 }
264 case panda_file::LiteralTag::DOUBLE: {
265 lit.value_ = std::get<double>(value);
266 break;
267 }
268 case panda_file::LiteralTag::STRING:
269 case panda_file::LiteralTag::METHOD:
270 case panda_file::LiteralTag::GENERATORMETHOD: {
271 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
272 lit.value_ = StringDataToString(str_data);
273 break;
274 }
275 case panda_file::LiteralTag::TAGVALUE: {
276 return;
277 }
278 default: {
279 UNREACHABLE();
280 }
281 }
282 lit_array->literals_.push_back(lit);
283 }
284
GetLiteralArray(pandasm::LiteralArray * lit_array,const size_t index) const285 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, const size_t index) const
286 {
287 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
288
289 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
290
291 lit_array_accessor.EnumerateLiteralVals(
292 index, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
293 const panda_file::LiteralTag &tag) {
294 switch (tag) {
295 case panda_file::LiteralTag::ARRAY_I8: {
296 FillLiteralArrayData<uint8_t>(lit_array, tag, value);
297 break;
298 }
299 case panda_file::LiteralTag::ARRAY_I16: {
300 FillLiteralArrayData<uint16_t>(lit_array, tag, value);
301 break;
302 }
303 case panda_file::LiteralTag::ARRAY_I32: {
304 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
305 break;
306 }
307 case panda_file::LiteralTag::ARRAY_I64: {
308 FillLiteralArrayData<uint64_t>(lit_array, tag, value);
309 break;
310 }
311 case panda_file::LiteralTag::ARRAY_F32: {
312 FillLiteralArrayData<float>(lit_array, tag, value);
313 break;
314 }
315 case panda_file::LiteralTag::ARRAY_F64: {
316 FillLiteralArrayData<double>(lit_array, tag, value);
317 break;
318 }
319 case panda_file::LiteralTag::ARRAY_STRING: {
320 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
321 break;
322 }
323 default: {
324 FillLiteralData(lit_array, value, tag);
325 break;
326 }
327 }
328 });
329 }
330
GetLiteralArrays()331 void Disassembler::GetLiteralArrays()
332 {
333 const auto lit_arrays_id = file_->GetLiteralArraysId();
334
335 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id.GetOffset() << "\n";
336
337 panda_file::LiteralDataAccessor lit_array_accessor(*file_, lit_arrays_id);
338 size_t num_litarrays = lit_array_accessor.GetLiteralNum();
339 for (size_t index = 0; index < num_litarrays; index++) {
340 panda::pandasm::LiteralArray lit_ar;
341 GetLiteralArray(&lit_ar, index);
342 prog_.literalarray_table.emplace(std::to_string(index), lit_ar);
343 }
344 }
345
GetRecords()346 void Disassembler::GetRecords()
347 {
348 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
349
350 const auto class_idx = file_->GetClasses();
351 for (size_t i = 0; i < class_idx.size(); i++) {
352 uint32_t id = class_idx[i];
353
354 if (id > file_->GetHeader()->file_size) {
355 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec
356 << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i << " ("
357 << "0x" << std::hex << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i
358 << "). binary file corrupted. record offset (" << id << ") out of bounds ("
359 << file_->GetHeader()->file_size << ")!";
360 break;
361 }
362
363 const panda_file::File::EntityId record_id {id};
364 auto language = GetClassLanguage(record_id);
365 if (language != file_language_ && file_language_ != pandasm::extensions::Language::ECMASCRIPT) {
366 if (file_language_ == pandasm::extensions::Language::PANDA_ASSEMBLY) {
367 file_language_ = language;
368 } else {
369 LOG(ERROR, DISASSEMBLER) << "> possible error encountered at " << std::dec
370 << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i << " ("
371 << "0x" << std::hex << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i
372 << "). record's language differs from file's language (or is default)!";
373 }
374 }
375
376 pandasm::Record record("", language);
377 GetRecord(&record, record_id);
378
379 if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
380 record_name_to_id_.emplace(record.name, record_id);
381 prog_.record_table.emplace(record.name, std::move(record));
382 }
383 }
384 }
385
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)386 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
387 {
388 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
389
390 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
391 pandasm::Field field(record->language);
392
393 panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
394 field.name = StringDataToString(file_->GetStringData(field_name_id));
395
396 uint32_t field_type = field_accessor.GetType();
397 field.type = FieldTypeToPandasmType(field_type);
398
399 GetMetaData(&field, field_accessor.GetFieldId());
400
401 record->field_list.push_back(std::move(field));
402 });
403 }
404
GetMethods(const panda_file::File::EntityId & record_id)405 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
406 {
407 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
408
409 pandasm::extensions::Language language = PFLangToPandasmLang(class_accessor.GetSourceLang());
410
411 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
412 const auto method_id = method_accessor.GetMethodId();
413
414 pandasm::Function method("", language);
415 GetMethod(&method, method_id);
416
417 if (prog_.function_table.find(method.name) == prog_.function_table.end()) {
418 method_name_to_id_.emplace(method.name, method_id);
419 prog_.function_table.emplace(method.name, std::move(method));
420 }
421 });
422 }
423
GetParams(pandasm::Function * method,const panda_file::File::EntityId & proto_id) const424 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &proto_id) const
425 {
426 /**
427 * frame size - 2^16 - 1
428 */
429 static const uint32_t MAX_ARG_NUM = 0xFFFF;
430
431 LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << proto_id.GetOffset();
432
433 if (method == nullptr) {
434 LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
435 return;
436 }
437
438 panda_file::ProtoDataAccessor proto_accessor(*file_, proto_id);
439
440 auto params_num = proto_accessor.GetNumArgs();
441 if (params_num > MAX_ARG_NUM) {
442 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << proto_id.GetOffset() << " ("
443 << "0x" << std::hex << proto_id.GetOffset() << "). number of function's arguments ("
444 << params_num << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
445
446 return;
447 }
448
449 size_t ref_idx = 0;
450 method->return_type = PFTypeToPandasmType(proto_accessor.GetReturnType(), proto_accessor, ref_idx);
451
452 for (uint8_t i = 0; i < params_num; i++) {
453 auto arg_type = PFTypeToPandasmType(proto_accessor.GetArgType(i), proto_accessor, ref_idx);
454 method->params.push_back(pandasm::Function::Parameter(arg_type, method->language));
455 }
456 }
457
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const458 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
459 panda_file::File::EntityId code_id) const
460 {
461 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id.GetOffset();
462 if (method == nullptr) {
463 LOG(DEBUG, DISASSEMBLER) << "> nullptr received!\n";
464 return LabelTable {};
465 }
466 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
467
468 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
469 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
470
471 size_t try_idx = 0;
472 LabelTable label_table {};
473 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
474 pandasm::Function::CatchBlock catch_block_pa {};
475 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
476 return false;
477 }
478 size_t catch_idx = 0;
479 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
480 auto class_idx = catch_block.GetTypeIdx();
481 if (class_idx == panda_file::INVALID_INDEX) {
482 catch_block_pa.exception_record = "";
483 } else {
484 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
485 auto language = GetClassLanguage(class_id);
486 catch_block_pa.exception_record = GetFullRecordName(class_id, language);
487 }
488 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
489 catch_idx)) {
490 return false;
491 }
492
493 method->catch_blocks.push_back(catch_block_pa);
494 catch_block_pa.catch_begin_label = "";
495 catch_block_pa.catch_end_label = "";
496 catch_idx++;
497
498 return true;
499 });
500 try_idx++;
501
502 return true;
503 });
504
505 return label_table;
506 }
507
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const508 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
509 const panda_file::CodeDataAccessor::TryBlock &try_block,
510 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
511 size_t try_idx) const
512 {
513 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
514 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
515
516 const size_t try_begin_idx = GetBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
517 const size_t try_end_idx = GetBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
518
519 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
520 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
521 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
522 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
523
524 if (!try_begin_offset_in_range || !try_begin_offset_valid) {
525 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! addr is: 0x" << std::hex
526 << try_begin_bc_ins.GetAddress();
527 return false;
528 } else {
529 std::stringstream ss {};
530 ss << "try_begin_label_" << try_idx;
531
532 LabelTable::iterator it = label_table->find(try_begin_idx);
533 if (it == label_table->end()) {
534 catch_block_pa->try_begin_label = ss.str();
535 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
536 } else {
537 catch_block_pa->try_begin_label = it->second;
538 }
539 }
540
541 if (!try_end_offset_in_range || !try_end_offset_valid) {
542 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! addr is: 0x" << std::hex
543 << try_end_bc_ins.GetAddress();
544 return false;
545 } else {
546 std::stringstream ss {};
547 ss << "try_end_label_" << try_idx;
548
549 LabelTable::iterator it = label_table->find(try_end_idx);
550 if (it == label_table->end()) {
551 catch_block_pa->try_end_label = ss.str();
552 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
553 } else {
554 catch_block_pa->try_end_label = it->second;
555 }
556 }
557
558 return true;
559 }
560
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const561 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
562 const panda_file::CodeDataAccessor::CatchBlock &catch_block,
563 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
564 size_t try_idx, size_t catch_idx) const
565 {
566 const auto handler_begin_offset = catch_block.GetHandlerPc();
567 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
568
569 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
570 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
571
572 const size_t handler_begin_idx = GetBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
573 const size_t handler_end_idx = GetBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
574
575 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
576 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() > handler_end_bc_ins.GetAddress();
577 const bool handler_end_present = catch_block.GetCodeSize() != 0;
578 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
579 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
580
581 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
582 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! addr is: 0x" << std::hex
583 << handler_begin_bc_ins.GetAddress();
584 return false;
585 } else {
586 std::stringstream ss {};
587 ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
588
589 LabelTable::iterator it = label_table->find(handler_begin_idx);
590 if (it == label_table->end()) {
591 catch_block_pa->catch_begin_label = ss.str();
592 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
593 } else {
594 catch_block_pa->catch_begin_label = it->second;
595 }
596 }
597
598 if (!handler_end_offset_in_range || !handler_end_offset_valid) {
599 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! addr is: 0x" << std::hex
600 << handler_end_bc_ins.GetAddress();
601 return false;
602 } else if (handler_end_present) {
603 std::stringstream ss {};
604 ss << "handler_end_label_" << try_idx << "_" << catch_idx;
605
606 LabelTable::iterator it = label_table->find(handler_end_idx);
607 if (it == label_table->end()) {
608 catch_block_pa->catch_end_label = ss.str();
609 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
610 } else {
611 catch_block_pa->catch_end_label = it->second;
612 }
613 }
614
615 return true;
616 }
617
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const618 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
619 {
620 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id;
621
622 if (method == nullptr) {
623 LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
624 return;
625 }
626
627 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
628
629 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
630
631 if (!method_accessor.IsStatic()) {
632 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
633 auto this_type = pandasm::Type::FromDescriptor(class_name);
634
635 this_type = pandasm::Type(MakePandasmFriendly(this_type.GetComponentName()), this_type.GetRank());
636
637 LOG(DEBUG, DISASSEMBLER) << "method is not static. emplacing self-argument of type " << this_type.GetName();
638
639 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, method->language));
640 } else {
641 method->metadata->SetAttribute("static");
642 }
643
644 if (file_->IsExternal(method_accessor.GetMethodId())) {
645 method->metadata->SetAttribute("external");
646 }
647
648 if (method_accessor.IsNative()) {
649 method->metadata->SetAttribute("native");
650 }
651
652 if (method_accessor.IsAbstract()) {
653 method->metadata->SetAttribute("noimpl");
654 }
655
656 // no language data for external methods
657 const bool is_ctor_js =
658 method_name_raw == pandasm::extensions::GetCtorName(pandasm::extensions::Language::ECMASCRIPT);
659 const bool is_cctor_js =
660 method_name_raw == pandasm::extensions::GetCctorName(pandasm::extensions::Language::ECMASCRIPT);
661 const bool is_ctor_panda =
662 method_name_raw == pandasm::extensions::GetCtorName(pandasm::extensions::Language::PANDA_ASSEMBLY);
663 const bool is_cctor_panda =
664 method_name_raw == pandasm::extensions::GetCctorName(pandasm::extensions::Language::PANDA_ASSEMBLY);
665
666 const bool is_ctor = is_ctor_js || is_ctor_panda;
667 const bool is_cctor = is_cctor_js || is_cctor_panda;
668
669 if (is_ctor) {
670 method->metadata->SetAttribute("ctor");
671 } else if (is_cctor) {
672 method->metadata->SetAttribute("cctor");
673 }
674 }
675
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const676 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
677 {
678 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id;
679
680 if (record == nullptr) {
681 LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
682 return;
683 }
684
685 if (file_->IsExternal(record_id)) {
686 record->metadata->SetAttribute("external");
687 }
688 }
689
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id) const690 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id) const
691 {
692 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id;
693
694 if (field == nullptr) {
695 LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
696 return;
697 }
698
699 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
700
701 if (field_accessor.IsExternal()) {
702 field->metadata->SetAttribute("external");
703 }
704
705 if (field_accessor.IsStatic()) {
706 field->metadata->SetAttribute("static");
707 }
708 }
709
/**
 * Hook for reading language-specific annotations.
 * In this implementation it only emits a debug log line and reads nothing.
 */
void Disassembler::GetLanguageSpecificMetadata() const
{
    LOG(DEBUG, DISASSEMBLER) << "\n[getting language-specific annotations]\n";
}
714
715 // CODECHECK-NOLINTNEXTLINE(C_RULE_ID_FUNCTION_SIZE)
AnnotationTagToString(const char tag) const716 std::string Disassembler::AnnotationTagToString(const char tag) const
717 {
718 switch (tag) {
719 case '1':
720 return "u1";
721 case '2':
722 return "i8";
723 case '3':
724 return "u8";
725 case '4':
726 return "i16";
727 case '5':
728 return "u16";
729 case '6':
730 return "i32";
731 case '7':
732 return "u32";
733 case '8':
734 return "i64";
735 case '9':
736 return "u64";
737 case 'A':
738 return "f32";
739 case 'B':
740 return "f64";
741 case 'C':
742 return "string";
743 case 'D':
744 return "record";
745 case 'E':
746 return "method";
747 case 'F':
748 return "enum";
749 case 'G':
750 return "annotation";
751 case 'I':
752 return "void";
753 case 'J':
754 return "method_handle";
755 case 'K':
756 return "u1[]";
757 case 'L':
758 return "i8[]";
759 case 'M':
760 return "u8[]";
761 case 'N':
762 return "i16[]";
763 case 'O':
764 return "u16[]";
765 case 'P':
766 return "i32[]";
767 case 'Q':
768 return "u32[]";
769 case 'R':
770 return "i64[]";
771 case 'S':
772 return "u64[]";
773 case 'T':
774 return "f32[]";
775 case 'U':
776 return "f64[]";
777 case 'V':
778 return "string[]";
779 case 'W':
780 return "record[]";
781 case 'X':
782 return "method[]";
783 case 'Y':
784 return "enum[]";
785 case 'Z':
786 return "annotation[]";
787 case '@':
788 return "method_handle[]";
789 case '*':
790 return "nullptr string";
791 default:
792 return std::string();
793 }
794 }
795
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type) const796 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type) const
797 {
798 std::stringstream ss;
799
800 if (type == "i8") {
801 int8_t res = value.Get<int8_t>();
802 ss << static_cast<int>(res);
803 } else if (type == "u1" || type == "u8") {
804 uint8_t res = value.Get<uint8_t>();
805 ss << static_cast<unsigned int>(res);
806 } else if (type == "i16") {
807 ss << value.Get<int16_t>();
808 } else if (type == "u16") {
809 ss << value.Get<uint16_t>();
810 } else if (type == "i32") {
811 ss << value.Get<int32_t>();
812 } else if (type == "u32") {
813 ss << value.Get<uint32_t>();
814 } else if (type == "i64") {
815 ss << value.Get<int64_t>();
816 } else if (type == "u64") {
817 ss << value.Get<uint64_t>();
818 } else if (type == "f32") {
819 ss << value.Get<float>();
820 } else if (type == "f64") {
821 ss << value.Get<double>();
822 } else if (type == "string") {
823 const auto id = value.Get<panda_file::File::EntityId>();
824 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
825 } else if (type == "record") {
826 const auto id = value.Get<panda_file::File::EntityId>();
827 auto language = GetClassLanguage(id);
828 ss << GetFullRecordName(id, language);
829 } else if (type == "method") {
830 const auto id = value.Get<panda_file::File::EntityId>();
831 auto language = GetClassLanguage(id);
832 ss << GetFullMethodName(id, language);
833 } else if (type == "enum") {
834 const auto id = value.Get<panda_file::File::EntityId>();
835 panda_file::FieldDataAccessor field_accessor(*file_, id);
836 ss << GetFullRecordName(field_accessor.GetClassId(), pandasm::extensions::Language::PANDA_ASSEMBLY) << "."
837 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
838 } else if (type == "annotation") {
839 const auto id = value.Get<panda_file::File::EntityId>();
840 ss << "id_" << id.GetOffset();
841 } else if (type == "void") {
842 return std::string();
843 } else if (type == "method_handle") {
844 }
845
846 return ss.str();
847 }
848
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx) const849 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
850 const size_t idx) const
851 {
852 std::stringstream ss;
853
854 if (type == "i8") {
855 int8_t res = value.Get<int8_t>(idx);
856 ss << static_cast<int>(res);
857 } else if (type == "u1" || type == "u8") {
858 uint8_t res = value.Get<uint8_t>(idx);
859 ss << static_cast<unsigned int>(res);
860 } else if (type == "i16") {
861 ss << value.Get<int16_t>(idx);
862 } else if (type == "u16") {
863 ss << value.Get<uint16_t>(idx);
864 } else if (type == "i32") {
865 ss << value.Get<int32_t>(idx);
866 } else if (type == "u32") {
867 ss << value.Get<uint32_t>(idx);
868 } else if (type == "i64") {
869 ss << value.Get<int64_t>(idx);
870 } else if (type == "u64") {
871 ss << value.Get<uint64_t>(idx);
872 } else if (type == "f32") {
873 ss << value.Get<float>(idx);
874 } else if (type == "f64") {
875 ss << value.Get<double>(idx);
876 } else if (type == "string") {
877 const auto id = value.Get<panda_file::File::EntityId>(idx);
878 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
879 } else if (type == "record") {
880 const auto id = value.Get<panda_file::File::EntityId>(idx);
881 auto language = GetClassLanguage(id);
882 ss << GetFullRecordName(id, language);
883 } else if (type == "method") {
884 const auto id = value.Get<panda_file::File::EntityId>(idx);
885 panda_file::ClassDataAccessor method_accessor {*file_, id};
886 pandasm::extensions::Language language = PFLangToPandasmLang(method_accessor.GetSourceLang());
887 ss << GetFullMethodName(id, language);
888 } else if (type == "enum") {
889 const auto id = value.Get<panda_file::File::EntityId>(idx);
890 panda_file::FieldDataAccessor field_accessor(*file_, id);
891 ss << GetFullRecordName(field_accessor.GetClassId(), pandasm::extensions::Language::PANDA_ASSEMBLY) << "."
892 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
893 } else if (type == "annotation") {
894 const auto id = value.Get<panda_file::File::EntityId>(idx);
895 ss << "id_" << id.GetOffset();
896 } else if (type == "method_handle") {
897 } else if (type == "nullptr string") {
898 }
899
900 return ss.str();
901 }
902
GetFullMethodName(const panda_file::File::EntityId & method_id,pandasm::extensions::Language language) const903 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id,
904 pandasm::extensions::Language language) const
905 {
906 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
907
908 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
909
910 pandasm::Function method(method_name_raw, language);
911 GetParams(&method, method_accessor.GetProtoId());
912 GetMetaData(&method, method_id);
913
914 method.name = pandasm::MangleFunctionName(method.name, method.params, method.return_type);
915 method.name = MakePandasmFriendly(method.name);
916 std::string class_name = GetFullRecordName(method_accessor.GetClassId(), language);
917 if (IsSystemType(class_name)) {
918 class_name = "";
919 } else {
920 class_name += ".";
921 }
922
923 return class_name + method.name;
924 }
925
GetFullRecordName(const panda_file::File::EntityId & class_id,pandasm::extensions::Language language) const926 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id,
927 [[maybe_unused]] pandasm::extensions::Language language) const
928 {
929 std::string name = StringDataToString(file_->GetStringData(class_id));
930
931 auto type = pandasm::Type::FromDescriptor(name);
932 type = pandasm::Type(MakePandasmFriendly(type.GetComponentName()), type.GetRank());
933
934 return type.GetName();
935 }
936
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const937 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
938 {
939 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
940
941 if (file_->IsExternal(record_id)) {
942 return;
943 }
944
945 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
946 std::stringstream ss;
947
948 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
949 << class_accessor.GetClassId().GetOffset() << ", size: 0x" << std::setfill('0')
950 << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << class_accessor.GetSize() << " (" << std::dec
951 << class_accessor.GetSize() << ")";
952
953 record_info->record_info = ss.str();
954 ss.str(std::string());
955
956 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
957 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
958 << field_accessor.GetFieldId().GetOffset() << ", type: 0x" << std::hex << field_accessor.GetType();
959
960 record_info->fields_info.push_back(ss.str());
961
962 ss.str(std::string());
963 });
964 }
965
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const966 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
967 {
968 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
969
970 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
971 std::stringstream ss;
972
973 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
974 << method_accessor.GetMethodId().GetOffset();
975
976 if (method_accessor.GetCodeId().has_value()) {
977 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
978 << method_accessor.GetCodeId().value().GetOffset();
979
980 GetInsInfo(method_accessor.GetCodeId().value(), method_info);
981 } else {
982 ss << ", <no code>";
983 }
984
985 method_info->method_info = ss.str();
986 }
987
Serialize(size_t index,const pandasm::LiteralArray & lit_array,std::ostream & os) const988 void Disassembler::Serialize(size_t index, const pandasm::LiteralArray &lit_array, std::ostream &os) const
989 {
990 // remove once literals are supported in assembly_format
991
992 if (lit_array.literals_.empty()) {
993 return;
994 }
995
996 os << ".array array_" << index << " {\n";
997
998 SerializeValues(lit_array, os);
999
1000 os << "}\n";
1001 }
1002
// Trait selecting the type a literal of type T is stored as: make_storage<T>::type is the
// unsigned counterpart of T for integral T and T itself otherwise.
template <typename T>
using make_storage =
    typename std::conditional<std::is_integral<T>::value, std::make_unsigned<T>, std::common_type<T>>::type;
1005
1006 template <class T>
SerializeArrayValues(const pandasm::LiteralArray & lit_array,std::ostream & os)1007 static void SerializeArrayValues(const pandasm::LiteralArray &lit_array, std::ostream &os)
1008 {
1009 using S = typename make_storage<T>::type;
1010 os << std::get<S>(lit_array.literals_[0].value_);
1011
1012 for (size_t i = 1; i < lit_array.literals_.size(); i++) {
1013 os << ", " << bit_cast<T>(std::get<S>(lit_array.literals_[i].value_));
1014 }
1015 }
1016
SerializeValues(const pandasm::LiteralArray & lit_array,std::ostream & os) const1017 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, std::ostream &os) const
1018 {
1019 panda_file::LiteralTag tag = lit_array.literals_[0].tag_;
1020 switch (tag) {
1021 case panda_file::LiteralTag::ARRAY_I8: {
1022 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1023 os << "\t"
1024 << "i8 " << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_)))
1025 << "\n";
1026 }
1027 break;
1028 }
1029 case panda_file::LiteralTag::ARRAY_I16: {
1030 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1031 os << "\t"
1032 << "i16 " << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << "\n";
1033 }
1034 break;
1035 }
1036 case panda_file::LiteralTag::ARRAY_I32: {
1037 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1038 os << "\t"
1039 << "i32 " << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << "\n";
1040 }
1041 break;
1042 }
1043 case panda_file::LiteralTag::ARRAY_I64: {
1044 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1045 os << "\t"
1046 << "i64 " << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << "\n";
1047 }
1048 break;
1049 }
1050 case panda_file::LiteralTag::ARRAY_F64: {
1051 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1052 os << "\t"
1053 << "f64 " << std::get<double>(lit_array.literals_[i].value_) << "\n";
1054 }
1055 break;
1056 }
1057 case panda_file::LiteralTag::ARRAY_F32: {
1058 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1059 os << "\t"
1060 << "f32 " << std::get<float>(lit_array.literals_[i].value_) << "\n";
1061 }
1062 break;
1063 }
1064 case panda_file::LiteralTag::ARRAY_STRING: {
1065 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1066 os << "\t"
1067 << "string " << std::get<std::string>(lit_array.literals_[i].value_) << "\n";
1068 }
1069 break;
1070 }
1071 case panda_file::LiteralTag::BOOL: {
1072 os << "\t"
1073 << "bool " << std::get<bool>(lit_array.literals_[0].value_)
1074 << "\n";
1075 break;
1076 }
1077 case panda_file::LiteralTag::INTEGER: {
1078 os << "\t" << "i32 " << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[0].value_)) << "\n";
1079 break;
1080 }
1081 case panda_file::LiteralTag::DOUBLE: {
1082 os << "\t" << "i32 " << std::get<double>(lit_array.literals_[0].value_) << "\n";
1083 break;
1084 }
1085 case panda_file::LiteralTag::STRING: {
1086 os << "\t" << "string " << std::get<std::string>(lit_array.literals_[0].value_) << "\n";
1087 break;
1088 }
1089 case panda_file::LiteralTag::METHOD: {
1090 os << "\t" << "method " << std::get<std::string>(lit_array.literals_[0].value_) << "\n";
1091 break;
1092 }
1093 case panda_file::LiteralTag::GENERATORMETHOD: {
1094 os << "\t" << "generator_method " << std::get<std::string>(lit_array.literals_[0].value_) << "\n";
1095 break;
1096 }
1097 case panda_file::LiteralTag::ACCESSOR: {
1098 os << "\t"
1099 << "accessor "
1100 << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[0].value_))) << "\n";
1101 break;
1102 }
1103 case panda_file::LiteralTag::METHODAFFILIATE: {
1104 os << "\t" << "method_affiliate " << std::get<uint16_t>(lit_array.literals_[0].value_) << "\n";
1105 break;
1106 }
1107 case panda_file::LiteralTag::NULLVALUE: {
1108 os << "\t"
1109 << "null_value "
1110 << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[0].value_))) << "\n";
1111 break;
1112 }
1113 default:
1114 break;
1115 }
1116 }
1117
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1118 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1119 {
1120 if (IsSystemType(record.name)) {
1121 return;
1122 }
1123
1124 os << ".record " << record.name;
1125
1126 const auto record_iter = prog_j_ann_.record_annotations.find(record.name);
1127 const bool record_in_table = record_iter != prog_j_ann_.record_annotations.end();
1128 if (record_in_table) {
1129 Serialize(*record.metadata, record_iter->second.ann_list, os);
1130 } else {
1131 Serialize(*record.metadata, {}, os);
1132 }
1133
1134 if (record.metadata->IsForeign()) {
1135 os << "\n\n";
1136 return;
1137 }
1138
1139 os << " {\n";
1140
1141 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1142 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1143 SerializeFields(record, os, true);
1144 } else {
1145 SerializeFields(record, os, false);
1146 }
1147
1148 os << "}\n\n";
1149 }
1150
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1151 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1152 {
1153 constexpr size_t INFO_OFFSET = 80;
1154
1155 const auto record_iter = prog_j_ann_.record_annotations.find(record.name);
1156 const bool record_in_table = record_iter != prog_j_ann_.record_annotations.end();
1157 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1158 size_t field_idx = 0;
1159
1160 std::stringstream ss;
1161 for (const auto &f : record.field_list) {
1162 ss << "\t" << f.type.GetName() << " " << f.name;
1163 if (record_in_table) {
1164 const auto field_iter = record_iter->second.field_annotations.find(f.name);
1165 if (field_iter != record_iter->second.field_annotations.end()) {
1166 Serialize(*f.metadata, field_iter->second, ss);
1167 } else {
1168 Serialize(*f.metadata, {}, ss);
1169 }
1170 } else {
1171 Serialize(*f.metadata, {}, ss);
1172 }
1173
1174 if (print_information) {
1175 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1176 } else {
1177 os << ss.str() << "\n";
1178 }
1179
1180 ss.str(std::string());
1181 ss.clear();
1182
1183 field_idx++;
1184 }
1185 }
1186
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1187 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1188 {
1189 os << ".function " << method.return_type.GetName() << " " << method.name << "(";
1190
1191 if (method.params.size() > 0) {
1192 os << method.params[0].type.GetName() << " a0";
1193
1194 for (uint8_t i = 1; i < method.params.size(); i++) {
1195 os << ", " << method.params[i].type.GetName() << " a" << (size_t)i;
1196 }
1197 }
1198 os << ")";
1199
1200 const auto method_iter = prog_j_ann_.method_annotations.find(method.name);
1201 if (method_iter != prog_j_ann_.method_annotations.end()) {
1202 Serialize(*method.metadata, method_iter->second, os);
1203 } else {
1204 Serialize(*method.metadata, {}, os);
1205 }
1206
1207 if (!method.HasImplementation()) {
1208 os << "\n\n";
1209 return;
1210 }
1211
1212 if (print_information && prog_info_.methods_info.find(method.name) != prog_info_.methods_info.end()) {
1213 const auto method_info = prog_info_.methods_info.at(method.name);
1214
1215 size_t width = 0;
1216 for (const auto &i : method.ins) {
1217 if (i.ToString().size() > width) {
1218 width = i.ToString().size();
1219 }
1220 }
1221
1222 os << " { # " << method_info.method_info << "\n";
1223
1224 for (size_t i = 0; i < method.ins.size(); i++) {
1225 os << "\t" << std::setw(width) << std::left << method.ins.at(i).ToString("", true, method.regs_num) << " # "
1226 << method_info.instructions_info.at(i) << "\n";
1227 }
1228 } else {
1229 os << " {\n";
1230
1231 for (const auto &i : method.ins) {
1232 os << "\t" << i.ToString("", true, method.regs_num) << "\n";
1233 }
1234 }
1235
1236 if (method.catch_blocks.size() != 0) {
1237 os << "\n";
1238
1239 for (const auto &catch_block : method.catch_blocks) {
1240 Serialize(catch_block, os);
1241
1242 os << "\n";
1243 }
1244 }
1245
1246 os << "}\n\n";
1247 }
1248
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1249 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1250 {
1251 if (catch_block.exception_record == "") {
1252 os << ".catchall ";
1253 } else {
1254 os << ".catch " << catch_block.exception_record << ", ";
1255 }
1256
1257 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1258
1259 if (catch_block.catch_end_label != "") {
1260 os << ", " << catch_block.catch_end_label;
1261 }
1262 }
1263
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1264 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1265 {
1266 auto bool_attributes = meta.GetBoolAttributes();
1267 auto attributes = meta.GetAttributes();
1268
1269 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1270 return;
1271 }
1272
1273 os << " <";
1274
1275 size_t size = bool_attributes.size();
1276 size_t idx = 0;
1277 for (const auto &attr : bool_attributes) {
1278 os << attr;
1279 ++idx;
1280
1281 if (!attributes.empty() || !ann_list.empty() || idx < size) {
1282 os << ", ";
1283 }
1284 }
1285
1286 size = attributes.size();
1287 idx = 0;
1288 for (const auto &[key, values] : attributes) {
1289 for (size_t i = 0; i < values.size(); i++) {
1290 os << key << "=" << values[i];
1291
1292 if (i < values.size() - 1) {
1293 os << ", ";
1294 }
1295 }
1296
1297 ++idx;
1298
1299 if (!ann_list.empty() || idx < size) {
1300 os << ", ";
1301 }
1302 }
1303
1304 size = ann_list.size();
1305 idx = 0;
1306 for (const auto &[key, value] : ann_list) {
1307 os << key << "=" << value;
1308
1309 ++idx;
1310
1311 if (idx < size) {
1312 os << ", ";
1313 }
1314 }
1315
1316 os << ">";
1317 }
1318
SerializeLanguage(std::ostream & os) const1319 void Disassembler::SerializeLanguage(std::ostream &os) const
1320 {
1321 std::string lang = pandasm::extensions::LanguageToString(file_language_);
1322 if (!lang.empty()) {
1323 os << ".language " << lang << "\n\n";
1324 }
1325 }
1326
PFLangToPandasmLang(const std::optional<panda_file::SourceLang> & language) const1327 pandasm::extensions::Language Disassembler::PFLangToPandasmLang(
1328 const std::optional<panda_file::SourceLang> &language) const
1329 {
1330 const auto lang = language.value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1331 switch (lang) {
1332 case panda_file::SourceLang::ECMASCRIPT:
1333 return pandasm::extensions::Language::ECMASCRIPT;
1334 case panda_file::SourceLang::PANDA_ASSEMBLY:
1335 [[fallthrough]];
1336 default:
1337 return pandasm::extensions::Language::PANDA_ASSEMBLY;
1338 }
1339 }
1340
StringDataToString(panda_file::File::StringData sd) const1341 std::string Disassembler::StringDataToString(panda_file::File::StringData sd) const
1342 {
1343 std::string res((char *)sd.data);
1344 return res;
1345 }
1346
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1347 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1348 {
1349 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1350 }
1351
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,pandasm::extensions::Language language) const1352 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1353 pandasm::extensions::Language language) const
1354 {
1355 std::stringstream name;
1356
1357 if (bc_ins.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1358 auto idx = bc_ins.GetId().AsIndex();
1359 auto id = file_->ResolveClassIndex(method_id, idx);
1360 name << StringDataToString(file_->GetStringData(id));
1361
1362 auto type = pandasm::Type::FromDescriptor(name.str());
1363 type = pandasm::Type(MakePandasmFriendly(type.GetComponentName()), type.GetRank());
1364
1365 name.str("");
1366 name << type.GetName();
1367 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1368 auto idx = bc_ins.GetId().AsIndex();
1369 auto id = file_->ResolveMethodIndex(method_id, idx);
1370 panda_file::MethodDataAccessor method_accessor(*file_, id);
1371
1372 name << GetFullMethodName(method_accessor.GetMethodId(), language);
1373 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1374 name << '\"';
1375
1376 if (skip_strings_ || quiet_) {
1377 name << std::hex << "0x" << bc_ins.GetId().AsFileId();
1378 } else {
1379 name << StringDataToString(file_->GetStringData(bc_ins.GetId().AsFileId()));
1380 }
1381
1382 name << '\"';
1383 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1384 auto idx = bc_ins.GetId().AsIndex();
1385 auto id = file_->ResolveFieldIndex(method_id, idx);
1386 panda_file::FieldDataAccessor field_accessor(*file_, id);
1387
1388 name << GetFullRecordName(field_accessor.GetClassId(), language);
1389 name << '.';
1390 name << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1391 } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1392 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
1393 auto idx = bc_ins.GetId().AsFileId().GetOffset();
1394
1395 name << idx;
1396 }
1397
1398 return name.str();
1399 }
1400
GetBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur) const1401 size_t Disassembler::GetBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,
1402 BytecodeInstruction bc_ins_cur) const
1403 {
1404 size_t count = 0;
1405
1406 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
1407 count++;
1408 bc_ins_first = bc_ins_first.GetNext();
1409 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
1410 return std::numeric_limits<size_t>::max();
1411 }
1412 }
1413
1414 return count;
1415 }
1416
GetClassLanguage(panda_file::File::EntityId class_id) const1417 pandasm::extensions::Language Disassembler::GetClassLanguage(panda_file::File::EntityId class_id) const
1418 {
1419 if (file_->IsExternal(class_id)) {
1420 return pandasm::extensions::Language::PANDA_ASSEMBLY;
1421 }
1422
1423 panda_file::ClassDataAccessor cda(*file_, class_id);
1424 return PFLangToPandasmLang(cda.GetSourceLang());
1425 }
1426
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1427 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1428 panda_file::File::EntityId code_id) const
1429 {
1430 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1431
1432 const auto ins_sz = code_accessor.GetCodeSize();
1433 const auto ins_arr = code_accessor.GetInstructions();
1434
1435 method->regs_num = code_accessor.GetNumVregs();
1436
1437 auto bc_ins = BytecodeInstruction(ins_arr);
1438 const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1439
1440 LabelTable label_table = GetExceptions(method, method_id, code_id);
1441
1442 IdList unknown_external_methods {};
1443
1444 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1445 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1446 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << code_id.GetOffset() << " ("
1447 << "0x" << std::hex << code_id.GetOffset()
1448 << "). bytecode instructions sequence corrupted for method " << method->name
1449 << "! went out of bounds";
1450
1451 break;
1452 }
1453
1454 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id, method->language);
1455 // alter instructions operands depending on instruction type
1456 if (pa_ins.IsConditionalJump() || pa_ins.IsJump()) {
1457 const int32_t jmp_offset = std::get<int64_t>(pa_ins.imms.at(0));
1458 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1459 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1460 size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1461
1462 if (idx != std::numeric_limits<size_t>::max()) {
1463 if (label_table.find(idx) == label_table.end()) {
1464 std::stringstream ss {};
1465 ss << "jump_label_" << label_table.size();
1466 label_table[idx] = ss.str();
1467 }
1468
1469 pa_ins.imms.clear();
1470 pa_ins.ids.push_back(label_table[idx]);
1471 } else {
1472 LOG(ERROR, DISASSEMBLER)
1473 << "> error encountered at " << std::dec << code_id.GetOffset() << " ("
1474 << "0x" << std::hex << code_id.GetOffset() << "). incorrect instruction at offset "
1475 << (bc_ins.GetAddress() - ins_arr) << ": invalid jump offset " << jmp_offset
1476 << " - jumping in the middle of another instruction!";
1477 }
1478 } else {
1479 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << code_id.GetOffset() << " ("
1480 << "0x" << std::hex << code_id.GetOffset()
1481 << "). incorrect instruction at offset: " << (bc_ins.GetAddress() - ins_arr)
1482 << ": invalid jump offset " << jmp_offset << " - jumping out of bounds!";
1483 }
1484 }
1485
1486 // check if method id is unknown external method. if so, emplace it in table
1487 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1488 const auto arg_method_idx = bc_ins.GetId().AsIndex();
1489 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1490
1491 const auto arg_method_name = GetFullMethodName(arg_method_id, method->language);
1492
1493 const bool is_present = prog_.function_table.find(arg_method_name) != prog_.function_table.cend();
1494 const bool is_external = file_->IsExternal(arg_method_id);
1495 if (is_external && !is_present) {
1496 unknown_external_methods.push_back(arg_method_id);
1497 }
1498 }
1499
1500 method->ins.push_back(pa_ins);
1501 bc_ins = bc_ins.GetNext();
1502 }
1503
1504 for (const auto &pair : label_table) {
1505 method->ins[pair.first].label = pair.second;
1506 method->ins[pair.first].set_label = true;
1507 }
1508
1509 return unknown_external_methods;
1510 }
1511
1512 } // namespace panda::disasm
1513