• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19 
20 #include <iomanip>
21 
22 namespace panda::disasm {
23 
Disassembler(Disassembler && that)24 Disassembler::Disassembler(Disassembler &&that)
25 {
26     this->file_ = std::move(that.file_);
27 
28     this->prog_ = std::move(that.prog_);
29 
30     this->file_language_ = std::move(that.file_language_);
31 
32     this->record_name_to_id_ = std::move(that.record_name_to_id_);
33     this->method_name_to_id_ = std::move(that.method_name_to_id_);
34 
35     this->skip_strings_ = std::move(that.skip_strings_);
36     this->quiet_ = std::move(that.quiet_);
37 
38     this->prog_info_ = std::move(that.prog_info_);
39     this->prog_j_ann_ = std::move(that.prog_j_ann_);
40 }
41 
Disassemble(const std::string & filename_in,bool quiet,bool skip_strings)42 void Disassembler::Disassemble(const std::string &filename_in, bool quiet, bool skip_strings)
43 {
44     auto file_new = panda_file::File::Open(filename_in);
45     file_.swap(file_new);
46 
47     if (file_ != nullptr) {
48         prog_ = pandasm::Program {};
49 
50         record_name_to_id_.clear();
51         method_name_to_id_.clear();
52 
53         skip_strings_ = skip_strings;
54         quiet_ = quiet;
55 
56         prog_info_ = ProgInfo {};
57         prog_j_ann_ = ProgJavaAnnotations {};
58 
59         GetLiteralArrays();
60         GetRecords();
61 
62         GetLanguageSpecificMetadata();
63     } else {
64         LOG(ERROR, DISASSEMBLER) << "> Failed to open the specified pandafile: <" << filename_in << ">";
65     }
66 }
67 
CollectInfo()68 void Disassembler::CollectInfo()
69 {
70     LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
71 
72     for (const auto &pair : record_name_to_id_) {
73         GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
74     }
75 
76     for (const auto &pair : method_name_to_id_) {
77         GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
78     }
79 }
80 
Serialize(std::ostream & os,bool add_separators,bool print_information) const81 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
82 {
83     if (os.bad()) {
84         LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
85         return;
86     }
87 
88     if (file_ != nullptr) {
89         os << "#\n# source binary: " << file_->GetFilename() << "\n#\n\n";
90     }
91 
92     SerializeLanguage(os);
93 
94     if (add_separators) {
95         os << "# ====================\n"
96               "# LITERALS\n\n";
97     }
98 
99     LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
100 
101     size_t index = 0;
102     for (const auto &pair : prog_.literalarray_table) {
103         Serialize(index++, pair.second, os);
104     }
105 
106     os << "\n";
107 
108     if (add_separators) {
109         os << "# ====================\n"
110               "# RECORDS\n\n";
111     }
112 
113     LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
114 
115     for (const auto &r : prog_.record_table) {
116         Serialize(r.second, os, print_information);
117     }
118 
119     if (add_separators) {
120         os << "# ====================\n"
121               "# METHODS\n\n";
122     }
123 
124     LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
125 
126     for (const auto &m : prog_.function_table) {
127         Serialize(m.second, os, print_information);
128     }
129 }
130 
IsPandasmFriendly(const char c)131 inline bool Disassembler::IsPandasmFriendly(const char c)
132 {
133     return isalnum(c) || c == '_';
134 }
135 
IsSystemType(const std::string & type_name)136 inline bool Disassembler::IsSystemType(const std::string &type_name)
137 {
138     bool is_array_type = (type_name.find('[') != std::string::npos);
139     bool is_global = (type_name == "_GLOBAL");
140 
141     return is_array_type || is_global;
142 }
143 
MakePandasmFriendly(const std::string & str)144 std::string Disassembler::MakePandasmFriendly(const std::string &str)
145 {
146     auto str_new = str;
147     std::replace_if(
148         str_new.begin(), str_new.end(), [](const char c) { return !IsPandasmFriendly(c); }, '_');
149 
150     return str_new;
151 }
152 
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)153 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
154 {
155     LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id.GetOffset();
156 
157     if (record == nullptr) {
158         LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
159         return;
160     }
161 
162     auto language = GetClassLanguage(record_id);
163     record->name = GetFullRecordName(record_id, language);
164 
165     LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
166 
167     GetMetaData(record, record_id);
168 
169     if (!file_->IsExternal(record_id)) {
170         GetMethods(record_id);
171         GetFields(record, record_id);
172     }
173 }
174 
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)175 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
176 {
177     LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id.GetOffset();
178 
179     if (method == nullptr) {
180         LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
181         return;
182     }
183 
184     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
185     pandasm::extensions::Language language = PFLangToPandasmLang(method_accessor.GetSourceLang());
186 
187     method->name = GetFullMethodName(method_id, language);
188 
189     LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
190 
191     GetParams(method, method_accessor.GetProtoId());
192     GetMetaData(method, method_id);
193 
194     if (method->HasImplementation()) {
195         if (method_accessor.GetCodeId().has_value()) {
196             const IdList id_list = GetInstructions(method, method_id, method_accessor.GetCodeId().value());
197 
198             for (const auto &id : id_list) {
199                 pandasm::Function new_method("", language);
200                 GetMethod(&new_method, id);
201 
202                 method_name_to_id_.emplace(new_method.name, id);
203                 prog_.function_table.emplace(new_method.name, std::move(new_method));
204             }
205         } else {
206             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << method_id << " ("
207                                      << "0x" << std::hex << method_id
208                                      << "). Implementation of method is expected, but no \'CODE\' tag was found";
209         }
210     }
211 }
212 
213 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const214 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
215                                         const panda_file::LiteralDataAccessor::LiteralValue &value) const
216 {
217     panda_file::File::EntityId id(std::get<uint32_t>(value));
218     auto sp = file_->GetSpanFromId(id);
219     // CODECHECK-NOLINTNEXTLINE(C_RULE_ID_HORIZON_SPACE)
220     auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
221     if (tag != panda_file::LiteralTag::ARRAY_STRING) {
222         for (size_t i = 0; i < len; i++) {
223             pandasm::LiteralArray::Literal lit;
224             lit.tag_ = tag;
225             lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
226             lit_array->literals_.push_back(lit);
227         }
228         return;
229     }
230     for (size_t i = 0; i < len; i++) {
231         // CODECHECK-NOLINTNEXTLINE(C_RULE_ID_HORIZON_SPACE)
232         auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
233         pandasm::LiteralArray::Literal lit;
234         lit.tag_ = tag;
235         lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
236         lit_array->literals_.push_back(lit);
237     }
238 }
239 
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const240 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
241                                    const panda_file::LiteralDataAccessor::LiteralValue &value,
242                                    const panda_file::LiteralTag &tag) const
243 {
244     pandasm::LiteralArray::Literal lit;
245     lit.tag_ = tag;
246     switch (tag) {
247         case panda_file::LiteralTag::BOOL: {
248             lit.value_ = std::get<bool>(value);
249             break;
250         }
251         case panda_file::LiteralTag::ACCESSOR:
252         case panda_file::LiteralTag::NULLVALUE: {
253             lit.value_ = std::get<uint8_t>(value);
254             break;
255         }
256         case panda_file::LiteralTag::METHODAFFILIATE: {
257             lit.value_ = std::get<uint16_t>(value);
258             break;
259         }
260         case panda_file::LiteralTag::INTEGER: {
261             lit.value_ = std::get<uint32_t>(value);
262             break;
263         }
264         case panda_file::LiteralTag::DOUBLE: {
265             lit.value_ = std::get<double>(value);
266             break;
267         }
268         case panda_file::LiteralTag::STRING:
269         case panda_file::LiteralTag::METHOD:
270         case panda_file::LiteralTag::GENERATORMETHOD: {
271             auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
272             lit.value_ = StringDataToString(str_data);
273             break;
274         }
275         case panda_file::LiteralTag::TAGVALUE: {
276             return;
277         }
278         default: {
279             UNREACHABLE();
280         }
281     }
282     lit_array->literals_.push_back(lit);
283 }
284 
GetLiteralArray(pandasm::LiteralArray * lit_array,const size_t index) const285 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, const size_t index) const
286 {
287     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal array]\nindex: " << index;
288 
289     panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
290 
291     lit_array_accessor.EnumerateLiteralVals(
292         index, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
293                                  const panda_file::LiteralTag &tag) {
294             switch (tag) {
295                 case panda_file::LiteralTag::ARRAY_I8: {
296                     FillLiteralArrayData<uint8_t>(lit_array, tag, value);
297                     break;
298                 }
299                 case panda_file::LiteralTag::ARRAY_I16: {
300                     FillLiteralArrayData<uint16_t>(lit_array, tag, value);
301                     break;
302                 }
303                 case panda_file::LiteralTag::ARRAY_I32: {
304                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
305                     break;
306                 }
307                 case panda_file::LiteralTag::ARRAY_I64: {
308                     FillLiteralArrayData<uint64_t>(lit_array, tag, value);
309                     break;
310                 }
311                 case panda_file::LiteralTag::ARRAY_F32: {
312                     FillLiteralArrayData<float>(lit_array, tag, value);
313                     break;
314                 }
315                 case panda_file::LiteralTag::ARRAY_F64: {
316                     FillLiteralArrayData<double>(lit_array, tag, value);
317                     break;
318                 }
319                 case panda_file::LiteralTag::ARRAY_STRING: {
320                     FillLiteralArrayData<uint32_t>(lit_array, tag, value);
321                     break;
322                 }
323                 default: {
324                     FillLiteralData(lit_array, value, tag);
325                     break;
326                 }
327             }
328         });
329 }
330 
GetLiteralArrays()331 void Disassembler::GetLiteralArrays()
332 {
333     const auto lit_arrays_id = file_->GetLiteralArraysId();
334 
335     LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id.GetOffset() << "\n";
336 
337     panda_file::LiteralDataAccessor lit_array_accessor(*file_, lit_arrays_id);
338     size_t num_litarrays = lit_array_accessor.GetLiteralNum();
339     for (size_t index = 0; index < num_litarrays; index++) {
340         panda::pandasm::LiteralArray lit_ar;
341         GetLiteralArray(&lit_ar, index);
342         prog_.literalarray_table.emplace(std::to_string(index), lit_ar);
343     }
344 }
345 
GetRecords()346 void Disassembler::GetRecords()
347 {
348     LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
349 
350     const auto class_idx = file_->GetClasses();
351     for (size_t i = 0; i < class_idx.size(); i++) {
352         uint32_t id = class_idx[i];
353 
354         if (id > file_->GetHeader()->file_size) {
355             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec
356                                      << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i << " ("
357                                      << "0x" << std::hex << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i
358                                      << "). binary file corrupted. record offset (" << id << ") out of bounds ("
359                                      << file_->GetHeader()->file_size << ")!";
360             break;
361         }
362 
363         const panda_file::File::EntityId record_id {id};
364         auto language = GetClassLanguage(record_id);
365         if (language != file_language_ && file_language_ != pandasm::extensions::Language::ECMASCRIPT) {
366             if (file_language_ == pandasm::extensions::Language::PANDA_ASSEMBLY) {
367                 file_language_ = language;
368             } else {
369                 LOG(ERROR, DISASSEMBLER) << "> possible error encountered at " << std::dec
370                                          << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i << " ("
371                                          << "0x" << std::hex << file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i
372                                          << "). record's language differs from file's language (or is default)!";
373             }
374         }
375 
376         pandasm::Record record("", language);
377         GetRecord(&record, record_id);
378 
379         if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
380             record_name_to_id_.emplace(record.name, record_id);
381             prog_.record_table.emplace(record.name, std::move(record));
382         }
383     }
384 }
385 
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)386 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
387 {
388     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
389 
390     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
391         pandasm::Field field(record->language);
392 
393         panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
394         field.name = StringDataToString(file_->GetStringData(field_name_id));
395 
396         uint32_t field_type = field_accessor.GetType();
397         field.type = FieldTypeToPandasmType(field_type);
398 
399         GetMetaData(&field, field_accessor.GetFieldId());
400 
401         record->field_list.push_back(std::move(field));
402     });
403 }
404 
GetMethods(const panda_file::File::EntityId & record_id)405 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
406 {
407     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
408 
409     pandasm::extensions::Language language = PFLangToPandasmLang(class_accessor.GetSourceLang());
410 
411     class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
412         const auto method_id = method_accessor.GetMethodId();
413 
414         pandasm::Function method("", language);
415         GetMethod(&method, method_id);
416 
417         if (prog_.function_table.find(method.name) == prog_.function_table.end()) {
418             method_name_to_id_.emplace(method.name, method_id);
419             prog_.function_table.emplace(method.name, std::move(method));
420         }
421     });
422 }
423 
GetParams(pandasm::Function * method,const panda_file::File::EntityId & proto_id) const424 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &proto_id) const
425 {
426     /**
427      * frame size - 2^16 - 1
428      */
429     static const uint32_t MAX_ARG_NUM = 0xFFFF;
430 
431     LOG(DEBUG, DISASSEMBLER) << "[getting params]\nproto id: " << proto_id.GetOffset();
432 
433     if (method == nullptr) {
434         LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
435         return;
436     }
437 
438     panda_file::ProtoDataAccessor proto_accessor(*file_, proto_id);
439 
440     auto params_num = proto_accessor.GetNumArgs();
441     if (params_num > MAX_ARG_NUM) {
442         LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << proto_id.GetOffset() << " ("
443                                  << "0x" << std::hex << proto_id.GetOffset() << "). number of function's arguments ("
444                                  << params_num << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
445 
446         return;
447     }
448 
449     size_t ref_idx = 0;
450     method->return_type = PFTypeToPandasmType(proto_accessor.GetReturnType(), proto_accessor, ref_idx);
451 
452     for (uint8_t i = 0; i < params_num; i++) {
453         auto arg_type = PFTypeToPandasmType(proto_accessor.GetArgType(i), proto_accessor, ref_idx);
454         method->params.push_back(pandasm::Function::Parameter(arg_type, method->language));
455     }
456 }
457 
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const458 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
459                                        panda_file::File::EntityId code_id) const
460 {
461     LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id.GetOffset();
462     if (method == nullptr) {
463         LOG(DEBUG, DISASSEMBLER) << "> nullptr received!\n";
464         return LabelTable {};
465     }
466     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
467 
468     const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
469     const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
470 
471     size_t try_idx = 0;
472     LabelTable label_table {};
473     code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
474         pandasm::Function::CatchBlock catch_block_pa {};
475         if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
476             return false;
477         }
478         size_t catch_idx = 0;
479         try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
480             auto class_idx = catch_block.GetTypeIdx();
481             if (class_idx == panda_file::INVALID_INDEX) {
482                 catch_block_pa.exception_record = "";
483             } else {
484                 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
485                 auto language = GetClassLanguage(class_id);
486                 catch_block_pa.exception_record = GetFullRecordName(class_id, language);
487             }
488             if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
489                                   catch_idx)) {
490                 return false;
491             }
492 
493             method->catch_blocks.push_back(catch_block_pa);
494             catch_block_pa.catch_begin_label = "";
495             catch_block_pa.catch_end_label = "";
496             catch_idx++;
497 
498             return true;
499         });
500         try_idx++;
501 
502         return true;
503     });
504 
505     return label_table;
506 }
507 
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const508 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
509                                   const panda_file::CodeDataAccessor::TryBlock &try_block,
510                                   pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
511                                   size_t try_idx) const
512 {
513     const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
514     const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
515 
516     const size_t try_begin_idx = GetBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
517     const size_t try_end_idx = GetBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
518 
519     const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
520     const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
521     const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
522     const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
523 
524     if (!try_begin_offset_in_range || !try_begin_offset_valid) {
525         LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! addr is: 0x" << std::hex
526                                  << try_begin_bc_ins.GetAddress();
527         return false;
528     } else {
529         std::stringstream ss {};
530         ss << "try_begin_label_" << try_idx;
531 
532         LabelTable::iterator it = label_table->find(try_begin_idx);
533         if (it == label_table->end()) {
534             catch_block_pa->try_begin_label = ss.str();
535             label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
536         } else {
537             catch_block_pa->try_begin_label = it->second;
538         }
539     }
540 
541     if (!try_end_offset_in_range || !try_end_offset_valid) {
542         LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! addr is: 0x" << std::hex
543                                  << try_end_bc_ins.GetAddress();
544         return false;
545     } else {
546         std::stringstream ss {};
547         ss << "try_end_label_" << try_idx;
548 
549         LabelTable::iterator it = label_table->find(try_end_idx);
550         if (it == label_table->end()) {
551             catch_block_pa->try_end_label = ss.str();
552             label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
553         } else {
554             catch_block_pa->try_end_label = it->second;
555         }
556     }
557 
558     return true;
559 }
560 
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const561 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
562                                     const panda_file::CodeDataAccessor::CatchBlock &catch_block,
563                                     pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
564                                     size_t try_idx, size_t catch_idx) const
565 {
566     const auto handler_begin_offset = catch_block.GetHandlerPc();
567     const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
568 
569     const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
570     const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
571 
572     const size_t handler_begin_idx = GetBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
573     const size_t handler_end_idx = GetBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
574 
575     const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
576     const bool handler_end_offset_in_range = bc_ins_last.GetAddress() > handler_end_bc_ins.GetAddress();
577     const bool handler_end_present = catch_block.GetCodeSize() != 0;
578     const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
579     const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
580 
581     if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
582         LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! addr is: 0x" << std::hex
583                                  << handler_begin_bc_ins.GetAddress();
584         return false;
585     } else {
586         std::stringstream ss {};
587         ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
588 
589         LabelTable::iterator it = label_table->find(handler_begin_idx);
590         if (it == label_table->end()) {
591             catch_block_pa->catch_begin_label = ss.str();
592             label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
593         } else {
594             catch_block_pa->catch_begin_label = it->second;
595         }
596     }
597 
598     if (!handler_end_offset_in_range || !handler_end_offset_valid) {
599         LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! addr is: 0x" << std::hex
600                                  << handler_end_bc_ins.GetAddress();
601         return false;
602     } else if (handler_end_present) {
603         std::stringstream ss {};
604         ss << "handler_end_label_" << try_idx << "_" << catch_idx;
605 
606         LabelTable::iterator it = label_table->find(handler_end_idx);
607         if (it == label_table->end()) {
608             catch_block_pa->catch_end_label = ss.str();
609             label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
610         } else {
611             catch_block_pa->catch_end_label = it->second;
612         }
613     }
614 
615     return true;
616 }
617 
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const618 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
619 {
620     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id;
621 
622     if (method == nullptr) {
623         LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
624         return;
625     }
626 
627     panda_file::MethodDataAccessor method_accessor(*file_, method_id);
628 
629     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
630 
631     if (!method_accessor.IsStatic()) {
632         const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
633         auto this_type = pandasm::Type::FromDescriptor(class_name);
634 
635         this_type = pandasm::Type(MakePandasmFriendly(this_type.GetComponentName()), this_type.GetRank());
636 
637         LOG(DEBUG, DISASSEMBLER) << "method is not static. emplacing self-argument of type " << this_type.GetName();
638 
639         method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, method->language));
640     } else {
641         method->metadata->SetAttribute("static");
642     }
643 
644     if (file_->IsExternal(method_accessor.GetMethodId())) {
645         method->metadata->SetAttribute("external");
646     }
647 
648     if (method_accessor.IsNative()) {
649         method->metadata->SetAttribute("native");
650     }
651 
652     if (method_accessor.IsAbstract()) {
653         method->metadata->SetAttribute("noimpl");
654     }
655 
656     // no language data for external methods
657     const bool is_ctor_js =
658         method_name_raw == pandasm::extensions::GetCtorName(pandasm::extensions::Language::ECMASCRIPT);
659     const bool is_cctor_js =
660         method_name_raw == pandasm::extensions::GetCctorName(pandasm::extensions::Language::ECMASCRIPT);
661     const bool is_ctor_panda =
662         method_name_raw == pandasm::extensions::GetCtorName(pandasm::extensions::Language::PANDA_ASSEMBLY);
663     const bool is_cctor_panda =
664         method_name_raw == pandasm::extensions::GetCctorName(pandasm::extensions::Language::PANDA_ASSEMBLY);
665 
666     const bool is_ctor = is_ctor_js || is_ctor_panda;
667     const bool is_cctor = is_cctor_js || is_cctor_panda;
668 
669     if (is_ctor) {
670         method->metadata->SetAttribute("ctor");
671     } else if (is_cctor) {
672         method->metadata->SetAttribute("cctor");
673     }
674 }
675 
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const676 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
677 {
678     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id;
679 
680     if (record == nullptr) {
681         LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
682         return;
683     }
684 
685     if (file_->IsExternal(record_id)) {
686         record->metadata->SetAttribute("external");
687     }
688 }
689 
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id) const690 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id) const
691 {
692     LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id;
693 
694     if (field == nullptr) {
695         LOG(ERROR, DISASSEMBLER) << "> nullptr received!";
696         return;
697     }
698 
699     panda_file::FieldDataAccessor field_accessor(*file_, field_id);
700 
701     if (field_accessor.IsExternal()) {
702         field->metadata->SetAttribute("external");
703     }
704 
705     if (field_accessor.IsStatic()) {
706         field->metadata->SetAttribute("static");
707     }
708 }
709 
GetLanguageSpecificMetadata() const710 void Disassembler::GetLanguageSpecificMetadata() const
711 {
712     LOG(DEBUG, DISASSEMBLER) << "\n[getting language-specific annotations]\n";
713 }
714 
715 // CODECHECK-NOLINTNEXTLINE(C_RULE_ID_FUNCTION_SIZE)
AnnotationTagToString(const char tag) const716 std::string Disassembler::AnnotationTagToString(const char tag) const
717 {
718     switch (tag) {
719         case '1':
720             return "u1";
721         case '2':
722             return "i8";
723         case '3':
724             return "u8";
725         case '4':
726             return "i16";
727         case '5':
728             return "u16";
729         case '6':
730             return "i32";
731         case '7':
732             return "u32";
733         case '8':
734             return "i64";
735         case '9':
736             return "u64";
737         case 'A':
738             return "f32";
739         case 'B':
740             return "f64";
741         case 'C':
742             return "string";
743         case 'D':
744             return "record";
745         case 'E':
746             return "method";
747         case 'F':
748             return "enum";
749         case 'G':
750             return "annotation";
751         case 'I':
752             return "void";
753         case 'J':
754             return "method_handle";
755         case 'K':
756             return "u1[]";
757         case 'L':
758             return "i8[]";
759         case 'M':
760             return "u8[]";
761         case 'N':
762             return "i16[]";
763         case 'O':
764             return "u16[]";
765         case 'P':
766             return "i32[]";
767         case 'Q':
768             return "u32[]";
769         case 'R':
770             return "i64[]";
771         case 'S':
772             return "u64[]";
773         case 'T':
774             return "f32[]";
775         case 'U':
776             return "f64[]";
777         case 'V':
778             return "string[]";
779         case 'W':
780             return "record[]";
781         case 'X':
782             return "method[]";
783         case 'Y':
784             return "enum[]";
785         case 'Z':
786             return "annotation[]";
787         case '@':
788             return "method_handle[]";
789         case '*':
790             return "nullptr string";
791         default:
792             return std::string();
793     }
794 }
795 
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type) const796 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type) const
797 {
798     std::stringstream ss;
799 
800     if (type == "i8") {
801         int8_t res = value.Get<int8_t>();
802         ss << static_cast<int>(res);
803     } else if (type == "u1" || type == "u8") {
804         uint8_t res = value.Get<uint8_t>();
805         ss << static_cast<unsigned int>(res);
806     } else if (type == "i16") {
807         ss << value.Get<int16_t>();
808     } else if (type == "u16") {
809         ss << value.Get<uint16_t>();
810     } else if (type == "i32") {
811         ss << value.Get<int32_t>();
812     } else if (type == "u32") {
813         ss << value.Get<uint32_t>();
814     } else if (type == "i64") {
815         ss << value.Get<int64_t>();
816     } else if (type == "u64") {
817         ss << value.Get<uint64_t>();
818     } else if (type == "f32") {
819         ss << value.Get<float>();
820     } else if (type == "f64") {
821         ss << value.Get<double>();
822     } else if (type == "string") {
823         const auto id = value.Get<panda_file::File::EntityId>();
824         ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
825     } else if (type == "record") {
826         const auto id = value.Get<panda_file::File::EntityId>();
827         auto language = GetClassLanguage(id);
828         ss << GetFullRecordName(id, language);
829     } else if (type == "method") {
830         const auto id = value.Get<panda_file::File::EntityId>();
831         auto language = GetClassLanguage(id);
832         ss << GetFullMethodName(id, language);
833     } else if (type == "enum") {
834         const auto id = value.Get<panda_file::File::EntityId>();
835         panda_file::FieldDataAccessor field_accessor(*file_, id);
836         ss << GetFullRecordName(field_accessor.GetClassId(), pandasm::extensions::Language::PANDA_ASSEMBLY) << "."
837            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
838     } else if (type == "annotation") {
839         const auto id = value.Get<panda_file::File::EntityId>();
840         ss << "id_" << id.GetOffset();
841     } else if (type == "void") {
842         return std::string();
843     } else if (type == "method_handle") {
844     }
845 
846     return ss.str();
847 }
848 
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx) const849 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
850                                              const size_t idx) const
851 {
852     std::stringstream ss;
853 
854     if (type == "i8") {
855         int8_t res = value.Get<int8_t>(idx);
856         ss << static_cast<int>(res);
857     } else if (type == "u1" || type == "u8") {
858         uint8_t res = value.Get<uint8_t>(idx);
859         ss << static_cast<unsigned int>(res);
860     } else if (type == "i16") {
861         ss << value.Get<int16_t>(idx);
862     } else if (type == "u16") {
863         ss << value.Get<uint16_t>(idx);
864     } else if (type == "i32") {
865         ss << value.Get<int32_t>(idx);
866     } else if (type == "u32") {
867         ss << value.Get<uint32_t>(idx);
868     } else if (type == "i64") {
869         ss << value.Get<int64_t>(idx);
870     } else if (type == "u64") {
871         ss << value.Get<uint64_t>(idx);
872     } else if (type == "f32") {
873         ss << value.Get<float>(idx);
874     } else if (type == "f64") {
875         ss << value.Get<double>(idx);
876     } else if (type == "string") {
877         const auto id = value.Get<panda_file::File::EntityId>(idx);
878         ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
879     } else if (type == "record") {
880         const auto id = value.Get<panda_file::File::EntityId>(idx);
881         auto language = GetClassLanguage(id);
882         ss << GetFullRecordName(id, language);
883     } else if (type == "method") {
884         const auto id = value.Get<panda_file::File::EntityId>(idx);
885         panda_file::ClassDataAccessor method_accessor {*file_, id};
886         pandasm::extensions::Language language = PFLangToPandasmLang(method_accessor.GetSourceLang());
887         ss << GetFullMethodName(id, language);
888     } else if (type == "enum") {
889         const auto id = value.Get<panda_file::File::EntityId>(idx);
890         panda_file::FieldDataAccessor field_accessor(*file_, id);
891         ss << GetFullRecordName(field_accessor.GetClassId(), pandasm::extensions::Language::PANDA_ASSEMBLY) << "."
892            << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
893     } else if (type == "annotation") {
894         const auto id = value.Get<panda_file::File::EntityId>(idx);
895         ss << "id_" << id.GetOffset();
896     } else if (type == "method_handle") {
897     } else if (type == "nullptr string") {
898     }
899 
900     return ss.str();
901 }
902 
GetFullMethodName(const panda_file::File::EntityId & method_id,pandasm::extensions::Language language) const903 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id,
904                                             pandasm::extensions::Language language) const
905 {
906     panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
907 
908     const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
909 
910     pandasm::Function method(method_name_raw, language);
911     GetParams(&method, method_accessor.GetProtoId());
912     GetMetaData(&method, method_id);
913 
914     method.name = pandasm::MangleFunctionName(method.name, method.params, method.return_type);
915     method.name = MakePandasmFriendly(method.name);
916     std::string class_name = GetFullRecordName(method_accessor.GetClassId(), language);
917     if (IsSystemType(class_name)) {
918         class_name = "";
919     } else {
920         class_name += ".";
921     }
922 
923     return class_name + method.name;
924 }
925 
GetFullRecordName(const panda_file::File::EntityId & class_id,pandasm::extensions::Language language) const926 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id,
927                                             [[maybe_unused]] pandasm::extensions::Language language) const
928 {
929     std::string name = StringDataToString(file_->GetStringData(class_id));
930 
931     auto type = pandasm::Type::FromDescriptor(name);
932     type = pandasm::Type(MakePandasmFriendly(type.GetComponentName()), type.GetRank());
933 
934     return type.GetName();
935 }
936 
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const937 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
938 {
939     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
940 
941     if (file_->IsExternal(record_id)) {
942         return;
943     }
944 
945     panda_file::ClassDataAccessor class_accessor {*file_, record_id};
946     std::stringstream ss;
947 
948     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
949        << class_accessor.GetClassId().GetOffset() << ", size: 0x" << std::setfill('0')
950        << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex << class_accessor.GetSize() << " (" << std::dec
951        << class_accessor.GetSize() << ")";
952 
953     record_info->record_info = ss.str();
954     ss.str(std::string());
955 
956     class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
957         ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
958            << field_accessor.GetFieldId().GetOffset() << ", type: 0x" << std::hex << field_accessor.GetType();
959 
960         record_info->fields_info.push_back(ss.str());
961 
962         ss.str(std::string());
963     });
964 }
965 
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const966 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
967 {
968     constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
969 
970     panda_file::MethodDataAccessor method_accessor {*file_, method_id};
971     std::stringstream ss;
972 
973     ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
974        << method_accessor.GetMethodId().GetOffset();
975 
976     if (method_accessor.GetCodeId().has_value()) {
977         ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
978            << method_accessor.GetCodeId().value().GetOffset();
979 
980         GetInsInfo(method_accessor.GetCodeId().value(), method_info);
981     } else {
982         ss << ", <no code>";
983     }
984 
985     method_info->method_info = ss.str();
986 }
987 
Serialize(size_t index,const pandasm::LiteralArray & lit_array,std::ostream & os) const988 void Disassembler::Serialize(size_t index, const pandasm::LiteralArray &lit_array, std::ostream &os) const
989 {
990     // remove once literals are supported in assembly_format
991 
992     if (lit_array.literals_.empty()) {
993         return;
994     }
995 
996     os << ".array array_" << index << " {\n";
997 
998     SerializeValues(lit_array, os);
999 
1000     os << "}\n";
1001 }
1002 
// Trait whose nested `type` is the unsigned counterpart of T for integral T and T itself otherwise.
// The alias names the trait (not the type), so users write `typename make_storage<T>::type`.
template <class T>
using make_storage = std::conditional_t<!std::is_integral_v<T>, std::common_type<T>, std::make_unsigned<T>>;
1005 
1006 template <class T>
SerializeArrayValues(const pandasm::LiteralArray & lit_array,std::ostream & os)1007 static void SerializeArrayValues(const pandasm::LiteralArray &lit_array, std::ostream &os)
1008 {
1009     using S = typename make_storage<T>::type;
1010     os << std::get<S>(lit_array.literals_[0].value_);
1011 
1012     for (size_t i = 1; i < lit_array.literals_.size(); i++) {
1013         os << ", " << bit_cast<T>(std::get<S>(lit_array.literals_[i].value_));
1014     }
1015 }
1016 
SerializeValues(const pandasm::LiteralArray & lit_array,std::ostream & os) const1017 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, std::ostream &os) const
1018 {
1019     panda_file::LiteralTag tag = lit_array.literals_[0].tag_;
1020     switch (tag) {
1021         case panda_file::LiteralTag::ARRAY_I8: {
1022             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1023                 os << "\t"
1024                    << "i8 " << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_)))
1025                    << "\n";
1026             }
1027             break;
1028         }
1029         case panda_file::LiteralTag::ARRAY_I16: {
1030             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1031                 os << "\t"
1032                    << "i16 " << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << "\n";
1033             }
1034             break;
1035         }
1036         case panda_file::LiteralTag::ARRAY_I32: {
1037             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1038                 os << "\t"
1039                    << "i32 " << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << "\n";
1040             }
1041             break;
1042         }
1043         case panda_file::LiteralTag::ARRAY_I64: {
1044             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1045                 os << "\t"
1046                    << "i64 " << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << "\n";
1047             }
1048             break;
1049         }
1050         case panda_file::LiteralTag::ARRAY_F64: {
1051             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1052                 os << "\t"
1053                    << "f64 " << std::get<double>(lit_array.literals_[i].value_) << "\n";
1054             }
1055             break;
1056         }
1057         case panda_file::LiteralTag::ARRAY_F32: {
1058             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1059                 os << "\t"
1060                    << "f32 " << std::get<float>(lit_array.literals_[i].value_) << "\n";
1061             }
1062             break;
1063         }
1064         case panda_file::LiteralTag::ARRAY_STRING: {
1065             for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1066                 os << "\t"
1067                    << "string " << std::get<std::string>(lit_array.literals_[i].value_) << "\n";
1068             }
1069             break;
1070         }
1071         case panda_file::LiteralTag::BOOL: {
1072             os << "\t"
1073                << "bool " <<  std::get<bool>(lit_array.literals_[0].value_)
1074                << "\n";
1075             break;
1076         }
1077         case panda_file::LiteralTag::INTEGER: {
1078             os << "\t" << "i32 " << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[0].value_)) << "\n";
1079             break;
1080         }
1081         case panda_file::LiteralTag::DOUBLE: {
1082             os << "\t" << "i32 " << std::get<double>(lit_array.literals_[0].value_) << "\n";
1083             break;
1084         }
1085         case panda_file::LiteralTag::STRING: {
1086             os << "\t" << "string " << std::get<std::string>(lit_array.literals_[0].value_) << "\n";
1087             break;
1088         }
1089         case panda_file::LiteralTag::METHOD: {
1090             os << "\t" << "method " << std::get<std::string>(lit_array.literals_[0].value_) << "\n";
1091             break;
1092         }
1093         case panda_file::LiteralTag::GENERATORMETHOD: {
1094             os << "\t" << "generator_method " << std::get<std::string>(lit_array.literals_[0].value_) << "\n";
1095             break;
1096         }
1097         case panda_file::LiteralTag::ACCESSOR: {
1098             os << "\t"
1099                << "accessor "
1100                << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[0].value_))) << "\n";
1101             break;
1102         }
1103         case panda_file::LiteralTag::METHODAFFILIATE: {
1104             os << "\t" << "method_affiliate " << std::get<uint16_t>(lit_array.literals_[0].value_) << "\n";
1105             break;
1106         }
1107         case panda_file::LiteralTag::NULLVALUE: {
1108             os << "\t"
1109                << "null_value "
1110                << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[0].value_))) << "\n";
1111             break;
1112         }
1113         default:
1114             break;
1115     }
1116 }
1117 
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1118 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1119 {
1120     if (IsSystemType(record.name)) {
1121         return;
1122     }
1123 
1124     os << ".record " << record.name;
1125 
1126     const auto record_iter = prog_j_ann_.record_annotations.find(record.name);
1127     const bool record_in_table = record_iter != prog_j_ann_.record_annotations.end();
1128     if (record_in_table) {
1129         Serialize(*record.metadata, record_iter->second.ann_list, os);
1130     } else {
1131         Serialize(*record.metadata, {}, os);
1132     }
1133 
1134     if (record.metadata->IsForeign()) {
1135         os << "\n\n";
1136         return;
1137     }
1138 
1139     os << " {\n";
1140 
1141     if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1142         os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1143         SerializeFields(record, os, true);
1144     } else {
1145         SerializeFields(record, os, false);
1146     }
1147 
1148     os << "}\n\n";
1149 }
1150 
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1151 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1152 {
1153     constexpr size_t INFO_OFFSET = 80;
1154 
1155     const auto record_iter = prog_j_ann_.record_annotations.find(record.name);
1156     const bool record_in_table = record_iter != prog_j_ann_.record_annotations.end();
1157     const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1158     size_t field_idx = 0;
1159 
1160     std::stringstream ss;
1161     for (const auto &f : record.field_list) {
1162         ss << "\t" << f.type.GetName() << " " << f.name;
1163         if (record_in_table) {
1164             const auto field_iter = record_iter->second.field_annotations.find(f.name);
1165             if (field_iter != record_iter->second.field_annotations.end()) {
1166                 Serialize(*f.metadata, field_iter->second, ss);
1167             } else {
1168                 Serialize(*f.metadata, {}, ss);
1169             }
1170         } else {
1171             Serialize(*f.metadata, {}, ss);
1172         }
1173 
1174         if (print_information) {
1175             os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1176         } else {
1177             os << ss.str() << "\n";
1178         }
1179 
1180         ss.str(std::string());
1181         ss.clear();
1182 
1183         field_idx++;
1184     }
1185 }
1186 
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1187 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1188 {
1189     os << ".function " << method.return_type.GetName() << " " << method.name << "(";
1190 
1191     if (method.params.size() > 0) {
1192         os << method.params[0].type.GetName() << " a0";
1193 
1194         for (uint8_t i = 1; i < method.params.size(); i++) {
1195             os << ", " << method.params[i].type.GetName() << " a" << (size_t)i;
1196         }
1197     }
1198     os << ")";
1199 
1200     const auto method_iter = prog_j_ann_.method_annotations.find(method.name);
1201     if (method_iter != prog_j_ann_.method_annotations.end()) {
1202         Serialize(*method.metadata, method_iter->second, os);
1203     } else {
1204         Serialize(*method.metadata, {}, os);
1205     }
1206 
1207     if (!method.HasImplementation()) {
1208         os << "\n\n";
1209         return;
1210     }
1211 
1212     if (print_information && prog_info_.methods_info.find(method.name) != prog_info_.methods_info.end()) {
1213         const auto method_info = prog_info_.methods_info.at(method.name);
1214 
1215         size_t width = 0;
1216         for (const auto &i : method.ins) {
1217             if (i.ToString().size() > width) {
1218                 width = i.ToString().size();
1219             }
1220         }
1221 
1222         os << " { # " << method_info.method_info << "\n";
1223 
1224         for (size_t i = 0; i < method.ins.size(); i++) {
1225             os << "\t" << std::setw(width) << std::left << method.ins.at(i).ToString("", true, method.regs_num) << " # "
1226                << method_info.instructions_info.at(i) << "\n";
1227         }
1228     } else {
1229         os << " {\n";
1230 
1231         for (const auto &i : method.ins) {
1232             os << "\t" << i.ToString("", true, method.regs_num) << "\n";
1233         }
1234     }
1235 
1236     if (method.catch_blocks.size() != 0) {
1237         os << "\n";
1238 
1239         for (const auto &catch_block : method.catch_blocks) {
1240             Serialize(catch_block, os);
1241 
1242             os << "\n";
1243         }
1244     }
1245 
1246     os << "}\n\n";
1247 }
1248 
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1249 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1250 {
1251     if (catch_block.exception_record == "") {
1252         os << ".catchall ";
1253     } else {
1254         os << ".catch " << catch_block.exception_record << ", ";
1255     }
1256 
1257     os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1258 
1259     if (catch_block.catch_end_label != "") {
1260         os << ", " << catch_block.catch_end_label;
1261     }
1262 }
1263 
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1264 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1265 {
1266     auto bool_attributes = meta.GetBoolAttributes();
1267     auto attributes = meta.GetAttributes();
1268 
1269     if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1270         return;
1271     }
1272 
1273     os << " <";
1274 
1275     size_t size = bool_attributes.size();
1276     size_t idx = 0;
1277     for (const auto &attr : bool_attributes) {
1278         os << attr;
1279         ++idx;
1280 
1281         if (!attributes.empty() || !ann_list.empty() || idx < size) {
1282             os << ", ";
1283         }
1284     }
1285 
1286     size = attributes.size();
1287     idx = 0;
1288     for (const auto &[key, values] : attributes) {
1289         for (size_t i = 0; i < values.size(); i++) {
1290             os << key << "=" << values[i];
1291 
1292             if (i < values.size() - 1) {
1293                 os << ", ";
1294             }
1295         }
1296 
1297         ++idx;
1298 
1299         if (!ann_list.empty() || idx < size) {
1300             os << ", ";
1301         }
1302     }
1303 
1304     size = ann_list.size();
1305     idx = 0;
1306     for (const auto &[key, value] : ann_list) {
1307         os << key << "=" << value;
1308 
1309         ++idx;
1310 
1311         if (idx < size) {
1312             os << ", ";
1313         }
1314     }
1315 
1316     os << ">";
1317 }
1318 
SerializeLanguage(std::ostream & os) const1319 void Disassembler::SerializeLanguage(std::ostream &os) const
1320 {
1321     std::string lang = pandasm::extensions::LanguageToString(file_language_);
1322     if (!lang.empty()) {
1323         os << ".language " << lang << "\n\n";
1324     }
1325 }
1326 
PFLangToPandasmLang(const std::optional<panda_file::SourceLang> & language) const1327 pandasm::extensions::Language Disassembler::PFLangToPandasmLang(
1328     const std::optional<panda_file::SourceLang> &language) const
1329 {
1330     const auto lang = language.value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1331     switch (lang) {
1332         case panda_file::SourceLang::ECMASCRIPT:
1333             return pandasm::extensions::Language::ECMASCRIPT;
1334         case panda_file::SourceLang::PANDA_ASSEMBLY:
1335             [[fallthrough]];
1336         default:
1337             return pandasm::extensions::Language::PANDA_ASSEMBLY;
1338     }
1339 }
1340 
StringDataToString(panda_file::File::StringData sd) const1341 std::string Disassembler::StringDataToString(panda_file::File::StringData sd) const
1342 {
1343     std::string res((char *)sd.data);
1344     return res;
1345 }
1346 
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1347 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1348 {
1349     return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1350 }
1351 
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,pandasm::extensions::Language language) const1352 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1353                                      pandasm::extensions::Language language) const
1354 {
1355     std::stringstream name;
1356 
1357     if (bc_ins.HasFlag(BytecodeInstruction::Flags::TYPE_ID)) {
1358         auto idx = bc_ins.GetId().AsIndex();
1359         auto id = file_->ResolveClassIndex(method_id, idx);
1360         name << StringDataToString(file_->GetStringData(id));
1361 
1362         auto type = pandasm::Type::FromDescriptor(name.str());
1363         type = pandasm::Type(MakePandasmFriendly(type.GetComponentName()), type.GetRank());
1364 
1365         name.str("");
1366         name << type.GetName();
1367     } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1368         auto idx = bc_ins.GetId().AsIndex();
1369         auto id = file_->ResolveMethodIndex(method_id, idx);
1370         panda_file::MethodDataAccessor method_accessor(*file_, id);
1371 
1372         name << GetFullMethodName(method_accessor.GetMethodId(), language);
1373     } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::STRING_ID)) {
1374         name << '\"';
1375 
1376         if (skip_strings_ || quiet_) {
1377             name << std::hex << "0x" << bc_ins.GetId().AsFileId();
1378         } else {
1379             name << StringDataToString(file_->GetStringData(bc_ins.GetId().AsFileId()));
1380         }
1381 
1382         name << '\"';
1383     } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::FIELD_ID)) {
1384         auto idx = bc_ins.GetId().AsIndex();
1385         auto id = file_->ResolveFieldIndex(method_id, idx);
1386         panda_file::FieldDataAccessor field_accessor(*file_, id);
1387 
1388         name << GetFullRecordName(field_accessor.GetClassId(), language);
1389         name << '.';
1390         name << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1391     } else if (bc_ins.HasFlag(BytecodeInstruction::Flags::LITERALARRAY_ID)) {
1392         panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
1393         auto idx = bc_ins.GetId().AsFileId().GetOffset();
1394 
1395         name << idx;
1396     }
1397 
1398     return name.str();
1399 }
1400 
GetBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur) const1401 size_t Disassembler::GetBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,
1402                                                   BytecodeInstruction bc_ins_cur) const
1403 {
1404     size_t count = 0;
1405 
1406     while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
1407         count++;
1408         bc_ins_first = bc_ins_first.GetNext();
1409         if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
1410             return std::numeric_limits<size_t>::max();
1411         }
1412     }
1413 
1414     return count;
1415 }
1416 
GetClassLanguage(panda_file::File::EntityId class_id) const1417 pandasm::extensions::Language Disassembler::GetClassLanguage(panda_file::File::EntityId class_id) const
1418 {
1419     if (file_->IsExternal(class_id)) {
1420         return pandasm::extensions::Language::PANDA_ASSEMBLY;
1421     }
1422 
1423     panda_file::ClassDataAccessor cda(*file_, class_id);
1424     return PFLangToPandasmLang(cda.GetSourceLang());
1425 }
1426 
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1427 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1428                                      panda_file::File::EntityId code_id) const
1429 {
1430     panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1431 
1432     const auto ins_sz = code_accessor.GetCodeSize();
1433     const auto ins_arr = code_accessor.GetInstructions();
1434 
1435     method->regs_num = code_accessor.GetNumVregs();
1436 
1437     auto bc_ins = BytecodeInstruction(ins_arr);
1438     const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1439 
1440     LabelTable label_table = GetExceptions(method, method_id, code_id);
1441 
1442     IdList unknown_external_methods {};
1443 
1444     while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1445         if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1446             LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << code_id.GetOffset() << " ("
1447                                      << "0x" << std::hex << code_id.GetOffset()
1448                                      << "). bytecode instructions sequence corrupted for method " << method->name
1449                                      << "! went out of bounds";
1450 
1451             break;
1452         }
1453 
1454         auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id, method->language);
1455         // alter instructions operands depending on instruction type
1456         if (pa_ins.IsConditionalJump() || pa_ins.IsJump()) {
1457             const int32_t jmp_offset = std::get<int64_t>(pa_ins.imms.at(0));
1458             const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1459             if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1460                 size_t idx = GetBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1461 
1462                 if (idx != std::numeric_limits<size_t>::max()) {
1463                     if (label_table.find(idx) == label_table.end()) {
1464                         std::stringstream ss {};
1465                         ss << "jump_label_" << label_table.size();
1466                         label_table[idx] = ss.str();
1467                     }
1468 
1469                     pa_ins.imms.clear();
1470                     pa_ins.ids.push_back(label_table[idx]);
1471                 } else {
1472                     LOG(ERROR, DISASSEMBLER)
1473                         << "> error encountered at " << std::dec << code_id.GetOffset() << " ("
1474                         << "0x" << std::hex << code_id.GetOffset() << "). incorrect instruction at offset "
1475                         << (bc_ins.GetAddress() - ins_arr) << ": invalid jump offset " << jmp_offset
1476                         << " - jumping in the middle of another instruction!";
1477                 }
1478             } else {
1479                 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << std::dec << code_id.GetOffset() << " ("
1480                                          << "0x" << std::hex << code_id.GetOffset()
1481                                          << "). incorrect instruction at offset: " << (bc_ins.GetAddress() - ins_arr)
1482                                          << ": invalid jump offset " << jmp_offset << " - jumping out of bounds!";
1483             }
1484         }
1485 
1486         // check if method id is unknown external method. if so, emplace it in table
1487         if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1488             const auto arg_method_idx = bc_ins.GetId().AsIndex();
1489             const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1490 
1491             const auto arg_method_name = GetFullMethodName(arg_method_id, method->language);
1492 
1493             const bool is_present = prog_.function_table.find(arg_method_name) != prog_.function_table.cend();
1494             const bool is_external = file_->IsExternal(arg_method_id);
1495             if (is_external && !is_present) {
1496                 unknown_external_methods.push_back(arg_method_id);
1497             }
1498         }
1499 
1500         method->ins.push_back(pa_ins);
1501         bc_ins = bc_ins.GetNext();
1502     }
1503 
1504     for (const auto &pair : label_table) {
1505         method->ins[pair.first].label = pair.second;
1506         method->ins[pair.first].set_label = true;
1507     }
1508 
1509     return unknown_external_methods;
1510 }
1511 
1512 }  // namespace panda::disasm
1513