1 /*
2 * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "libpandafile/util/collect_util.h"
18 #include "mangling.h"
19 #include "utils/logger.h"
20 #include "utils/const_value.h"
21
22 #include <iomanip>
23 #include <type_traits>
24
25 #include "get_language_specific_metadata.inc"
26
27 namespace panda::disasm {
28
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)29 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
30 {
31 auto file_new = panda_file::File::Open(filename_in);
32 file_.swap(file_new);
33
34 if (file_ != nullptr) {
35 prog_ = pandasm::Program {};
36
37 record_name_to_id_.clear();
38 method_name_to_id_.clear();
39 string_offset_to_name_.clear();
40 skip_strings_ = skip_strings;
41 quiet_ = quiet;
42
43 prog_info_ = ProgInfo {};
44
45 prog_ann_ = ProgAnnotations {};
46
47 GetRecords();
48 GetLiteralArrays();
49
50 GetLanguageSpecificMetadata();
51 } else {
52 LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
53 }
54 }
55
CollectInfo()56 void Disassembler::CollectInfo()
57 {
58 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
59
60 debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
61
62 for (const auto &pair : record_name_to_id_) {
63 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
64 }
65
66 for (const auto &pair : method_name_to_id_) {
67 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
68 }
69 }
70
Serialize(std::ostream & os,bool add_separators,bool print_information) const71 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
72 {
73 if (os.bad()) {
74 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
75 return;
76 }
77
78 if (file_ != nullptr) {
79 std::string abc_file = GetFileNameByPath(file_->GetFilename());
80 os << "# source binary: " << abc_file << "\n\n";
81 }
82
83 if (add_separators) {
84 os << "# ====================\n"
85 "# LITERALS\n\n";
86 }
87
88 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
89
90 for (const auto &[key, lit_arr] : prog_.literalarray_table) {
91 Serialize(key, lit_arr, os);
92 }
93
94 for (const auto &[module_offset, array_table] : modulearray_table_) {
95 Serialize(module_offset, array_table, os);
96 }
97
98 os << "\n";
99
100 if (add_separators) {
101 os << "# ====================\n"
102 "# RECORDS\n\n";
103 }
104
105 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
106
107 for (const auto &r : prog_.record_table) {
108 Serialize(r.second, os, print_information);
109 }
110
111 if (add_separators) {
112 os << "# ====================\n"
113 "# METHODS\n\n";
114 }
115
116 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
117
118 for (const auto &m : prog_.function_table) {
119 Serialize(m.second, os, print_information);
120 }
121
122 if (add_separators) {
123 os << "# ====================\n"
124 "# STRING\n\n";
125 }
126
127 LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
128
129 for (const auto &[offset, name_value] : string_offset_to_name_) {
130 SerializeStrings(offset, name_value, os);
131 }
132 }
133
IsSystemType(const std::string & type_name)134 inline bool Disassembler::IsSystemType(const std::string &type_name)
135 {
136 bool is_array_type = type_name.find('[') != std::string::npos;
137 bool is_global = type_name == "_GLOBAL";
138
139 return is_array_type || is_global;
140 }
141
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)142 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
143 {
144 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
145
146 if (record == nullptr) {
147 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
148
149 return;
150 }
151
152 record->name = GetFullRecordName(record_id);
153
154 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
155
156 GetMetaData(record, record_id);
157
158 if (!file_->IsExternal(record_id)) {
159 GetMethods(record_id);
160 GetFields(record, record_id);
161 }
162 }
163
AddMethodToTables(const panda_file::File::EntityId & method_id)164 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
165 {
166 pandasm::Function new_method("", GetMethodLanguage(method_id));
167 GetMethod(&new_method, method_id);
168
169 const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
170 if (prog_.function_table.find(signature) != prog_.function_table.end()) {
171 return;
172 }
173
174 GetMethodAnnotations(new_method, method_id);
175 method_name_to_id_.emplace(signature, method_id);
176 prog_.function_synonyms[new_method.name].push_back(signature);
177 prog_.function_table.emplace(signature, std::move(new_method));
178 }
179
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)180 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
181 {
182 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
183
184 if (method == nullptr) {
185 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
186
187 return;
188 }
189
190 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
191
192 method->name = GetFullMethodName(method_id);
193
194 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
195
196 GetMetaData(method, method_id);
197
198 if (method_accessor.GetCodeId().has_value()) {
199 auto code_id = method_accessor.GetCodeId().value();
200 GetParams(method, code_id);
201 const IdList id_list = GetInstructions(method, method_id, code_id);
202
203 for (const auto &id : id_list) {
204 AddMethodToTables(id);
205 }
206 } else {
207 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
208 << "). implementation of method expected, but no \'CODE\' tag was found!";
209
210 return;
211 }
212 }
213
214 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const215 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
216 const panda_file::LiteralDataAccessor::LiteralValue &value) const
217 {
218 panda_file::File::EntityId id(std::get<uint32_t>(value));
219 auto sp = file_->GetSpanFromId(id);
220 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
221 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
222 for (size_t i = 0; i < len; i++) {
223 pandasm::LiteralArray::Literal lit;
224 lit.tag_ = tag;
225 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
226 lit_array->literals_.push_back(lit);
227 }
228 return;
229 }
230 for (size_t i = 0; i < len; i++) {
231 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
232 pandasm::LiteralArray::Literal lit;
233 lit.tag_ = tag;
234 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
235 lit_array->literals_.push_back(lit);
236 }
237 }
238
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const239 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
240 const panda_file::LiteralDataAccessor::LiteralValue &value,
241 const panda_file::LiteralTag &tag) const
242 {
243 pandasm::LiteralArray::Literal lit;
244 lit.tag_ = tag;
245 switch (tag) {
246 case panda_file::LiteralTag::BOOL: {
247 lit.value_ = std::get<bool>(value);
248 break;
249 }
250 case panda_file::LiteralTag::ACCESSOR:
251 case panda_file::LiteralTag::NULLVALUE:
252 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
253 lit.value_ = std::get<uint8_t>(value);
254 break;
255 }
256 case panda_file::LiteralTag::METHODAFFILIATE: {
257 lit.value_ = std::get<uint16_t>(value);
258 break;
259 }
260 case panda_file::LiteralTag::LITERALBUFFERINDEX:
261 case panda_file::LiteralTag::INTEGER: {
262 lit.value_ = std::get<uint32_t>(value);
263 break;
264 }
265 case panda_file::LiteralTag::DOUBLE: {
266 lit.value_ = std::get<double>(value);
267 break;
268 }
269 case panda_file::LiteralTag::STRING:
270 case panda_file::LiteralTag::ETS_IMPLEMENTS: {
271 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
272 lit.value_ = StringDataToString(str_data);
273 break;
274 }
275 case panda_file::LiteralTag::METHOD:
276 case panda_file::LiteralTag::GETTER:
277 case panda_file::LiteralTag::SETTER:
278 case panda_file::LiteralTag::GENERATORMETHOD: {
279 panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
280 lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
281 break;
282 }
283 case panda_file::LiteralTag::LITERALARRAY: {
284 std::stringstream ss;
285 ss << "0x" << std::hex << std::get<uint32_t>(value);
286 lit.value_ = ss.str();
287 break;
288 }
289 case panda_file::LiteralTag::TAGVALUE: {
290 return;
291 }
292 default: {
293 UNREACHABLE();
294 }
295 }
296 lit_array->literals_.push_back(lit);
297 }
298
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const299 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
300 {
301 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
302 lit_array_accessor.EnumerateLiteralVals(
303 offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
304 const panda_file::LiteralTag &tag) {
305 switch (tag) {
306 case panda_file::LiteralTag::ARRAY_U1: {
307 FillLiteralArrayData<bool>(lit_array, tag, value);
308 break;
309 }
310 case panda_file::LiteralTag::ARRAY_I8:
311 case panda_file::LiteralTag::ARRAY_U8: {
312 FillLiteralArrayData<uint8_t>(lit_array, tag, value);
313 break;
314 }
315 case panda_file::LiteralTag::ARRAY_I16:
316 case panda_file::LiteralTag::ARRAY_U16: {
317 FillLiteralArrayData<uint16_t>(lit_array, tag, value);
318 break;
319 }
320 case panda_file::LiteralTag::ARRAY_I32:
321 case panda_file::LiteralTag::ARRAY_U32: {
322 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
323 break;
324 }
325 case panda_file::LiteralTag::ARRAY_I64:
326 case panda_file::LiteralTag::ARRAY_U64: {
327 FillLiteralArrayData<uint64_t>(lit_array, tag, value);
328 break;
329 }
330 case panda_file::LiteralTag::ARRAY_F32: {
331 FillLiteralArrayData<float>(lit_array, tag, value);
332 break;
333 }
334 case panda_file::LiteralTag::ARRAY_F64: {
335 FillLiteralArrayData<double>(lit_array, tag, value);
336 break;
337 }
338 case panda_file::LiteralTag::ARRAY_STRING: {
339 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
340 break;
341 }
342 default: {
343 FillLiteralData(lit_array, value, tag);
344 break;
345 }
346 }
347 });
348 }
349
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const350 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
351 {
352 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
353 GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
354 }
355
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const356 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
357 {
358 return module_literals_.find(id.GetOffset()) != module_literals_.end();
359 }
360
GetLiteralArrays()361 void Disassembler::GetLiteralArrays()
362 {
363 if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
364 const auto lit_arrays_id = file_->GetLiteralArraysId();
365 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
366 << lit_arrays_id << ")";
367
368 panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
369 size_t num_litarrays = lda.GetLiteralNum();
370 for (size_t index = 0; index < num_litarrays; index++) {
371 auto id = lda.GetLiteralArrayId(index);
372 if (module_request_phase_literals_.count(id.GetOffset())) {
373 continue;
374 }
375 FillLiteralArrayTable(id, index);
376 }
377 } else {
378 panda::libpandafile::CollectUtil collect_util;
379 std::unordered_set<uint32_t> literal_array_ids;
380 collect_util.CollectLiteralArray(*file_, literal_array_ids);
381 size_t index = 0;
382 for (uint32_t literal_array_id : literal_array_ids) {
383 panda_file::File::EntityId id {literal_array_id};
384 FillLiteralArrayTable(id, index);
385 index++;
386 }
387 }
388 }
389
FillLiteralArrayTable(panda_file::File::EntityId & id,size_t index)390 void Disassembler::FillLiteralArrayTable(panda_file::File::EntityId &id, size_t index)
391 {
392 if (IsModuleLiteralOffset(id)) {
393 std::stringstream ss;
394 ss << index << " 0x" << std::hex << id.GetOffset();
395 modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
396 return;
397 }
398 std::stringstream ss;
399 ss << index << " 0x" << std::hex << id.GetOffset();
400 panda::pandasm::LiteralArray lit_arr;
401 GetLiteralArrayByOffset(&lit_arr, id);
402 prog_.literalarray_table.emplace(ss.str(), lit_arr);
403 }
404
ModuleTagToString(panda_file::ModuleTag & tag) const405 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
406 {
407 switch (tag) {
408 case panda_file::ModuleTag::REGULAR_IMPORT:
409 return "REGULAR_IMPORT";
410 case panda_file::ModuleTag::NAMESPACE_IMPORT:
411 return "NAMESPACE_IMPORT";
412 case panda_file::ModuleTag::LOCAL_EXPORT:
413 return "LOCAL_EXPORT";
414 case panda_file::ModuleTag::INDIRECT_EXPORT:
415 return "INDIRECT_EXPORT";
416 case panda_file::ModuleTag::STAR_EXPORT:
417 return "STAR_EXPORT";
418 default: {
419 UNREACHABLE();
420 break;
421 }
422 }
423 return "";
424 }
425
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const426 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
427 {
428 panda_file::ModuleDataAccessor mda(*file_, module_id);
429 const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
430 std::vector<std::string> module_literal_array;
431 std::stringstream module_requests_stringstream;
432 module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n";
433 for (size_t index = 0; index < request_modules_offset.size(); ++index) {
434 module_requests_stringstream << "\t\t" << index <<
435 " : " << GetStringByOffset(request_modules_offset[index]) << ",\n";
436 }
437 module_requests_stringstream << "\t}";
438 module_literal_array.push_back(module_requests_stringstream.str());
439 mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset, uint32_t request_module_idx,
440 uint32_t import_name_offset, uint32_t local_name_offset) {
441 std::stringstream ss;
442 ss << "\tModuleTag: " << ModuleTagToString(tag);
443 if (tag == panda_file::ModuleTag::REGULAR_IMPORT ||
444 tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) {
445 if (!IsValidOffset(local_name_offset)) {
446 LOG(ERROR, DISASSEMBLER) << "Get invalid local name offset!" << std::endl;
447 return;
448 }
449 ss << ", local_name: " << GetStringByOffset(local_name_offset);
450 }
451 if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
452 if (!IsValidOffset(export_name_offset)) {
453 LOG(ERROR, DISASSEMBLER) << "Get invalid export name offset!" << std::endl;
454 return;
455 }
456 ss << ", export_name: " << GetStringByOffset(export_name_offset);
457 }
458 if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
459 if (!IsValidOffset(import_name_offset)) {
460 LOG(ERROR, DISASSEMBLER) << "Get invalid import name offset!" << std::endl;
461 return;
462 }
463 ss << ", import_name: " << GetStringByOffset(import_name_offset);
464 }
465 if (tag != panda_file::ModuleTag::LOCAL_EXPORT) {
466 if (request_module_idx >= request_modules_offset.size() ||
467 !IsValidOffset(request_modules_offset[request_module_idx])) {
468 LOG(ERROR, DISASSEMBLER) << "Get invalid request module offset!" << std::endl;
469 return;
470 }
471 ss << ", module_request: " << GetStringByOffset(request_modules_offset[request_module_idx]);
472 }
473 module_literal_array.push_back(ss.str());
474 });
475
476 return module_literal_array;
477 }
478
GetRecords()479 void Disassembler::GetRecords()
480 {
481 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
482
483 const auto class_idx = file_->GetClasses();
484
485 for (size_t i = 0; i < class_idx.size(); i++) {
486 uint32_t class_id = class_idx[i];
487 auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
488
489 if (class_id > file_->GetHeader()->file_size) {
490 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
491 << class_off << "). binary file corrupted. record offset (0x" << class_id
492 << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
493 break;
494 }
495
496 const panda_file::File::EntityId record_id {class_id};
497 auto language = GetRecordLanguage(record_id);
498
499 pandasm::Record record("", language);
500 GetRecord(&record, record_id);
501
502 if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
503 record_name_to_id_.emplace(record.name, record_id);
504 prog_.record_table.emplace(record.name, std::move(record));
505 }
506 }
507 }
508
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)509 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
510 {
511 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
512
513 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
514 pandasm::Field field(record->language);
515
516 panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
517 field.name = StringDataToString(file_->GetStringData(field_name_id));
518
519 uint32_t field_type = field_accessor.GetType();
520 field.type = FieldTypeToPandasmType(field_type);
521
522 GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD);
523
524 record->field_list.push_back(std::move(field));
525 });
526 }
527
GetMethods(const panda_file::File::EntityId & record_id)528 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
529 {
530 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
531
532 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
533 AddMethodToTables(method_accessor.GetMethodId());
534 });
535 }
536
GetAnnotationElements(pandasm::Function & method,const panda_file::AnnotationDataAccessor & ada,const std::string & annotation_name)537 void Disassembler::GetAnnotationElements(pandasm::Function &method, const panda_file::AnnotationDataAccessor &ada,
538 const std::string &annotation_name)
539 {
540 uint32_t elem_count = ada.GetCount();
541 for (uint32_t i = 0; i < elem_count; i++) {
542 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
543 const auto &elem_name =
544 std::string {reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data)};
545 panda_file::AnnotationDataAccessor::Tag tag = ada.GetTag(i);
546 auto value_type = pandasm::Value::GetCharAsType(tag.GetItem());
547 switch (value_type) {
548 case pandasm::Value::Type::U1: {
549 bool ann_elem_value = adae.GetScalarValue().Get<bool>();
550 AddAnnotationElement<bool>(method, annotation_name, elem_name, ann_elem_value);
551 break;
552 }
553 case pandasm::Value::Type::U32: {
554 uint32_t ann_elem_value = adae.GetScalarValue().Get<uint32_t>();
555 AddAnnotationElement<uint32_t>(method, annotation_name, elem_name, ann_elem_value);
556 break;
557 }
558 case pandasm::Value::Type::F64: {
559 double ann_elem_value = adae.GetScalarValue().Get<double>();
560 AddAnnotationElement<double>(method, annotation_name, elem_name, ann_elem_value);
561 break;
562 }
563 case pandasm::Value::Type::STRING: {
564 uint32_t string_id = adae.GetScalarValue().Get<uint32_t>();
565 std::string_view ann_elem_value {
566 reinterpret_cast<const char *>(file_->GetStringData(panda_file::File::EntityId(string_id)).data)};
567 AddAnnotationElement<std::string_view>(method, annotation_name, elem_name, ann_elem_value);
568 break;
569 }
570 case pandasm::Value::Type::LITERALARRAY: {
571 uint32_t literalArray_offset = adae.GetScalarValue().Get<uint32_t>();
572 AddAnnotationElement<panda::pandasm::LiteralArray, std::string_view>(
573 method, annotation_name, elem_name, std::string_view {std::to_string(literalArray_offset)});
574 break;
575 }
576 default:
577 UNREACHABLE();
578 }
579 }
580 }
581
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)582 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
583 {
584 panda_file::MethodDataAccessor mda(*file_, method_id);
585 mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
586 panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
587 auto annotation_name =
588 std::string {reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data)};
589 annotation_name.pop_back(); // remove ; from annotation name
590
591 if (annotation_name.empty()) {
592 return;
593 }
594
595 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
596 std::vector<pandasm::AnnotationElement> elements;
597 pandasm::AnnotationData ann_data(annotation_name, elements);
598 std::vector<pandasm::AnnotationData> annotations;
599 annotations.push_back(std::move(ann_data));
600 method.metadata->AddAnnotations(annotations);
601
602 GetAnnotationElements(method, ada, annotation_name);
603 });
604 }
605
606 template <typename T, typename U = T>
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const U & value)607 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
608 const std::string &key, const U &value)
609 {
610 if (key.empty()) {
611 return;
612 }
613
614 std::unique_ptr<pandasm::Value> pandasmValue;
615 if constexpr (std::is_same<T, uint32_t>::value) {
616 pandasmValue = std::move(
617 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
618 } else if constexpr (std::is_same<T, double>::value) {
619 pandasmValue = std::move(
620 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(value)));
621 } else if constexpr (std::is_same<T, bool>::value) {
622 pandasmValue = std::move(
623 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(value)));
624 } else if constexpr (std::is_same<T, std::string_view>::value) {
625 pandasmValue = std::move(
626 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(value)));
627 } else if constexpr (std::is_same<T, panda::pandasm::LiteralArray>::value) {
628 static_assert(std::is_same<U, std::string_view>::value);
629 pandasmValue = std::move(std::make_unique<pandasm::ScalarValue>(
630 pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(value)));
631 } else {
632 UNREACHABLE();
633 }
634
635 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
636 const auto ann_iter =
637 std::find_if(method_annotation.begin(), method_annotation.end(),
638 [&](pandasm::AnnotationData &ann) -> bool { return ann.GetName() == annotation_name; });
639
640 pandasm::AnnotationElement annotation_element(key, std::move(pandasmValue));
641 ann_iter->AddElement(std::move(annotation_element));
642 method.metadata->SetAnnotations(std::move(method_annotation));
643 }
644
GetAnnotationByMethodName(const std::string & method_name) const645 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
646 {
647 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
648 bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
649 if (!is_signature) {
650 return std::nullopt;
651 }
652
653 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
654 bool is_method = method_iter != prog_.function_table.end();
655 const auto annotations = method_iter->second.metadata->GetAnnotations();
656 if (!is_method || annotations.empty()) {
657 return std::nullopt;
658 }
659
660 std::vector<std::string> ann;
661 for (const auto &ann_data : annotations) {
662 ann.emplace_back(ann_data.GetName());
663 }
664 return ann;
665 }
666
GetSerializedMethodAnnotation(const std::string & method_name,const std::string & anno_name) const667 std::optional<std::string> Disassembler::GetSerializedMethodAnnotation(const std::string &method_name,
668 const std::string &anno_name) const
669 {
670 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
671 if (method_synonyms_iter == prog_.function_synonyms.end()) {
672 return std::nullopt;
673 }
674
675 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
676 if (method_iter == prog_.function_table.end()) {
677 return std::nullopt;
678 }
679
680 const auto annotations = method_iter->second.metadata->GetAnnotations();
681 if (annotations.empty()) {
682 return std::nullopt;
683 }
684
685 const auto annotation_iter =
686 std::find_if(annotations.begin(), annotations.end(),
687 [&](const pandasm::AnnotationData &ann) -> bool { return ann.GetName() == anno_name; });
688 if (annotation_iter == annotations.end()) {
689 return std::nullopt;
690 }
691
692 std::ostringstream os;
693 SerializeMethodAnnotation(*annotation_iter, os);
694 return os.str();
695 }
696
GetSerializedRecord(const std::string & record_name) const697 std::optional<std::string> Disassembler::GetSerializedRecord(const std::string &record_name) const
698 {
699 const auto record_iter = prog_.record_table.find(record_name);
700 if (record_iter == prog_.record_table.end()) {
701 return std::nullopt;
702 }
703 std::ostringstream os;
704 Serialize(record_iter->second, os, false);
705 return os.str();
706 }
707
GetStrings() const708 std::vector<std::string> Disassembler::GetStrings() const
709 {
710 std::vector<std::string> strings;
711 for (auto &str_info : string_offset_to_name_) {
712 strings.emplace_back(str_info.second);
713 }
714
715 return strings;
716 }
717
GetModuleLiterals() const718 std::vector<std::string> Disassembler::GetModuleLiterals() const
719 {
720 std::vector<std::string> module_literals;
721 for (auto &module_array : modulearray_table_) {
722 for (auto &module : module_array.second) {
723 module_literals.emplace_back(module);
724 }
725 }
726
727 return module_literals;
728 }
729
GetParams(pandasm::Function * method,const panda_file::File::EntityId & code_id) const730 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const
731 {
732 /**
733 * frame size - 2^16 - 1
734 */
735 static const uint32_t MAX_ARG_NUM = 0xFFFF;
736
737 LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
738
739 if (method == nullptr) {
740 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
741
742 return;
743 }
744
745 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
746
747 auto params_num = code_accessor.GetNumArgs();
748 if (params_num > MAX_ARG_NUM) {
749 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
750 << "). number of function's arguments (" << std::dec << params_num
751 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
752
753 return;
754 }
755
756 method->return_type = pandasm::Type("any", 0);
757
758 for (uint8_t i = 0; i < params_num; i++) {
759 method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), method->language));
760 }
761 }
762
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const763 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
764 panda_file::File::EntityId code_id) const
765 {
766 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
767
768 if (method == nullptr) {
769 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
770 return LabelTable {};
771 }
772
773 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
774
775 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
776 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
777
778 size_t try_idx = 0;
779 LabelTable label_table {};
780 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
781 pandasm::Function::CatchBlock catch_block_pa {};
782 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
783 return false;
784 }
785 size_t catch_idx = 0;
786 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
787 auto class_idx = catch_block.GetTypeIdx();
788 if (class_idx == panda_file::INVALID_INDEX) {
789 catch_block_pa.exception_record = "";
790 } else {
791 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
792 catch_block_pa.exception_record = GetFullRecordName(class_id);
793 }
794 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
795 catch_idx)) {
796 return false;
797 }
798
799 method->catch_blocks.push_back(catch_block_pa);
800 catch_block_pa.catch_begin_label = "";
801 catch_block_pa.catch_end_label = "";
802 catch_idx++;
803
804 return true;
805 });
806 try_idx++;
807
808 return true;
809 });
810
811 return label_table;
812 }
813
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)814 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
815 {
816 size_t count = 0;
817
818 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
819 count++;
820 bc_ins_first = bc_ins_first.GetNext();
821 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
822 return std::numeric_limits<size_t>::max();
823 }
824 }
825
826 return count;
827 }
828
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const829 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
830 const panda_file::CodeDataAccessor::TryBlock &try_block,
831 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
832 size_t try_idx) const
833 {
834 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
835 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
836
837 const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
838 const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
839
840 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
841 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
842 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
843 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
844
845 if (!try_begin_offset_in_range || !try_begin_offset_valid) {
846 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
847 << try_begin_bc_ins.GetAddress();
848 return false;
849 } else {
850 std::stringstream ss {};
851 ss << "try_begin_label_" << try_idx;
852
853 LabelTable::iterator it = label_table->find(try_begin_idx);
854 if (it == label_table->end()) {
855 catch_block_pa->try_begin_label = ss.str();
856 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
857 } else {
858 catch_block_pa->try_begin_label = it->second;
859 }
860 }
861
862 if (!try_end_offset_in_range || !try_end_offset_valid) {
863 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
864 << try_end_bc_ins.GetAddress();
865 return false;
866 } else {
867 std::stringstream ss {};
868 ss << "try_end_label_" << try_idx;
869
870 LabelTable::iterator it = label_table->find(try_end_idx);
871 if (it == label_table->end()) {
872 catch_block_pa->try_end_label = ss.str();
873 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
874 } else {
875 catch_block_pa->try_end_label = it->second;
876 }
877 }
878
879 return true;
880 }
881
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const882 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
883 const panda_file::CodeDataAccessor::CatchBlock &catch_block,
884 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
885 size_t try_idx, size_t catch_idx) const
886 {
887 const auto handler_begin_offset = catch_block.GetHandlerPc();
888 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
889
890 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
891 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
892
893 const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
894 const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
895
896 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
897 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
898 const bool handler_end_present = catch_block.GetCodeSize() != 0;
899 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
900 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
901
902 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
903 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
904 << handler_begin_bc_ins.GetAddress();
905 return false;
906 } else {
907 std::stringstream ss {};
908 ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
909
910 LabelTable::iterator it = label_table->find(handler_begin_idx);
911 if (it == label_table->end()) {
912 catch_block_pa->catch_begin_label = ss.str();
913 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
914 } else {
915 catch_block_pa->catch_begin_label = it->second;
916 }
917 }
918
919 if (!handler_end_offset_in_range || !handler_end_offset_valid) {
920 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
921 << handler_end_bc_ins.GetAddress();
922 return false;
923 } else if (handler_end_present) {
924 std::stringstream ss {};
925 ss << "handler_end_label_" << try_idx << "_" << catch_idx;
926
927 LabelTable::iterator it = label_table->find(handler_end_idx);
928 if (it == label_table->end()) {
929 catch_block_pa->catch_end_label = ss.str();
930 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
931 } else {
932 catch_block_pa->catch_end_label = it->second;
933 }
934 }
935
936 return true;
937 }
938
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const939 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
940 {
941 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
942 << ")";
943
944 if (method == nullptr) {
945 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
946
947 return;
948 }
949
950 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
951
952 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
953
954 if (!method_accessor.IsStatic()) {
955 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
956 auto this_type = pandasm::Type::FromDescriptor(class_name);
957
958 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
959 << "\') is not static. emplacing self-argument of type " << this_type.GetName();
960
961 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, method->language));
962 } else {
963 method->metadata->SetAttribute("static");
964 }
965
966 if (file_->IsExternal(method_accessor.GetMethodId())) {
967 method->metadata->SetAttribute("external");
968 }
969
970 std::string ctor_name = panda::panda_file::GetCtorName(method->language);
971 std::string cctor_name = panda::panda_file::GetCctorName(method->language);
972
973 const bool is_ctor = (method_name_raw == ctor_name);
974 const bool is_cctor = (method_name_raw == cctor_name);
975
976 if (is_ctor) {
977 method->metadata->SetAttribute("ctor");
978 method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
979 } else if (is_cctor) {
980 method->metadata->SetAttribute("cctor");
981 method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
982 }
983 }
984
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const985 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
986 {
987 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
988 << ")";
989
990 if (record == nullptr) {
991 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
992
993 return;
994 }
995
996 if (file_->IsExternal(record_id)) {
997 record->metadata->SetAttribute("external");
998 }
999 }
1000
GetMetadataFieldValue(panda_file::FieldDataAccessor & field_accessor,pandasm::Field * field,bool isScopeNamesRecord)1001 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &field_accessor, pandasm::Field *field,
1002 bool isScopeNamesRecord)
1003 {
1004 if (field->type.GetId() == panda_file::Type::TypeId::U32) {
1005 const auto offset = field_accessor.GetValue<uint32_t>().value();
1006 bool isScopeNameField = isScopeNamesRecord || field->name == ark::SCOPE_NAMES;
1007 if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) {
1008 module_request_phase_literals_.insert(offset);
1009 } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !isScopeNameField) {
1010 LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
1011 << " is excluded";
1012 module_literals_.insert(offset);
1013 }
1014 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
1015 } else if (field->type.GetId() == panda_file::Type::TypeId::U8) {
1016 const uint8_t val = field_accessor.GetValue<uint8_t>().value();
1017 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
1018 } else if (field->type.GetId() == panda_file::Type::TypeId::F64) {
1019 std::optional<double> val = field_accessor.GetValue<double>();
1020 if (val.has_value()) {
1021 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(val.value()));
1022 }
1023 } else if (field->type.GetId() == panda_file::Type::TypeId::U1) {
1024 std::optional<bool> val = field_accessor.GetValue<bool>();
1025 if (val.has_value()) {
1026 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(val.value()));
1027 }
1028 } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE && field->type.GetName() == "panda.String") {
1029 std::optional<uint32_t> string_offset_val = field_accessor.GetValue<uint32_t>();
1030 if (string_offset_val.has_value()) {
1031 std::string_view val {reinterpret_cast<const char *>(
1032 file_->GetStringData(panda_file::File::EntityId(string_offset_val.value())).data)};
1033 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1034 }
1035 } else if (field->type.GetRank() > 0) {
1036 std::optional<uint32_t> litarray_offset_val = field_accessor.GetValue<uint32_t>();
1037 if (litarray_offset_val.has_value()) {
1038 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1039 std::string_view {std::to_string(litarray_offset_val.value())}));
1040 }
1041 } else {
1042 UNREACHABLE();
1043 }
1044 }
1045
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id,bool is_scope_names_record)1046 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id,
1047 bool is_scope_names_record)
1048 {
1049 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
1050
1051 if (field == nullptr) {
1052 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1053
1054 return;
1055 }
1056
1057 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
1058
1059 if (field_accessor.IsExternal()) {
1060 field->metadata->SetAttribute("external");
1061 }
1062
1063 if (field_accessor.IsStatic()) {
1064 field->metadata->SetAttribute("static");
1065 }
1066
1067 GetMetadataFieldValue(field_accessor, field, is_scope_names_record);
1068 }
1069
AnnotationTagToString(const char tag) const1070 std::string Disassembler::AnnotationTagToString(const char tag) const
1071 {
1072 switch (tag) {
1073 case '1':
1074 return "u1";
1075 case '2':
1076 return "i8";
1077 case '3':
1078 return "u8";
1079 case '4':
1080 return "i16";
1081 case '5':
1082 return "u16";
1083 case '6':
1084 return "i32";
1085 case '7':
1086 return "u32";
1087 case '8':
1088 return "i64";
1089 case '9':
1090 return "u64";
1091 case 'A':
1092 return "f32";
1093 case 'B':
1094 return "f64";
1095 case 'C':
1096 return "string";
1097 case 'D':
1098 return "record";
1099 case 'E':
1100 return "method";
1101 case 'F':
1102 return "enum";
1103 case 'G':
1104 return "annotation";
1105 case 'I':
1106 return "void";
1107 case 'J':
1108 return "method_handle";
1109 case 'K':
1110 return "u1[]";
1111 case 'L':
1112 return "i8[]";
1113 case 'M':
1114 return "u8[]";
1115 case 'N':
1116 return "i16[]";
1117 case 'O':
1118 return "u16[]";
1119 case 'P':
1120 return "i32[]";
1121 case 'Q':
1122 return "u32[]";
1123 case 'R':
1124 return "i64[]";
1125 case 'S':
1126 return "u64[]";
1127 case 'T':
1128 return "f32[]";
1129 case 'U':
1130 return "f64[]";
1131 case 'V':
1132 return "string[]";
1133 case 'W':
1134 return "record[]";
1135 case 'X':
1136 return "method[]";
1137 case 'Y':
1138 return "enum[]";
1139 case 'Z':
1140 return "annotation[]";
1141 case '@':
1142 return "method_handle[]";
1143 case '*':
1144 return "nullptr string";
1145 default:
1146 return std::string();
1147 }
1148 }
1149
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1150 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1151 {
1152 std::stringstream ss;
1153
1154 if (type == "i8") {
1155 int8_t res = value.Get<int8_t>();
1156 ss << static_cast<int>(res);
1157 } else if (type == "u1" || type == "u8") {
1158 uint8_t res = value.Get<uint8_t>();
1159 ss << static_cast<unsigned int>(res);
1160 } else if (type == "i16") {
1161 ss << value.Get<int16_t>();
1162 } else if (type == "u16") {
1163 ss << value.Get<uint16_t>();
1164 } else if (type == "i32") {
1165 ss << value.Get<int32_t>();
1166 } else if (type == "u32") {
1167 ss << value.Get<uint32_t>();
1168 } else if (type == "i64") {
1169 ss << value.Get<int64_t>();
1170 } else if (type == "u64") {
1171 ss << value.Get<uint64_t>();
1172 } else if (type == "f32") {
1173 ss << value.Get<float>();
1174 } else if (type == "f64") {
1175 ss << value.Get<double>();
1176 } else if (type == "string") {
1177 const auto id = value.Get<panda_file::File::EntityId>();
1178 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1179 } else if (type == "record") {
1180 const auto id = value.Get<panda_file::File::EntityId>();
1181 ss << GetFullRecordName(id);
1182 } else if (type == "method") {
1183 const auto id = value.Get<panda_file::File::EntityId>();
1184 AddMethodToTables(id);
1185 ss << GetMethodSignature(id);
1186 } else if (type == "enum") {
1187 const auto id = value.Get<panda_file::File::EntityId>();
1188 panda_file::FieldDataAccessor field_accessor(*file_, id);
1189 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1190 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1191 } else if (type == "annotation") {
1192 const auto id = value.Get<panda_file::File::EntityId>();
1193 ss << "id_" << id;
1194 } else if (type == "void") {
1195 return std::string();
1196 } else if (type == "method_handle") {
1197 }
1198
1199 return ss.str();
1200 }
1201
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1202 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1203 const size_t idx)
1204 {
1205 std::stringstream ss;
1206
1207 if (type == "i8") {
1208 int8_t res = value.Get<int8_t>(idx);
1209 ss << static_cast<int>(res);
1210 } else if (type == "u1" || type == "u8") {
1211 uint8_t res = value.Get<uint8_t>(idx);
1212 ss << static_cast<unsigned int>(res);
1213 } else if (type == "i16") {
1214 ss << value.Get<int16_t>(idx);
1215 } else if (type == "u16") {
1216 ss << value.Get<uint16_t>(idx);
1217 } else if (type == "i32") {
1218 ss << value.Get<int32_t>(idx);
1219 } else if (type == "u32") {
1220 ss << value.Get<uint32_t>(idx);
1221 } else if (type == "i64") {
1222 ss << value.Get<int64_t>(idx);
1223 } else if (type == "u64") {
1224 ss << value.Get<uint64_t>(idx);
1225 } else if (type == "f32") {
1226 ss << value.Get<float>(idx);
1227 } else if (type == "f64") {
1228 ss << value.Get<double>(idx);
1229 } else if (type == "string") {
1230 const auto id = value.Get<panda_file::File::EntityId>(idx);
1231 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1232 } else if (type == "record") {
1233 const auto id = value.Get<panda_file::File::EntityId>(idx);
1234 ss << GetFullRecordName(id);
1235 } else if (type == "method") {
1236 const auto id = value.Get<panda_file::File::EntityId>(idx);
1237 AddMethodToTables(id);
1238 ss << GetMethodSignature(id);
1239 } else if (type == "enum") {
1240 const auto id = value.Get<panda_file::File::EntityId>(idx);
1241 panda_file::FieldDataAccessor field_accessor(*file_, id);
1242 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1243 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1244 } else if (type == "annotation") {
1245 const auto id = value.Get<panda_file::File::EntityId>(idx);
1246 ss << "id_" << id;
1247 } else if (type == "method_handle") {
1248 } else if (type == "nullptr string") {
1249 }
1250
1251 return ss.str();
1252 }
1253
GetFullMethodName(const panda_file::File::EntityId & method_id) const1254 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1255 {
1256 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1257
1258 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1259
1260 std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1261 if (IsSystemType(class_name)) {
1262 class_name = "";
1263 } else {
1264 class_name += ".";
1265 }
1266
1267 return class_name + method_name_raw;
1268 }
1269
GetMethodSignature(const panda_file::File::EntityId & method_id) const1270 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1271 {
1272 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1273
1274 pandasm::Function method(GetFullMethodName(method_id), GetMethodLanguage(method_id));
1275 if (method_accessor.GetCodeId().has_value()) {
1276 GetParams(&method, method_accessor.GetCodeId().value());
1277 }
1278 GetMetaData(&method, method_id);
1279
1280 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1281 }
1282
GetFullRecordName(const panda_file::File::EntityId & class_id) const1283 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1284 {
1285 std::string name = StringDataToString(file_->GetStringData(class_id));
1286
1287 auto type = pandasm::Type::FromDescriptor(name);
1288 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1289
1290 return type.GetPandasmName();
1291 }
1292
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1293 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1294 {
1295 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1296
1297 if (file_->IsExternal(record_id)) {
1298 return;
1299 }
1300
1301 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1302 std::stringstream ss;
1303
1304 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1305 << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1306 << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1307
1308 record_info->record_info = ss.str();
1309 ss.str(std::string());
1310
1311 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1312 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1313 << field_accessor.GetFieldId();
1314
1315 record_info->fields_info.push_back(ss.str());
1316
1317 ss.str(std::string());
1318 });
1319 }
1320
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1321 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1322 {
1323 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1324
1325 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1326 std::stringstream ss;
1327
1328 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1329 << method_accessor.GetMethodId();
1330
1331 if (method_accessor.GetCodeId().has_value()) {
1332 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1333 << method_accessor.GetCodeId().value();
1334
1335 GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1336 } else {
1337 ss << ", <no code>";
1338 }
1339
1340 method_info->method_info = ss.str();
1341
1342 if (method_accessor.GetCodeId()) {
1343 ASSERT(debug_info_extractor_ != nullptr);
1344 method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1345 method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1346 method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1347
1348 // Add information about parameters into the table
1349 panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1350 auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1351 uint32_t code_size = codeda.GetCodeSize();
1352 for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1353 panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1354 method_info->local_variable_table.emplace_back(arg_info);
1355 }
1356 }
1357 }
1358
IsArray(const panda_file::LiteralTag & tag)1359 static bool IsArray(const panda_file::LiteralTag &tag)
1360 {
1361 switch (tag) {
1362 case panda_file::LiteralTag::ARRAY_U1:
1363 case panda_file::LiteralTag::ARRAY_U8:
1364 case panda_file::LiteralTag::ARRAY_I8:
1365 case panda_file::LiteralTag::ARRAY_U16:
1366 case panda_file::LiteralTag::ARRAY_I16:
1367 case panda_file::LiteralTag::ARRAY_U32:
1368 case panda_file::LiteralTag::ARRAY_I32:
1369 case panda_file::LiteralTag::ARRAY_U64:
1370 case panda_file::LiteralTag::ARRAY_I64:
1371 case panda_file::LiteralTag::ARRAY_F32:
1372 case panda_file::LiteralTag::ARRAY_F64:
1373 case panda_file::LiteralTag::ARRAY_STRING:
1374 return true;
1375 default:
1376 return false;
1377 }
1378 }
1379
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1380 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1381 {
1382 std::stringstream ret;
1383 if (lit_array.literals_.empty()) {
1384 return "";
1385 }
1386
1387 std::stringstream ss;
1388 ss << "{ ";
1389 const auto &tag = lit_array.literals_[0].tag_;
1390 if (IsArray(tag)) {
1391 ss << LiteralTagToString(tag);
1392 }
1393 ss << lit_array.literals_.size();
1394 ss << " [ ";
1395 SerializeValues(lit_array, ss);
1396 ss << "]}";
1397 return ss.str();
1398 }
1399
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1400 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1401 {
1402 os << key << " ";
1403 os << SerializeLiteralArray(lit_array);
1404 os << "\n";
1405 }
1406
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1407 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1408 std::ostream &os) const
1409 {
1410 os << module_offset << " ";
1411 os << SerializeModuleLiteralArray(module_array);
1412 os << "\n";
1413 }
1414
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1415 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1416 {
1417 if (module_array.empty()) {
1418 return "";
1419 }
1420
1421 std::stringstream ss;
1422 ss << "{ ";
1423 ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array
1424 ss << " [\n";
1425 for (size_t index = 0; index < module_array.size(); index++) {
1426 ss << module_array[index] << ";\n";
1427 }
1428 ss << "]}";
1429 return ss.str();
1430 }
1431
LiteralTagToString(const panda_file::LiteralTag & tag) const1432 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1433 {
1434 switch (tag) {
1435 case panda_file::LiteralTag::BOOL:
1436 case panda_file::LiteralTag::ARRAY_U1:
1437 return "u1";
1438 case panda_file::LiteralTag::ARRAY_U8:
1439 return "u8";
1440 case panda_file::LiteralTag::ARRAY_I8:
1441 return "i8";
1442 case panda_file::LiteralTag::ARRAY_U16:
1443 return "u16";
1444 case panda_file::LiteralTag::ARRAY_I16:
1445 return "i16";
1446 case panda_file::LiteralTag::ARRAY_U32:
1447 return "u32";
1448 case panda_file::LiteralTag::INTEGER:
1449 case panda_file::LiteralTag::ARRAY_I32:
1450 return "i32";
1451 case panda_file::LiteralTag::ARRAY_U64:
1452 return "u64";
1453 case panda_file::LiteralTag::ARRAY_I64:
1454 return "i64";
1455 case panda_file::LiteralTag::ARRAY_F32:
1456 return "f32";
1457 case panda_file::LiteralTag::DOUBLE:
1458 case panda_file::LiteralTag::ARRAY_F64:
1459 return "f64";
1460 case panda_file::LiteralTag::STRING:
1461 case panda_file::LiteralTag::ARRAY_STRING:
1462 return "string";
1463 case panda_file::LiteralTag::METHOD:
1464 return "method";
1465 case panda_file::LiteralTag::GETTER:
1466 return "getter";
1467 case panda_file::LiteralTag::SETTER:
1468 return "setter";
1469 case panda_file::LiteralTag::GENERATORMETHOD:
1470 return "generator_method";
1471 case panda_file::LiteralTag::ETS_IMPLEMENTS:
1472 return "ets_implements";
1473 case panda_file::LiteralTag::ACCESSOR:
1474 return "accessor";
1475 case panda_file::LiteralTag::METHODAFFILIATE:
1476 return "method_affiliate";
1477 case panda_file::LiteralTag::NULLVALUE:
1478 return "null_value";
1479 case panda_file::LiteralTag::TAGVALUE:
1480 return "tagvalue";
1481 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1482 return "lit_index";
1483 case panda_file::LiteralTag::LITERALARRAY:
1484 return "lit_offset";
1485 case panda_file::LiteralTag::BUILTINTYPEINDEX:
1486 return "builtin_type";
1487 default:
1488 UNREACHABLE();
1489 }
1490 }
1491
1492 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1493 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1494 {
1495 switch (lit_array.literals_[0].tag_) {
1496 case panda_file::LiteralTag::ARRAY_U1: {
1497 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1498 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1499 }
1500 break;
1501 }
1502 case panda_file::LiteralTag::ARRAY_U8: {
1503 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1504 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1505 }
1506 break;
1507 }
1508 case panda_file::LiteralTag::ARRAY_I8: {
1509 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1510 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1511 }
1512 break;
1513 }
1514 case panda_file::LiteralTag::ARRAY_U16: {
1515 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1516 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1517 }
1518 break;
1519 }
1520 case panda_file::LiteralTag::ARRAY_I16: {
1521 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1522 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1523 }
1524 break;
1525 }
1526 case panda_file::LiteralTag::ARRAY_U32: {
1527 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1528 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1529 }
1530 break;
1531 }
1532 case panda_file::LiteralTag::ARRAY_I32: {
1533 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1534 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1535 }
1536 break;
1537 }
1538 case panda_file::LiteralTag::ARRAY_U64: {
1539 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1540 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1541 }
1542 break;
1543 }
1544 case panda_file::LiteralTag::ARRAY_I64: {
1545 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1546 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1547 }
1548 break;
1549 }
1550 case panda_file::LiteralTag::ARRAY_F32: {
1551 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1552 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1553 }
1554 break;
1555 }
1556 case panda_file::LiteralTag::ARRAY_F64: {
1557 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1558 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1559 }
1560 break;
1561 }
1562 case panda_file::LiteralTag::ARRAY_STRING: {
1563 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1564 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1565 }
1566 break;
1567 }
1568 default:
1569 SerializeLiterals(lit_array, os);
1570 }
1571 }
1572
1573 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1574 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1575 {
1576 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1577 const auto &tag = lit_array.literals_[i].tag_;
1578 os << LiteralTagToString(tag) << ":";
1579 const auto &val = lit_array.literals_[i].value_;
1580 switch (lit_array.literals_[i].tag_) {
1581 case panda_file::LiteralTag::BOOL: {
1582 os << std::get<bool>(val);
1583 break;
1584 }
1585 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1586 case panda_file::LiteralTag::INTEGER: {
1587 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1588 break;
1589 }
1590 case panda_file::LiteralTag::DOUBLE: {
1591 os << std::get<double>(val);
1592 break;
1593 }
1594 case panda_file::LiteralTag::STRING:
1595 case panda_file::LiteralTag::ETS_IMPLEMENTS: {
1596 os << "\"" << std::get<std::string>(val) << "\"";
1597 break;
1598 }
1599 case panda_file::LiteralTag::METHOD:
1600 case panda_file::LiteralTag::GETTER:
1601 case panda_file::LiteralTag::SETTER:
1602 case panda_file::LiteralTag::GENERATORMETHOD: {
1603 os << std::get<std::string>(val);
1604 break;
1605 }
1606 case panda_file::LiteralTag::NULLVALUE:
1607 case panda_file::LiteralTag::ACCESSOR: {
1608 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1609 break;
1610 }
1611 case panda_file::LiteralTag::METHODAFFILIATE: {
1612 os << std::get<uint16_t>(val);
1613 break;
1614 }
1615 case panda_file::LiteralTag::LITERALARRAY: {
1616 os << std::get<std::string>(val);
1617 break;
1618 }
1619 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1620 os << static_cast<int16_t>(std::get<uint8_t>(val));
1621 break;
1622 }
1623 default:
1624 UNREACHABLE();
1625 }
1626 os << ", ";
1627 }
1628 }
1629
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1630 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1631 {
1632 if (IsSystemType(record.name)) {
1633 return;
1634 }
1635 os << ".language " << panda::panda_file::LanguageToString(record.language) << std::endl;
1636 os << ".record " << record.name;
1637
1638 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1639 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1640 if (record_in_table) {
1641 Serialize(*record.metadata, record_iter->second.ann_list, os);
1642 } else {
1643 Serialize(*record.metadata, {}, os);
1644 }
1645
1646 if (record.metadata->IsForeign()) {
1647 os << "\n\n";
1648 return;
1649 }
1650
1651 os << " {";
1652
1653 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1654 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1655 SerializeFields(record, os, true);
1656 } else {
1657 os << "\n";
1658 SerializeFields(record, os, false);
1659 }
1660
1661 os << "}\n\n";
1662 }
1663
DumpLiteralArray(const pandasm::LiteralArray & literal_array,std::stringstream & ss) const1664 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literal_array, std::stringstream &ss) const
1665 {
1666 ss << "[";
1667 bool firstItem = true;
1668 for (const auto &item : literal_array.literals_) {
1669 if (!firstItem) {
1670 ss << ", ";
1671 } else {
1672 firstItem = false;
1673 }
1674
1675 switch (item.tag_) {
1676 case panda_file::LiteralTag::DOUBLE: {
1677 ss << std::get<double>(item.value_);
1678 break;
1679 }
1680 case panda_file::LiteralTag::BOOL: {
1681 ss << std::get<bool>(item.value_);
1682 break;
1683 }
1684 case panda_file::LiteralTag::STRING: {
1685 ss << "\"" << std::get<std::string>(item.value_) << "\"";
1686 break;
1687 }
1688 case panda_file::LiteralTag::LITERALARRAY: {
1689 std::string offset_str = std::get<std::string>(item.value_);
1690 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1691 pandasm::LiteralArray lit_array;
1692 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1693 DumpLiteralArray(lit_array, ss);
1694 break;
1695 }
1696 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1697 // By convention, BUILTINTYPEINDEX is used to store type of empty arrays,
1698 // therefore it has no value
1699 break;
1700 }
1701 default: {
1702 UNREACHABLE();
1703 break;
1704 }
1705 }
1706 }
1707 ss << "]";
1708 }
1709
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1710 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1711 {
1712 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1713 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1714 } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1715 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1716 } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1717 ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1718 } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1719 ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1720 } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "panda.String") {
1721 ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1722 } else if (f.type.GetRank() > 0) {
1723 uint32_t lit_array_fffset =
1724 std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()));
1725 pandasm::LiteralArray lit_array;
1726 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1727 ss << " = ";
1728 DumpLiteralArray(lit_array, ss);
1729 }
1730 }
1731
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1732 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1733 {
1734 constexpr size_t INFO_OFFSET = 80;
1735
1736 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1737 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1738
1739 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1740
1741 size_t field_idx = 0;
1742
1743 std::stringstream ss;
1744 for (const auto &f : record.field_list) {
1745 std::string file = GetFileNameByPath(f.name);
1746 ss << "\t" << f.type.GetPandasmName() << " " << file;
1747 if (f.metadata->GetValue().has_value()) {
1748 SerializeFieldValue(f, ss);
1749 }
1750 if (record_in_table) {
1751 const auto field_iter = record_iter->second.field_annotations.find(f.name);
1752 if (field_iter != record_iter->second.field_annotations.end()) {
1753 Serialize(*f.metadata, field_iter->second, ss);
1754 } else {
1755 Serialize(*f.metadata, {}, ss);
1756 }
1757 } else {
1758 Serialize(*f.metadata, {}, ss);
1759 }
1760
1761 if (print_information) {
1762 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1763 } else {
1764 os << ss.str() << "\n";
1765 }
1766
1767 ss.str(std::string());
1768 ss.clear();
1769
1770 field_idx++;
1771 }
1772 }
1773
getLiteralArrayTypeFromValue(const pandasm::LiteralArray & literal_array) const1774 std::string Disassembler::getLiteralArrayTypeFromValue(const pandasm::LiteralArray &literal_array) const
1775 {
1776 [[maybe_unused]] auto size = literal_array.literals_.size();
1777 ASSERT(size > 0);
1778 switch (literal_array.literals_[0].tag_) {
1779 case panda_file::LiteralTag::DOUBLE: {
1780 return "f64[]";
1781 }
1782 case panda_file::LiteralTag::BOOL: {
1783 return "u1[]";
1784 }
1785 case panda_file::LiteralTag::STRING: {
1786 return "panda.String[]";
1787 }
1788 case panda_file::LiteralTag::LITERALARRAY: {
1789 std::string offset_str = std::get<std::string>(literal_array.literals_[0].value_);
1790 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1791 pandasm::LiteralArray lit_array;
1792 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1793 return getLiteralArrayTypeFromValue(lit_array) + "[]";
1794 }
1795 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1796 uint8_t typeIndex = std::get<uint8_t>(literal_array.literals_[0].value_);
1797 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE = 0;
1798 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE = 1;
1799 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE = 2;
1800 switch (typeIndex) {
1801 case EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE:
1802 return "f64[]";
1803 case EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE:
1804 return "u1[]";
1805 case EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE:
1806 return "panda.String[]";
1807 default:
1808 UNREACHABLE();
1809 break;
1810 }
1811 }
1812 default: {
1813 UNREACHABLE();
1814 break;
1815 }
1816 }
1817 }
1818
SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> & elements,std::stringstream & ss,uint32_t idx) const1819 void Disassembler::SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> &elements,
1820 std::stringstream &ss, uint32_t idx) const
1821 {
1822 for (const auto &elem : elements) {
1823 auto type = elem.GetValue()->GetType();
1824 if (type == pandasm::Value::Type::U32) {
1825 ss << "\t"
1826 << "u32"
1827 << " " << elem.GetName() << " { ";
1828 ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>() << " }";
1829 } else if (type == pandasm::Value::Type::F64) {
1830 ss << "\t"
1831 << "f64"
1832 << " " << elem.GetName() << " { ";
1833 ss << elem.GetValue()->GetAsScalar()->GetValue<double>() << " }";
1834 } else if (type == pandasm::Value::Type::U1) {
1835 ss << "\t"
1836 << "u1"
1837 << " " << elem.GetName() << " { ";
1838 ss << elem.GetValue()->GetAsScalar()->GetValue<bool>() << " }";
1839 } else if (type == pandasm::Value::Type::STRING) {
1840 ss << "\t"
1841 << "panda.String"
1842 << " " << elem.GetName() << " { \"";
1843 ss << elem.GetValue()->GetAsScalar()->GetValue<std::string>() << "\" }";
1844 } else if (type == pandasm::Value::Type::LITERALARRAY) {
1845 uint32_t lit_array_fffset = std::stoi(elem.GetValue()->GetAsScalar()->GetValue<std::string>());
1846 pandasm::LiteralArray lit_array;
1847 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1848 std::string typeName = getLiteralArrayTypeFromValue(lit_array);
1849 ss << "\t" << typeName << " " << elem.GetName() << " { ";
1850 DumpLiteralArray(lit_array, ss);
1851 ss << " }";
1852 } else {
1853 UNREACHABLE();
1854 }
1855 if (idx > 0) {
1856 ss << "\n";
1857 }
1858 --idx;
1859 }
1860 }
1861
SerializeMethodAnnotation(const pandasm::AnnotationData & ann,std::ostream & os) const1862 void Disassembler::SerializeMethodAnnotation(const pandasm::AnnotationData &ann, std::ostream &os) const
1863 {
1864 os << ann.GetName() << ":\n";
1865 std::stringstream ss;
1866 std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1867 if (elements.empty()) {
1868 return;
1869 }
1870 uint32_t idx = elements.size() - 1;
1871 SerializeAnnotationElement(elements, ss, idx);
1872 os << ss.str() << "\n";
1873 }
1874
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1875 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1876 {
1877 const auto annotations = method.metadata->GetAnnotations();
1878 if (annotations.empty()) {
1879 return;
1880 }
1881
1882 for (const auto &ann : annotations) {
1883 SerializeMethodAnnotation(ann, os);
1884 }
1885 }
1886
SerializeInstructions(const pandasm::Function & method,std::ostream & os,const std::map<std::string,MethodInfo>::const_iterator & method_info_it,bool print_method_info) const1887 void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os,
1888 const std::map<std::string, MethodInfo>::const_iterator &method_info_it,
1889 bool print_method_info) const
1890 {
1891 std::string delim = ": ";
1892 size_t width = 0;
1893 if (print_method_info) {
1894 for (const auto &i : method.ins) {
1895 size_t ins_size = i->ToString().size();
1896 if (i->IsLabel()) {
1897 ins_size = i->Label().size() - delim.length();
1898 }
1899
1900 if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) {
1901 width = i->ToString().size();
1902 }
1903 }
1904 }
1905
1906 size_t noLabelIdx = 0;
1907 for (size_t i = 0; i < method.ins.size(); i++) {
1908 std::string ins = method.ins[i]->ToString("", true, method.regs_num);
1909 if (method.ins[i]->IsLabel()) {
1910 size_t pos = ins.find(delim);
1911 std::string label = ins.substr(0, pos);
1912 ins.erase(0, pos + delim.length());
1913 os << label << ":\n";
1914 }
1915
1916 if (ins != "") {
1917 os << "\t" << std::setw(width) << std::left << ins;
1918 if (print_method_info && noLabelIdx < method_info_it->second.instructions_info.size()) {
1919 os << " # " << method_info_it->second.instructions_info.at(noLabelIdx);
1920 }
1921 os << "\n";
1922 }
1923
1924 if (!method.ins[i]->IsLabel()) {
1925 noLabelIdx++;
1926 }
1927 }
1928 }
1929
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1930 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1931 {
1932 SerializeMethodAnnotations(method, os);
1933 os << ".language " << panda::panda_file::LanguageToString(method.language) << std::endl;
1934 os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1935
1936 if (method.params.size() > 0) {
1937 os << method.params[0].type.GetPandasmName() << " a0";
1938
1939 for (uint8_t i = 1; i < method.params.size(); i++) {
1940 os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1941 }
1942 }
1943 os << ")";
1944
1945 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1946
1947 const auto method_iter = prog_ann_.method_annotations.find(signature);
1948 if (method_iter != prog_ann_.method_annotations.end()) {
1949 Serialize(*method.metadata, method_iter->second, os);
1950 } else {
1951 Serialize(*method.metadata, {}, os);
1952 }
1953
1954 auto method_info_it = prog_info_.methods_info.find(signature);
1955 bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1956 if (print_method_info) {
1957 os << " { # " << method_info_it->second.method_info << "\n# CODE:\n";
1958 } else {
1959 os << " {\n";
1960 }
1961 SerializeInstructions(method, os, method_info_it, print_method_info);
1962
1963 if (method.catch_blocks.size() != 0) {
1964 os << "\n";
1965
1966 for (const auto &catch_block : method.catch_blocks) {
1967 Serialize(catch_block, os);
1968
1969 os << "\n";
1970 }
1971 }
1972
1973 if (print_method_info) {
1974 const MethodInfo &method_info = method_info_it->second;
1975 SerializeLineNumberTable(method_info.line_number_table, os);
1976 SerializeColumnNumberTable(method_info.column_number_table, os);
1977 SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1978 }
1979
1980 os << "}\n\n";
1981 }
1982
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1983 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1984 std::ostream &os) const
1985 {
1986 os << "[offset:0x" << std::hex << offset << ", name_value:" << name_value << "]" << std::endl;
1987 }
1988
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1989 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1990 {
1991 if (catch_block.exception_record == "") {
1992 os << ".catchall ";
1993 } else {
1994 os << ".catch " << catch_block.exception_record << ", ";
1995 }
1996
1997 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1998
1999 if (catch_block.catch_end_label != "") {
2000 os << ", " << catch_block.catch_end_label;
2001 }
2002 }
2003
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const2004 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
2005 {
2006 auto bool_attributes = meta.GetBoolAttributes();
2007 auto attributes = meta.GetAttributes();
2008 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
2009 return;
2010 }
2011
2012 os << " <";
2013
2014 size_t size = bool_attributes.size();
2015 size_t idx = 0;
2016 for (const auto &attr : bool_attributes) {
2017 os << attr;
2018 ++idx;
2019
2020 if (!attributes.empty() || !ann_list.empty() || idx < size) {
2021 os << ", ";
2022 }
2023 }
2024
2025 size = attributes.size();
2026 idx = 0;
2027 for (const auto &[key, values] : attributes) {
2028 for (size_t i = 0; i < values.size(); i++) {
2029 os << key << "=" << values[i];
2030
2031 if (i < values.size() - 1) {
2032 os << ", ";
2033 }
2034 }
2035
2036 ++idx;
2037
2038 if (!ann_list.empty() || idx < size) {
2039 os << ", ";
2040 }
2041 }
2042
2043 size = ann_list.size();
2044 idx = 0;
2045 for (const auto &[key, value] : ann_list) {
2046 os << key << "=" << value;
2047
2048 ++idx;
2049
2050 if (idx < size) {
2051 os << ", ";
2052 }
2053 }
2054
2055 os << ">";
2056 }
2057
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const2058 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
2059 std::ostream &os) const
2060 {
2061 if (line_number_table.empty()) {
2062 return;
2063 }
2064
2065 os << "\n# LINE_NUMBER_TABLE:\n";
2066 for (const auto &line_info : line_number_table) {
2067 os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
2068 }
2069 }
2070
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const2071 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
2072 std::ostream &os) const
2073 {
2074 if (column_number_table.empty()) {
2075 return;
2076 }
2077
2078 os << "\n# COLUMN_NUMBER_TABLE:\n";
2079 for (const auto &column_info : column_number_table) {
2080 os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
2081 }
2082 }
2083
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const2084 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
2085 const pandasm::Function &method, std::ostream &os) const
2086 {
2087 if (local_variable_table.empty()) {
2088 return;
2089 }
2090
2091 os << "\n# LOCAL_VARIABLE_TABLE:\n";
2092 os << "#\t Start End Register Name Signature\n";
2093 const int START_WIDTH = 5;
2094 const int END_WIDTH = 4;
2095 const int REG_WIDTH = 8;
2096 const int NAME_WIDTH = 14;
2097 for (const auto &variable_info : local_variable_table) {
2098 std::ostringstream reg_stream;
2099 reg_stream << variable_info.reg_number << '(';
2100 if (variable_info.reg_number < 0) {
2101 reg_stream << "acc";
2102 } else {
2103 uint32_t vreg = variable_info.reg_number;
2104 uint32_t first_arg_reg = method.GetTotalRegs();
2105 if (vreg < first_arg_reg) {
2106 reg_stream << 'v' << vreg;
2107 } else {
2108 reg_stream << 'a' << vreg - first_arg_reg;
2109 }
2110 }
2111 reg_stream << ')';
2112
2113 os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << " ";
2114 os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << " ";
2115 os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
2116 os << std::setw(NAME_WIDTH) << std::right << variable_info.name << " " << variable_info.type;
2117 if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
2118 os << " (" << variable_info.type_signature << ")";
2119 }
2120 os << "\n";
2121 }
2122 }
2123
BytecodeOpcodeToPandasmOpcode(uint8_t o) const2124 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
2125 {
2126 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
2127 }
2128
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const2129 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id, size_t idx) const
2130 {
2131 std::stringstream name;
2132 const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
2133 std::string str_data = StringDataToString(file_->GetStringData(offset));
2134 if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
2135 name << GetMethodSignature(offset);
2136 } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
2137 name << '\"';
2138 name << str_data;
2139 name << '\"';
2140 string_offset_to_name_.emplace(offset, str_data);
2141 } else {
2142 ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
2143 pandasm::LiteralArray lit_array;
2144 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
2145 name << SerializeLiteralArray(lit_array);
2146 }
2147
2148 return name.str();
2149 }
2150
GetRecordLanguage(panda_file::File::EntityId class_id) const2151 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
2152 {
2153 if (file_->IsExternal(class_id)) {
2154 // Keep the same behavior with abc2program
2155 return panda_file::DEFUALT_SOURCE_LANG;
2156 }
2157
2158 panda_file::ClassDataAccessor cda(*file_, class_id);
2159 return cda.GetSourceLang().value_or(panda_file::DEFUALT_SOURCE_LANG);
2160 }
2161
GetMethodLanguage(panda_file::File::EntityId method_id) const2162 panda::panda_file::SourceLang Disassembler::GetMethodLanguage(panda_file::File::EntityId method_id) const
2163 {
2164 if (file_->IsExternal(method_id)) {
2165 // Keep the same behavior with abc2program
2166 return panda_file::DEFUALT_SOURCE_LANG;
2167 }
2168
2169 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
2170 return method_accessor.GetSourceLang().value_or(panda_file::DEFUALT_SOURCE_LANG);
2171 }
2172
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)2173 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
2174 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
2175 panda_file::File::EntityId code_id)
2176 {
2177 const int32_t jmp_offset = std::stoi(pa_ins->Ids().at(0));
2178 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
2179 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
2180 size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
2181 if (idx != std::numeric_limits<size_t>::max()) {
2182 if (label_table->find(idx) == label_table->end()) {
2183 std::stringstream ss {};
2184 ss << "jump_label_" << label_table->size();
2185 (*label_table)[idx] = ss.str();
2186 }
2187
2188 pa_ins->SetId(0, label_table->at(idx));
2189 } else {
2190 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2191 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2192 << ": invalid jump offset 0x" << jmp_offset
2193 << " - jumping in the middle of another instruction!";
2194 }
2195 } else {
2196 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2197 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2198 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
2199 }
2200 }
2201
AddLabels(pandasm::Function * func,LabelTable & label_table)2202 static void AddLabels(pandasm::Function *func, LabelTable &label_table)
2203 {
2204 std::vector<pandasm::InsPtr> new_ins;
2205 new_ins.reserve(func->ins.size() + label_table.size());
2206
2207 for (size_t i = 0; i < func->ins.size(); i++) {
2208 if (label_table.find(i) != label_table.end()) {
2209 new_ins.emplace_back(new pandasm::LabelIns(label_table[i]));
2210 }
2211 new_ins.emplace_back(std::move(func->ins[i]));
2212 }
2213
2214 // In some case, the end label can be after the last instruction
2215 // Creating an invalid instruction for the label to make sure it can be serialized
2216 if (label_table.find(func->ins.size()) != label_table.end()) {
2217 new_ins.emplace_back(new pandasm::LabelIns(""));
2218 }
2219
2220 func->ins.swap(new_ins);
2221 }
2222
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const2223 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
2224 panda_file::File::EntityId code_id) const
2225 {
2226 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
2227
2228 const auto ins_sz = code_accessor.GetCodeSize();
2229 const auto ins_arr = code_accessor.GetInstructions();
2230
2231 method->regs_num = code_accessor.GetNumVregs();
2232
2233 auto bc_ins = BytecodeInstruction(ins_arr);
2234 const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
2235
2236 LabelTable label_table = GetExceptions(method, method_id, code_id);
2237
2238 IdList unknown_external_methods {};
2239
2240 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
2241 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
2242 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2243 << "). bytecode instructions sequence corrupted for method " << method->name
2244 << "! went out of bounds";
2245
2246 break;
2247 }
2248
2249 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
2250 if (pa_ins->IsJump()) {
2251 translateImmToLabel(pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
2252 }
2253
2254 // check if method id is unknown external method. if so, emplace it in table
2255 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
2256 const auto arg_method_idx = bc_ins.GetId().AsIndex();
2257 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
2258
2259 const auto arg_method_signature = GetMethodSignature(arg_method_id);
2260
2261 const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
2262 const bool is_external = file_->IsExternal(arg_method_id);
2263 if (is_external && !is_present) {
2264 unknown_external_methods.push_back(arg_method_id);
2265 }
2266 }
2267
2268 method->ins.emplace_back(pa_ins);
2269 bc_ins = bc_ins.GetNext();
2270 }
2271
2272 size_t instruction_count = method->ins.size();
2273 for (const auto &pair : label_table) {
2274 if (pair.first > instruction_count) {
2275 LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count
2276 << ", but the label index is " << pair.first;
2277 }
2278 }
2279
2280 AddLabels(method, label_table);
2281
2282 return unknown_external_methods;
2283 }
2284
GetColumnNumber()2285 std::vector<size_t> Disassembler::GetColumnNumber()
2286 {
2287 std::vector<size_t> columnNumber;
2288 for (const auto &method_info : prog_info_.methods_info) {
2289 for (const auto &column_number : method_info.second.column_number_table) {
2290 columnNumber.push_back(column_number.column);
2291 }
2292 }
2293 return columnNumber;
2294 }
2295
GetLineNumber()2296 std::vector<size_t> Disassembler::GetLineNumber()
2297 {
2298 std::vector<size_t> lineNumber;
2299 for (const auto &method_info : prog_info_.methods_info) {
2300 for (const auto &line_number : method_info.second.line_number_table) {
2301 lineNumber.push_back(line_number.line);
2302 }
2303 }
2304 return lineNumber;
2305 }
2306
2307 } // namespace panda::disasm
2308