1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "libpandafile/util/collect_util.h"
18 #include "mangling.h"
19 #include "utils/logger.h"
20 #include "utils/const_value.h"
21
22 #include <iomanip>
23 #include <type_traits>
24
25 #include "get_language_specific_metadata.inc"
26
27 namespace panda::disasm {
28
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)29 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
30 {
31 auto file_new = panda_file::File::Open(filename_in);
32 file_.swap(file_new);
33
34 if (file_ != nullptr) {
35 prog_ = pandasm::Program {};
36
37 record_name_to_id_.clear();
38 method_name_to_id_.clear();
39 string_offset_to_name_.clear();
40 skip_strings_ = skip_strings;
41 quiet_ = quiet;
42
43 prog_info_ = ProgInfo {};
44
45 prog_ann_ = ProgAnnotations {};
46
47 GetRecords();
48 GetLiteralArrays();
49
50 GetLanguageSpecificMetadata();
51 } else {
52 LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
53 }
54 }
55
CollectInfo()56 void Disassembler::CollectInfo()
57 {
58 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
59
60 debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
61
62 for (const auto &pair : record_name_to_id_) {
63 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
64 }
65
66 for (const auto &pair : method_name_to_id_) {
67 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
68 }
69 }
70
Serialize(std::ostream & os,bool add_separators,bool print_information) const71 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
72 {
73 if (os.bad()) {
74 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
75 return;
76 }
77
78 if (file_ != nullptr) {
79 std::string abc_file = GetFileNameByPath(file_->GetFilename());
80 os << "# source binary: " << abc_file << "\n\n";
81 }
82
83 if (add_separators) {
84 os << "# ====================\n"
85 "# LITERALS\n\n";
86 }
87
88 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
89
90 for (const auto &[key, lit_arr] : prog_.literalarray_table) {
91 Serialize(key, lit_arr, os);
92 }
93
94 for (const auto &[module_offset, array_table] : modulearray_table_) {
95 Serialize(module_offset, array_table, os);
96 }
97
98 os << "\n";
99
100 if (add_separators) {
101 os << "# ====================\n"
102 "# RECORDS\n\n";
103 }
104
105 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
106
107 for (const auto &r : prog_.record_table) {
108 Serialize(r.second, os, print_information);
109 }
110
111 if (add_separators) {
112 os << "# ====================\n"
113 "# METHODS\n\n";
114 }
115
116 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
117
118 for (const auto &m : prog_.function_table) {
119 Serialize(m.second, os, print_information);
120 }
121
122 if (add_separators) {
123 os << "# ====================\n"
124 "# STRING\n\n";
125 }
126
127 LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
128
129 for (const auto &[offset, name_value] : string_offset_to_name_) {
130 SerializeStrings(offset, name_value, os);
131 }
132 }
133
IsSystemType(const std::string & type_name)134 inline bool Disassembler::IsSystemType(const std::string &type_name)
135 {
136 bool is_array_type = type_name.find('[') != std::string::npos;
137 bool is_global = type_name == "_GLOBAL";
138
139 return is_array_type || is_global;
140 }
141
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)142 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
143 {
144 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
145
146 if (record == nullptr) {
147 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
148
149 return;
150 }
151
152 record->name = GetFullRecordName(record_id);
153
154 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
155
156 GetMetaData(record, record_id);
157
158 if (!file_->IsExternal(record_id)) {
159 GetMethods(record_id);
160 GetFields(record, record_id);
161 }
162 }
163
AddMethodToTables(const panda_file::File::EntityId & method_id)164 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
165 {
166 pandasm::Function new_method("", GetMethodLanguage(method_id));
167 GetMethod(&new_method, method_id);
168
169 const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
170 if (prog_.function_table.find(signature) != prog_.function_table.end()) {
171 return;
172 }
173
174 GetMethodAnnotations(new_method, method_id);
175 method_name_to_id_.emplace(signature, method_id);
176 prog_.function_synonyms[new_method.name].push_back(signature);
177 prog_.function_table.emplace(signature, std::move(new_method));
178 }
179
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)180 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
181 {
182 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
183
184 if (method == nullptr) {
185 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
186
187 return;
188 }
189
190 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
191
192 method->name = GetFullMethodName(method_id);
193
194 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
195
196 GetMetaData(method, method_id);
197
198 if (method_accessor.GetCodeId().has_value()) {
199 auto code_id = method_accessor.GetCodeId().value();
200 GetParams(method, code_id);
201 const IdList id_list = GetInstructions(method, method_id, code_id);
202
203 for (const auto &id : id_list) {
204 AddMethodToTables(id);
205 }
206 } else {
207 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
208 << "). implementation of method expected, but no \'CODE\' tag was found!";
209
210 return;
211 }
212 }
213
214 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const215 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
216 const panda_file::LiteralDataAccessor::LiteralValue &value) const
217 {
218 panda_file::File::EntityId id(std::get<uint32_t>(value));
219 auto sp = file_->GetSpanFromId(id);
220 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
221 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
222 for (size_t i = 0; i < len; i++) {
223 pandasm::LiteralArray::Literal lit;
224 lit.tag_ = tag;
225 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
226 lit_array->literals_.push_back(lit);
227 }
228 return;
229 }
230 for (size_t i = 0; i < len; i++) {
231 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
232 pandasm::LiteralArray::Literal lit;
233 lit.tag_ = tag;
234 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
235 lit_array->literals_.push_back(lit);
236 }
237 }
238
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const239 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
240 const panda_file::LiteralDataAccessor::LiteralValue &value,
241 const panda_file::LiteralTag &tag) const
242 {
243 pandasm::LiteralArray::Literal lit;
244 lit.tag_ = tag;
245 switch (tag) {
246 case panda_file::LiteralTag::BOOL: {
247 lit.value_ = std::get<bool>(value);
248 break;
249 }
250 case panda_file::LiteralTag::ACCESSOR:
251 case panda_file::LiteralTag::NULLVALUE:
252 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
253 lit.value_ = std::get<uint8_t>(value);
254 break;
255 }
256 case panda_file::LiteralTag::METHODAFFILIATE: {
257 lit.value_ = std::get<uint16_t>(value);
258 break;
259 }
260 case panda_file::LiteralTag::LITERALBUFFERINDEX:
261 case panda_file::LiteralTag::INTEGER: {
262 lit.value_ = std::get<uint32_t>(value);
263 break;
264 }
265 case panda_file::LiteralTag::DOUBLE: {
266 lit.value_ = std::get<double>(value);
267 break;
268 }
269 case panda_file::LiteralTag::STRING: {
270 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
271 lit.value_ = StringDataToString(str_data);
272 break;
273 }
274 case panda_file::LiteralTag::METHOD:
275 case panda_file::LiteralTag::GETTER:
276 case panda_file::LiteralTag::SETTER:
277 case panda_file::LiteralTag::GENERATORMETHOD: {
278 panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
279 lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
280 break;
281 }
282 case panda_file::LiteralTag::LITERALARRAY: {
283 std::stringstream ss;
284 ss << "0x" << std::hex << std::get<uint32_t>(value);
285 lit.value_ = ss.str();
286 break;
287 }
288 case panda_file::LiteralTag::TAGVALUE: {
289 return;
290 }
291 default: {
292 UNREACHABLE();
293 }
294 }
295 lit_array->literals_.push_back(lit);
296 }
297
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const298 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
299 {
300 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
301 lit_array_accessor.EnumerateLiteralVals(
302 offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
303 const panda_file::LiteralTag &tag) {
304 switch (tag) {
305 case panda_file::LiteralTag::ARRAY_U1: {
306 FillLiteralArrayData<bool>(lit_array, tag, value);
307 break;
308 }
309 case panda_file::LiteralTag::ARRAY_I8:
310 case panda_file::LiteralTag::ARRAY_U8: {
311 FillLiteralArrayData<uint8_t>(lit_array, tag, value);
312 break;
313 }
314 case panda_file::LiteralTag::ARRAY_I16:
315 case panda_file::LiteralTag::ARRAY_U16: {
316 FillLiteralArrayData<uint16_t>(lit_array, tag, value);
317 break;
318 }
319 case panda_file::LiteralTag::ARRAY_I32:
320 case panda_file::LiteralTag::ARRAY_U32: {
321 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
322 break;
323 }
324 case panda_file::LiteralTag::ARRAY_I64:
325 case panda_file::LiteralTag::ARRAY_U64: {
326 FillLiteralArrayData<uint64_t>(lit_array, tag, value);
327 break;
328 }
329 case panda_file::LiteralTag::ARRAY_F32: {
330 FillLiteralArrayData<float>(lit_array, tag, value);
331 break;
332 }
333 case panda_file::LiteralTag::ARRAY_F64: {
334 FillLiteralArrayData<double>(lit_array, tag, value);
335 break;
336 }
337 case panda_file::LiteralTag::ARRAY_STRING: {
338 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
339 break;
340 }
341 default: {
342 FillLiteralData(lit_array, value, tag);
343 break;
344 }
345 }
346 });
347 }
348
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const349 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
350 {
351 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
352 GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
353 }
354
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const355 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
356 {
357 return module_literals_.find(id.GetOffset()) != module_literals_.end();
358 }
359
GetLiteralArrays()360 void Disassembler::GetLiteralArrays()
361 {
362 if (panda_file::ContainsLiteralArrayInHeader(file_->GetHeader()->version)) {
363 const auto lit_arrays_id = file_->GetLiteralArraysId();
364 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
365 << lit_arrays_id << ")";
366
367 panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
368 size_t num_litarrays = lda.GetLiteralNum();
369 for (size_t index = 0; index < num_litarrays; index++) {
370 auto id = lda.GetLiteralArrayId(index);
371 if (module_request_phase_literals_.count(id.GetOffset())) {
372 continue;
373 }
374 FillLiteralArrayTable(id, index);
375 }
376 } else {
377 panda::libpandafile::CollectUtil collect_util;
378 std::unordered_set<uint32_t> literal_array_ids;
379 collect_util.CollectLiteralArray(*file_, literal_array_ids);
380 size_t index = 0;
381 for (uint32_t literal_array_id : literal_array_ids) {
382 panda_file::File::EntityId id {literal_array_id};
383 FillLiteralArrayTable(id, index);
384 index++;
385 }
386 }
387 }
388
FillLiteralArrayTable(panda_file::File::EntityId & id,size_t index)389 void Disassembler::FillLiteralArrayTable(panda_file::File::EntityId &id, size_t index)
390 {
391 if (IsModuleLiteralOffset(id)) {
392 std::stringstream ss;
393 ss << index << " 0x" << std::hex << id.GetOffset();
394 modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
395 return;
396 }
397 std::stringstream ss;
398 ss << index << " 0x" << std::hex << id.GetOffset();
399 panda::pandasm::LiteralArray lit_arr;
400 GetLiteralArrayByOffset(&lit_arr, id);
401 prog_.literalarray_table.emplace(ss.str(), lit_arr);
402 }
403
ModuleTagToString(panda_file::ModuleTag & tag) const404 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
405 {
406 switch (tag) {
407 case panda_file::ModuleTag::REGULAR_IMPORT:
408 return "REGULAR_IMPORT";
409 case panda_file::ModuleTag::NAMESPACE_IMPORT:
410 return "NAMESPACE_IMPORT";
411 case panda_file::ModuleTag::LOCAL_EXPORT:
412 return "LOCAL_EXPORT";
413 case panda_file::ModuleTag::INDIRECT_EXPORT:
414 return "INDIRECT_EXPORT";
415 case panda_file::ModuleTag::STAR_EXPORT:
416 return "STAR_EXPORT";
417 default: {
418 UNREACHABLE();
419 break;
420 }
421 }
422 return "";
423 }
424
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const425 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
426 {
427 panda_file::ModuleDataAccessor mda(*file_, module_id);
428 const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
429 std::vector<std::string> module_literal_array;
430 std::stringstream module_requests_stringstream;
431 module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n";
432 for (size_t index = 0; index < request_modules_offset.size(); ++index) {
433 module_requests_stringstream << "\t\t" << index <<
434 " : " << GetStringByOffset(request_modules_offset[index]) << ",\n";
435 }
436 module_requests_stringstream << "\t}";
437 module_literal_array.push_back(module_requests_stringstream.str());
438 mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset,
439 uint32_t request_module_idx, uint32_t import_name_offset,
440 uint32_t local_name_offset) {
441 std::stringstream ss;
442 ss << "\tModuleTag: " << ModuleTagToString(tag);
443 if (tag == panda_file::ModuleTag::REGULAR_IMPORT ||
444 tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) {
445 if (!IsValidOffset(local_name_offset)) {
446 LOG(FATAL, DISASSEMBLER) << "Get invalid local name offset!" << std::endl;
447 }
448 ss << ", local_name: " << GetStringByOffset(local_name_offset);
449 }
450 if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
451 if (!IsValidOffset(export_name_offset)) {
452 LOG(FATAL, DISASSEMBLER) << "Get invalid export name offset!" << std::endl;
453 }
454 ss << ", export_name: " << GetStringByOffset(export_name_offset);
455 }
456 if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
457 if (!IsValidOffset(import_name_offset)) {
458 LOG(FATAL, DISASSEMBLER) << "Get invalid import name offset!" << std::endl;
459 }
460 ss << ", import_name: " << GetStringByOffset(import_name_offset);
461 }
462 auto request_module_offset = request_modules_offset[request_module_idx];
463 if (tag != panda_file::ModuleTag::LOCAL_EXPORT) {
464 if (request_module_idx >= request_modules_offset.size() || !IsValidOffset(request_module_offset)) {
465 LOG(FATAL, DISASSEMBLER) << "Get invalid request module offset!" << std::endl;
466 }
467 ss << ", module_request: " << GetStringByOffset(request_module_offset);
468 }
469 module_literal_array.push_back(ss.str());
470 });
471
472 return module_literal_array;
473 }
474
GetRecords()475 void Disassembler::GetRecords()
476 {
477 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
478
479 const auto class_idx = file_->GetClasses();
480
481 for (size_t i = 0; i < class_idx.size(); i++) {
482 uint32_t class_id = class_idx[i];
483 auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
484
485 if (class_id > file_->GetHeader()->file_size) {
486 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
487 << class_off << "). binary file corrupted. record offset (0x" << class_id
488 << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
489 break;
490 }
491
492 const panda_file::File::EntityId record_id {class_id};
493 auto language = GetRecordLanguage(record_id);
494
495 pandasm::Record record("", language);
496 GetRecord(&record, record_id);
497
498 if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
499 record_name_to_id_.emplace(record.name, record_id);
500 prog_.record_table.emplace(record.name, std::move(record));
501 }
502 }
503 }
504
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)505 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
506 {
507 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
508
509 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
510 pandasm::Field field(record->language);
511
512 panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
513 field.name = StringDataToString(file_->GetStringData(field_name_id));
514
515 uint32_t field_type = field_accessor.GetType();
516 field.type = FieldTypeToPandasmType(field_type);
517
518 GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD);
519
520 record->field_list.push_back(std::move(field));
521 });
522 }
523
GetMethods(const panda_file::File::EntityId & record_id)524 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
525 {
526 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
527
528 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
529 AddMethodToTables(method_accessor.GetMethodId());
530 });
531 }
532
GetAnnotationElements(pandasm::Function & method,const panda_file::AnnotationDataAccessor & ada,const std::string & annotation_name)533 void Disassembler::GetAnnotationElements(pandasm::Function &method, const panda_file::AnnotationDataAccessor &ada,
534 const std::string &annotation_name)
535 {
536 uint32_t elem_count = ada.GetCount();
537 for (uint32_t i = 0; i < elem_count; i++) {
538 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
539 const auto &elem_name =
540 std::string {reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data)};
541 panda_file::AnnotationDataAccessor::Tag tag = ada.GetTag(i);
542 auto value_type = pandasm::Value::GetCharAsType(tag.GetItem());
543 switch (value_type) {
544 case pandasm::Value::Type::U1: {
545 bool ann_elem_value = adae.GetScalarValue().Get<bool>();
546 AddAnnotationElement<bool>(method, annotation_name, elem_name, ann_elem_value);
547 break;
548 }
549 case pandasm::Value::Type::U32: {
550 uint32_t ann_elem_value = adae.GetScalarValue().Get<uint32_t>();
551 AddAnnotationElement<uint32_t>(method, annotation_name, elem_name, ann_elem_value);
552 break;
553 }
554 case pandasm::Value::Type::F64: {
555 double ann_elem_value = adae.GetScalarValue().Get<double>();
556 AddAnnotationElement<double>(method, annotation_name, elem_name, ann_elem_value);
557 break;
558 }
559 case pandasm::Value::Type::STRING: {
560 uint32_t string_id = adae.GetScalarValue().Get<uint32_t>();
561 std::string_view ann_elem_value {
562 reinterpret_cast<const char *>(file_->GetStringData(panda_file::File::EntityId(string_id)).data)};
563 AddAnnotationElement<std::string_view>(method, annotation_name, elem_name, ann_elem_value);
564 break;
565 }
566 case pandasm::Value::Type::LITERALARRAY: {
567 uint32_t literalArray_offset = adae.GetScalarValue().Get<uint32_t>();
568 AddAnnotationElement<panda::pandasm::LiteralArray, std::string_view>(
569 method, annotation_name, elem_name, std::string_view {std::to_string(literalArray_offset)});
570 break;
571 }
572 default:
573 UNREACHABLE();
574 }
575 }
576 }
577
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)578 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
579 {
580 panda_file::MethodDataAccessor mda(*file_, method_id);
581 mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
582 panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
583 auto annotation_name =
584 std::string {reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data)};
585 annotation_name.pop_back(); // remove ; from annotation name
586
587 if (annotation_name.empty()) {
588 return;
589 }
590
591 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
592 std::vector<pandasm::AnnotationElement> elements;
593 pandasm::AnnotationData ann_data(annotation_name, elements);
594 std::vector<pandasm::AnnotationData> annotations;
595 annotations.push_back(std::move(ann_data));
596 method.metadata->AddAnnotations(annotations);
597
598 GetAnnotationElements(method, ada, annotation_name);
599 });
600 }
601
602 template <typename T, typename U = T>
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const U & value)603 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
604 const std::string &key, const U &value)
605 {
606 if (key.empty()) {
607 return;
608 }
609
610 std::unique_ptr<pandasm::Value> pandasmValue;
611 if constexpr (std::is_same<T, uint32_t>::value) {
612 pandasmValue = std::move(
613 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
614 } else if constexpr (std::is_same<T, double>::value) {
615 pandasmValue = std::move(
616 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(value)));
617 } else if constexpr (std::is_same<T, bool>::value) {
618 pandasmValue = std::move(
619 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(value)));
620 } else if constexpr (std::is_same<T, std::string_view>::value) {
621 pandasmValue = std::move(
622 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(value)));
623 } else if constexpr (std::is_same<T, panda::pandasm::LiteralArray>::value) {
624 static_assert(std::is_same<U, std::string_view>::value);
625 pandasmValue = std::move(std::make_unique<pandasm::ScalarValue>(
626 pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(value)));
627 } else {
628 UNREACHABLE();
629 }
630
631 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
632 const auto ann_iter =
633 std::find_if(method_annotation.begin(), method_annotation.end(),
634 [&](pandasm::AnnotationData &ann) -> bool { return ann.GetName() == annotation_name; });
635
636 pandasm::AnnotationElement annotation_element(key, std::move(pandasmValue));
637 ann_iter->AddElement(std::move(annotation_element));
638 method.metadata->SetAnnotations(std::move(method_annotation));
639 }
640
GetAnnotationByMethodName(const std::string & method_name) const641 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
642 {
643 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
644 bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
645 if (!is_signature) {
646 return std::nullopt;
647 }
648
649 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
650 bool is_method = method_iter != prog_.function_table.end();
651 const auto annotations = method_iter->second.metadata->GetAnnotations();
652 if (!is_method || annotations.empty()) {
653 return std::nullopt;
654 }
655
656 std::vector<std::string> ann;
657 for (const auto &ann_data : annotations) {
658 ann.emplace_back(ann_data.GetName());
659 }
660 return ann;
661 }
662
GetSerializedMethodAnnotation(const std::string & method_name,const std::string & anno_name) const663 std::optional<std::string> Disassembler::GetSerializedMethodAnnotation(const std::string &method_name,
664 const std::string &anno_name) const
665 {
666 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
667 if (method_synonyms_iter == prog_.function_synonyms.end()) {
668 return std::nullopt;
669 }
670
671 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
672 if (method_iter == prog_.function_table.end()) {
673 return std::nullopt;
674 }
675
676 const auto annotations = method_iter->second.metadata->GetAnnotations();
677 if (annotations.empty()) {
678 return std::nullopt;
679 }
680
681 const auto annotation_iter =
682 std::find_if(annotations.begin(), annotations.end(),
683 [&](const pandasm::AnnotationData &ann) -> bool { return ann.GetName() == anno_name; });
684 if (annotation_iter == annotations.end()) {
685 return std::nullopt;
686 }
687
688 std::ostringstream os;
689 SerializeMethodAnnotation(*annotation_iter, os);
690 return os.str();
691 }
692
GetSerializedRecord(const std::string & record_name) const693 std::optional<std::string> Disassembler::GetSerializedRecord(const std::string &record_name) const
694 {
695 const auto record_iter = prog_.record_table.find(record_name);
696 if (record_iter == prog_.record_table.end()) {
697 return std::nullopt;
698 }
699 std::ostringstream os;
700 Serialize(record_iter->second, os, false);
701 return os.str();
702 }
703
GetStrings() const704 std::vector<std::string> Disassembler::GetStrings() const
705 {
706 std::vector<std::string> strings;
707 for (auto &str_info : string_offset_to_name_) {
708 strings.emplace_back(str_info.second);
709 }
710
711 return strings;
712 }
713
GetModuleLiterals() const714 std::vector<std::string> Disassembler::GetModuleLiterals() const
715 {
716 std::vector<std::string> module_literals;
717 for (auto &module_array : modulearray_table_) {
718 for (auto &module : module_array.second) {
719 module_literals.emplace_back(module);
720 }
721 }
722
723 return module_literals;
724 }
725
GetParams(pandasm::Function * method,const panda_file::File::EntityId & code_id) const726 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const
727 {
728 /**
729 * frame size - 2^16 - 1
730 */
731 static const uint32_t MAX_ARG_NUM = 0xFFFF;
732
733 LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
734
735 if (method == nullptr) {
736 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
737
738 return;
739 }
740
741 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
742
743 auto params_num = code_accessor.GetNumArgs();
744 if (params_num > MAX_ARG_NUM) {
745 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
746 << "). number of function's arguments (" << std::dec << params_num
747 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
748
749 return;
750 }
751
752 method->return_type = pandasm::Type("any", 0);
753
754 for (uint8_t i = 0; i < params_num; i++) {
755 method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), method->language));
756 }
757 }
758
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const759 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
760 panda_file::File::EntityId code_id) const
761 {
762 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
763
764 if (method == nullptr) {
765 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
766 return LabelTable {};
767 }
768
769 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
770
771 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
772 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
773
774 size_t try_idx = 0;
775 LabelTable label_table {};
776 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
777 pandasm::Function::CatchBlock catch_block_pa {};
778 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
779 return false;
780 }
781 size_t catch_idx = 0;
782 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
783 auto class_idx = catch_block.GetTypeIdx();
784 if (class_idx == panda_file::INVALID_INDEX) {
785 catch_block_pa.exception_record = "";
786 } else {
787 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
788 catch_block_pa.exception_record = GetFullRecordName(class_id);
789 }
790 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
791 catch_idx)) {
792 return false;
793 }
794
795 method->catch_blocks.push_back(catch_block_pa);
796 catch_block_pa.catch_begin_label = "";
797 catch_block_pa.catch_end_label = "";
798 catch_idx++;
799
800 return true;
801 });
802 try_idx++;
803
804 return true;
805 });
806
807 return label_table;
808 }
809
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)810 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
811 {
812 size_t count = 0;
813
814 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
815 count++;
816 bc_ins_first = bc_ins_first.GetNext();
817 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
818 return std::numeric_limits<size_t>::max();
819 }
820 }
821
822 return count;
823 }
824
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const825 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
826 const panda_file::CodeDataAccessor::TryBlock &try_block,
827 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
828 size_t try_idx) const
829 {
830 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
831 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
832
833 const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
834 const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
835
836 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
837 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
838 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
839 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
840
841 if (!try_begin_offset_in_range || !try_begin_offset_valid) {
842 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
843 << try_begin_bc_ins.GetAddress();
844 return false;
845 } else {
846 std::stringstream ss {};
847 ss << "try_begin_label_" << try_idx;
848
849 LabelTable::iterator it = label_table->find(try_begin_idx);
850 if (it == label_table->end()) {
851 catch_block_pa->try_begin_label = ss.str();
852 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
853 } else {
854 catch_block_pa->try_begin_label = it->second;
855 }
856 }
857
858 if (!try_end_offset_in_range || !try_end_offset_valid) {
859 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
860 << try_end_bc_ins.GetAddress();
861 return false;
862 } else {
863 std::stringstream ss {};
864 ss << "try_end_label_" << try_idx;
865
866 LabelTable::iterator it = label_table->find(try_end_idx);
867 if (it == label_table->end()) {
868 catch_block_pa->try_end_label = ss.str();
869 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
870 } else {
871 catch_block_pa->try_end_label = it->second;
872 }
873 }
874
875 return true;
876 }
877
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const878 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
879 const panda_file::CodeDataAccessor::CatchBlock &catch_block,
880 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
881 size_t try_idx, size_t catch_idx) const
882 {
883 const auto handler_begin_offset = catch_block.GetHandlerPc();
884 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
885
886 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
887 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
888
889 const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
890 const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
891
892 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
893 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
894 const bool handler_end_present = catch_block.GetCodeSize() != 0;
895 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
896 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
897
898 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
899 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
900 << handler_begin_bc_ins.GetAddress();
901 return false;
902 } else {
903 std::stringstream ss {};
904 ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
905
906 LabelTable::iterator it = label_table->find(handler_begin_idx);
907 if (it == label_table->end()) {
908 catch_block_pa->catch_begin_label = ss.str();
909 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
910 } else {
911 catch_block_pa->catch_begin_label = it->second;
912 }
913 }
914
915 if (!handler_end_offset_in_range || !handler_end_offset_valid) {
916 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
917 << handler_end_bc_ins.GetAddress();
918 return false;
919 } else if (handler_end_present) {
920 std::stringstream ss {};
921 ss << "handler_end_label_" << try_idx << "_" << catch_idx;
922
923 LabelTable::iterator it = label_table->find(handler_end_idx);
924 if (it == label_table->end()) {
925 catch_block_pa->catch_end_label = ss.str();
926 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
927 } else {
928 catch_block_pa->catch_end_label = it->second;
929 }
930 }
931
932 return true;
933 }
934
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const935 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
936 {
937 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
938 << ")";
939
940 if (method == nullptr) {
941 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
942
943 return;
944 }
945
946 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
947
948 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
949
950 if (!method_accessor.IsStatic()) {
951 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
952 auto this_type = pandasm::Type::FromDescriptor(class_name);
953
954 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
955 << "\') is not static. emplacing self-argument of type " << this_type.GetName();
956
957 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, method->language));
958 } else {
959 method->metadata->SetAttribute("static");
960 }
961
962 if (file_->IsExternal(method_accessor.GetMethodId())) {
963 method->metadata->SetAttribute("external");
964 }
965
966 std::string ctor_name = panda::panda_file::GetCtorName(method->language);
967 std::string cctor_name = panda::panda_file::GetCctorName(method->language);
968
969 const bool is_ctor = (method_name_raw == ctor_name);
970 const bool is_cctor = (method_name_raw == cctor_name);
971
972 if (is_ctor) {
973 method->metadata->SetAttribute("ctor");
974 method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
975 } else if (is_cctor) {
976 method->metadata->SetAttribute("cctor");
977 method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
978 }
979 }
980
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const981 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
982 {
983 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
984 << ")";
985
986 if (record == nullptr) {
987 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
988
989 return;
990 }
991
992 if (file_->IsExternal(record_id)) {
993 record->metadata->SetAttribute("external");
994 }
995 }
996
GetMetadataFieldValue(panda_file::FieldDataAccessor & field_accessor,pandasm::Field * field,bool isScopeNamesRecord)997 void Disassembler::GetMetadataFieldValue(panda_file::FieldDataAccessor &field_accessor, pandasm::Field *field,
998 bool isScopeNamesRecord)
999 {
1000 if (field->type.GetId() == panda_file::Type::TypeId::U32) {
1001 const auto offset = field_accessor.GetValue<uint32_t>().value();
1002 bool isScopeNameField = isScopeNamesRecord || field->name == ark::SCOPE_NAMES;
1003 if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) {
1004 module_request_phase_literals_.insert(offset);
1005 } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !isScopeNameField) {
1006 LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
1007 << " is excluded";
1008 module_literals_.insert(offset);
1009 }
1010 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
1011 } else if (field->type.GetId() == panda_file::Type::TypeId::U8) {
1012 const uint8_t val = field_accessor.GetValue<uint8_t>().value();
1013 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
1014 } else if (field->type.GetId() == panda_file::Type::TypeId::F64) {
1015 std::optional<double> val = field_accessor.GetValue<double>();
1016 if (val.has_value()) {
1017 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::F64>(val.value()));
1018 }
1019 } else if (field->type.GetId() == panda_file::Type::TypeId::U1) {
1020 std::optional<bool> val = field_accessor.GetValue<bool>();
1021 if (val.has_value()) {
1022 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U1>(val.value()));
1023 }
1024 } else if (field->type.GetId() == panda_file::Type::TypeId::REFERENCE && field->type.GetName() == "panda.String") {
1025 std::optional<uint32_t> string_offset_val = field_accessor.GetValue<uint32_t>();
1026 if (string_offset_val.has_value()) {
1027 std::string_view val {reinterpret_cast<const char *>(
1028 file_->GetStringData(panda_file::File::EntityId(string_offset_val.value())).data)};
1029 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::STRING>(val));
1030 }
1031 } else if (field->type.GetRank() > 0) {
1032 std::optional<uint32_t> litarray_offset_val = field_accessor.GetValue<uint32_t>();
1033 if (litarray_offset_val.has_value()) {
1034 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::LITERALARRAY>(
1035 std::string_view {std::to_string(litarray_offset_val.value())}));
1036 }
1037 } else {
1038 UNREACHABLE();
1039 }
1040 }
1041
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id,bool is_scope_names_record)1042 void Disassembler::GetMetaData(pandasm::Field *field, const panda_file::File::EntityId &field_id,
1043 bool is_scope_names_record)
1044 {
1045 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
1046
1047 if (field == nullptr) {
1048 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
1049
1050 return;
1051 }
1052
1053 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
1054
1055 if (field_accessor.IsExternal()) {
1056 field->metadata->SetAttribute("external");
1057 }
1058
1059 if (field_accessor.IsStatic()) {
1060 field->metadata->SetAttribute("static");
1061 }
1062
1063 GetMetadataFieldValue(field_accessor, field, is_scope_names_record);
1064 }
1065
AnnotationTagToString(const char tag) const1066 std::string Disassembler::AnnotationTagToString(const char tag) const
1067 {
1068 switch (tag) {
1069 case '1':
1070 return "u1";
1071 case '2':
1072 return "i8";
1073 case '3':
1074 return "u8";
1075 case '4':
1076 return "i16";
1077 case '5':
1078 return "u16";
1079 case '6':
1080 return "i32";
1081 case '7':
1082 return "u32";
1083 case '8':
1084 return "i64";
1085 case '9':
1086 return "u64";
1087 case 'A':
1088 return "f32";
1089 case 'B':
1090 return "f64";
1091 case 'C':
1092 return "string";
1093 case 'D':
1094 return "record";
1095 case 'E':
1096 return "method";
1097 case 'F':
1098 return "enum";
1099 case 'G':
1100 return "annotation";
1101 case 'I':
1102 return "void";
1103 case 'J':
1104 return "method_handle";
1105 case 'K':
1106 return "u1[]";
1107 case 'L':
1108 return "i8[]";
1109 case 'M':
1110 return "u8[]";
1111 case 'N':
1112 return "i16[]";
1113 case 'O':
1114 return "u16[]";
1115 case 'P':
1116 return "i32[]";
1117 case 'Q':
1118 return "u32[]";
1119 case 'R':
1120 return "i64[]";
1121 case 'S':
1122 return "u64[]";
1123 case 'T':
1124 return "f32[]";
1125 case 'U':
1126 return "f64[]";
1127 case 'V':
1128 return "string[]";
1129 case 'W':
1130 return "record[]";
1131 case 'X':
1132 return "method[]";
1133 case 'Y':
1134 return "enum[]";
1135 case 'Z':
1136 return "annotation[]";
1137 case '@':
1138 return "method_handle[]";
1139 case '*':
1140 return "nullptr string";
1141 default:
1142 return std::string();
1143 }
1144 }
1145
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1146 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1147 {
1148 std::stringstream ss;
1149
1150 if (type == "i8") {
1151 int8_t res = value.Get<int8_t>();
1152 ss << static_cast<int>(res);
1153 } else if (type == "u1" || type == "u8") {
1154 uint8_t res = value.Get<uint8_t>();
1155 ss << static_cast<unsigned int>(res);
1156 } else if (type == "i16") {
1157 ss << value.Get<int16_t>();
1158 } else if (type == "u16") {
1159 ss << value.Get<uint16_t>();
1160 } else if (type == "i32") {
1161 ss << value.Get<int32_t>();
1162 } else if (type == "u32") {
1163 ss << value.Get<uint32_t>();
1164 } else if (type == "i64") {
1165 ss << value.Get<int64_t>();
1166 } else if (type == "u64") {
1167 ss << value.Get<uint64_t>();
1168 } else if (type == "f32") {
1169 ss << value.Get<float>();
1170 } else if (type == "f64") {
1171 ss << value.Get<double>();
1172 } else if (type == "string") {
1173 const auto id = value.Get<panda_file::File::EntityId>();
1174 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1175 } else if (type == "record") {
1176 const auto id = value.Get<panda_file::File::EntityId>();
1177 ss << GetFullRecordName(id);
1178 } else if (type == "method") {
1179 const auto id = value.Get<panda_file::File::EntityId>();
1180 AddMethodToTables(id);
1181 ss << GetMethodSignature(id);
1182 } else if (type == "enum") {
1183 const auto id = value.Get<panda_file::File::EntityId>();
1184 panda_file::FieldDataAccessor field_accessor(*file_, id);
1185 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1186 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1187 } else if (type == "annotation") {
1188 const auto id = value.Get<panda_file::File::EntityId>();
1189 ss << "id_" << id;
1190 } else if (type == "void") {
1191 return std::string();
1192 } else if (type == "method_handle") {
1193 }
1194
1195 return ss.str();
1196 }
1197
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1198 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1199 const size_t idx)
1200 {
1201 std::stringstream ss;
1202
1203 if (type == "i8") {
1204 int8_t res = value.Get<int8_t>(idx);
1205 ss << static_cast<int>(res);
1206 } else if (type == "u1" || type == "u8") {
1207 uint8_t res = value.Get<uint8_t>(idx);
1208 ss << static_cast<unsigned int>(res);
1209 } else if (type == "i16") {
1210 ss << value.Get<int16_t>(idx);
1211 } else if (type == "u16") {
1212 ss << value.Get<uint16_t>(idx);
1213 } else if (type == "i32") {
1214 ss << value.Get<int32_t>(idx);
1215 } else if (type == "u32") {
1216 ss << value.Get<uint32_t>(idx);
1217 } else if (type == "i64") {
1218 ss << value.Get<int64_t>(idx);
1219 } else if (type == "u64") {
1220 ss << value.Get<uint64_t>(idx);
1221 } else if (type == "f32") {
1222 ss << value.Get<float>(idx);
1223 } else if (type == "f64") {
1224 ss << value.Get<double>(idx);
1225 } else if (type == "string") {
1226 const auto id = value.Get<panda_file::File::EntityId>(idx);
1227 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1228 } else if (type == "record") {
1229 const auto id = value.Get<panda_file::File::EntityId>(idx);
1230 ss << GetFullRecordName(id);
1231 } else if (type == "method") {
1232 const auto id = value.Get<panda_file::File::EntityId>(idx);
1233 AddMethodToTables(id);
1234 ss << GetMethodSignature(id);
1235 } else if (type == "enum") {
1236 const auto id = value.Get<panda_file::File::EntityId>(idx);
1237 panda_file::FieldDataAccessor field_accessor(*file_, id);
1238 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1239 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1240 } else if (type == "annotation") {
1241 const auto id = value.Get<panda_file::File::EntityId>(idx);
1242 ss << "id_" << id;
1243 } else if (type == "method_handle") {
1244 } else if (type == "nullptr string") {
1245 }
1246
1247 return ss.str();
1248 }
1249
GetFullMethodName(const panda_file::File::EntityId & method_id) const1250 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1251 {
1252 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1253
1254 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1255
1256 std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1257 if (IsSystemType(class_name)) {
1258 class_name = "";
1259 } else {
1260 class_name += ".";
1261 }
1262
1263 return class_name + method_name_raw;
1264 }
1265
GetMethodSignature(const panda_file::File::EntityId & method_id) const1266 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1267 {
1268 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1269
1270 pandasm::Function method(GetFullMethodName(method_id), GetMethodLanguage(method_id));
1271 if (method_accessor.GetCodeId().has_value()) {
1272 GetParams(&method, method_accessor.GetCodeId().value());
1273 }
1274 GetMetaData(&method, method_id);
1275
1276 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1277 }
1278
GetFullRecordName(const panda_file::File::EntityId & class_id) const1279 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1280 {
1281 std::string name = StringDataToString(file_->GetStringData(class_id));
1282
1283 auto type = pandasm::Type::FromDescriptor(name);
1284 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1285
1286 return type.GetPandasmName();
1287 }
1288
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1289 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1290 {
1291 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1292
1293 if (file_->IsExternal(record_id)) {
1294 return;
1295 }
1296
1297 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1298 std::stringstream ss;
1299
1300 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1301 << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1302 << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1303
1304 record_info->record_info = ss.str();
1305 ss.str(std::string());
1306
1307 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1308 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1309 << field_accessor.GetFieldId();
1310
1311 record_info->fields_info.push_back(ss.str());
1312
1313 ss.str(std::string());
1314 });
1315 }
1316
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1317 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1318 {
1319 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1320
1321 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1322 std::stringstream ss;
1323
1324 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1325 << method_accessor.GetMethodId();
1326
1327 if (method_accessor.GetCodeId().has_value()) {
1328 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1329 << method_accessor.GetCodeId().value();
1330
1331 GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1332 } else {
1333 ss << ", <no code>";
1334 }
1335
1336 method_info->method_info = ss.str();
1337
1338 if (method_accessor.GetCodeId()) {
1339 ASSERT(debug_info_extractor_ != nullptr);
1340 method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1341 method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1342 method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1343
1344 // Add information about parameters into the table
1345 panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1346 auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1347 uint32_t code_size = codeda.GetCodeSize();
1348 for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1349 panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1350 method_info->local_variable_table.emplace_back(arg_info);
1351 }
1352 }
1353 }
1354
IsArray(const panda_file::LiteralTag & tag)1355 static bool IsArray(const panda_file::LiteralTag &tag)
1356 {
1357 switch (tag) {
1358 case panda_file::LiteralTag::ARRAY_U1:
1359 case panda_file::LiteralTag::ARRAY_U8:
1360 case panda_file::LiteralTag::ARRAY_I8:
1361 case panda_file::LiteralTag::ARRAY_U16:
1362 case panda_file::LiteralTag::ARRAY_I16:
1363 case panda_file::LiteralTag::ARRAY_U32:
1364 case panda_file::LiteralTag::ARRAY_I32:
1365 case panda_file::LiteralTag::ARRAY_U64:
1366 case panda_file::LiteralTag::ARRAY_I64:
1367 case panda_file::LiteralTag::ARRAY_F32:
1368 case panda_file::LiteralTag::ARRAY_F64:
1369 case panda_file::LiteralTag::ARRAY_STRING:
1370 return true;
1371 default:
1372 return false;
1373 }
1374 }
1375
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1376 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1377 {
1378 std::stringstream ret;
1379 if (lit_array.literals_.empty()) {
1380 return "";
1381 }
1382
1383 std::stringstream ss;
1384 ss << "{ ";
1385 const auto &tag = lit_array.literals_[0].tag_;
1386 if (IsArray(tag)) {
1387 ss << LiteralTagToString(tag);
1388 }
1389 ss << lit_array.literals_.size();
1390 ss << " [ ";
1391 SerializeValues(lit_array, ss);
1392 ss << "]}";
1393 return ss.str();
1394 }
1395
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1396 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1397 {
1398 os << key << " ";
1399 os << SerializeLiteralArray(lit_array);
1400 os << "\n";
1401 }
1402
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1403 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1404 std::ostream &os) const
1405 {
1406 os << module_offset << " ";
1407 os << SerializeModuleLiteralArray(module_array);
1408 os << "\n";
1409 }
1410
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1411 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1412 {
1413 if (module_array.empty()) {
1414 return "";
1415 }
1416
1417 std::stringstream ss;
1418 ss << "{ ";
1419 ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array
1420 ss << " [\n";
1421 for (size_t index = 0; index < module_array.size(); index++) {
1422 ss << module_array[index] << ";\n";
1423 }
1424 ss << "]}";
1425 return ss.str();
1426 }
1427
LiteralTagToString(const panda_file::LiteralTag & tag) const1428 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1429 {
1430 switch (tag) {
1431 case panda_file::LiteralTag::BOOL:
1432 case panda_file::LiteralTag::ARRAY_U1:
1433 return "u1";
1434 case panda_file::LiteralTag::ARRAY_U8:
1435 return "u8";
1436 case panda_file::LiteralTag::ARRAY_I8:
1437 return "i8";
1438 case panda_file::LiteralTag::ARRAY_U16:
1439 return "u16";
1440 case panda_file::LiteralTag::ARRAY_I16:
1441 return "i16";
1442 case panda_file::LiteralTag::ARRAY_U32:
1443 return "u32";
1444 case panda_file::LiteralTag::INTEGER:
1445 case panda_file::LiteralTag::ARRAY_I32:
1446 return "i32";
1447 case panda_file::LiteralTag::ARRAY_U64:
1448 return "u64";
1449 case panda_file::LiteralTag::ARRAY_I64:
1450 return "i64";
1451 case panda_file::LiteralTag::ARRAY_F32:
1452 return "f32";
1453 case panda_file::LiteralTag::DOUBLE:
1454 case panda_file::LiteralTag::ARRAY_F64:
1455 return "f64";
1456 case panda_file::LiteralTag::STRING:
1457 case panda_file::LiteralTag::ARRAY_STRING:
1458 return "string";
1459 case panda_file::LiteralTag::METHOD:
1460 return "method";
1461 case panda_file::LiteralTag::GETTER:
1462 return "getter";
1463 case panda_file::LiteralTag::SETTER:
1464 return "setter";
1465 case panda_file::LiteralTag::GENERATORMETHOD:
1466 return "generator_method";
1467 case panda_file::LiteralTag::ACCESSOR:
1468 return "accessor";
1469 case panda_file::LiteralTag::METHODAFFILIATE:
1470 return "method_affiliate";
1471 case panda_file::LiteralTag::NULLVALUE:
1472 return "null_value";
1473 case panda_file::LiteralTag::TAGVALUE:
1474 return "tagvalue";
1475 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1476 return "lit_index";
1477 case panda_file::LiteralTag::LITERALARRAY:
1478 return "lit_offset";
1479 case panda_file::LiteralTag::BUILTINTYPEINDEX:
1480 return "builtin_type";
1481 default:
1482 UNREACHABLE();
1483 }
1484 }
1485
1486 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1487 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1488 {
1489 switch (lit_array.literals_[0].tag_) {
1490 case panda_file::LiteralTag::ARRAY_U1: {
1491 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1492 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1493 }
1494 break;
1495 }
1496 case panda_file::LiteralTag::ARRAY_U8: {
1497 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1498 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1499 }
1500 break;
1501 }
1502 case panda_file::LiteralTag::ARRAY_I8: {
1503 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1504 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1505 }
1506 break;
1507 }
1508 case panda_file::LiteralTag::ARRAY_U16: {
1509 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1510 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1511 }
1512 break;
1513 }
1514 case panda_file::LiteralTag::ARRAY_I16: {
1515 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1516 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1517 }
1518 break;
1519 }
1520 case panda_file::LiteralTag::ARRAY_U32: {
1521 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1522 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1523 }
1524 break;
1525 }
1526 case panda_file::LiteralTag::ARRAY_I32: {
1527 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1528 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1529 }
1530 break;
1531 }
1532 case panda_file::LiteralTag::ARRAY_U64: {
1533 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1534 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1535 }
1536 break;
1537 }
1538 case panda_file::LiteralTag::ARRAY_I64: {
1539 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1540 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1541 }
1542 break;
1543 }
1544 case panda_file::LiteralTag::ARRAY_F32: {
1545 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1546 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1547 }
1548 break;
1549 }
1550 case panda_file::LiteralTag::ARRAY_F64: {
1551 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1552 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1553 }
1554 break;
1555 }
1556 case panda_file::LiteralTag::ARRAY_STRING: {
1557 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1558 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1559 }
1560 break;
1561 }
1562 default:
1563 SerializeLiterals(lit_array, os);
1564 }
1565 }
1566
1567 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1568 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1569 {
1570 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1571 const auto &tag = lit_array.literals_[i].tag_;
1572 os << LiteralTagToString(tag) << ":";
1573 const auto &val = lit_array.literals_[i].value_;
1574 switch (lit_array.literals_[i].tag_) {
1575 case panda_file::LiteralTag::BOOL: {
1576 os << std::get<bool>(val);
1577 break;
1578 }
1579 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1580 case panda_file::LiteralTag::INTEGER: {
1581 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1582 break;
1583 }
1584 case panda_file::LiteralTag::DOUBLE: {
1585 os << std::get<double>(val);
1586 break;
1587 }
1588 case panda_file::LiteralTag::STRING: {
1589 os << "\"" << std::get<std::string>(val) << "\"";
1590 break;
1591 }
1592 case panda_file::LiteralTag::METHOD:
1593 case panda_file::LiteralTag::GETTER:
1594 case panda_file::LiteralTag::SETTER:
1595 case panda_file::LiteralTag::GENERATORMETHOD: {
1596 os << std::get<std::string>(val);
1597 break;
1598 }
1599 case panda_file::LiteralTag::NULLVALUE:
1600 case panda_file::LiteralTag::ACCESSOR: {
1601 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1602 break;
1603 }
1604 case panda_file::LiteralTag::METHODAFFILIATE: {
1605 os << std::get<uint16_t>(val);
1606 break;
1607 }
1608 case panda_file::LiteralTag::LITERALARRAY: {
1609 os << std::get<std::string>(val);
1610 break;
1611 }
1612 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1613 os << static_cast<int16_t>(std::get<uint8_t>(val));
1614 break;
1615 }
1616 default:
1617 UNREACHABLE();
1618 }
1619 os << ", ";
1620 }
1621 }
1622
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1623 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1624 {
1625 if (IsSystemType(record.name)) {
1626 return;
1627 }
1628 os << ".language " << panda::panda_file::LanguageToString(record.language) << std::endl;
1629 os << ".record " << record.name;
1630
1631 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1632 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1633 if (record_in_table) {
1634 Serialize(*record.metadata, record_iter->second.ann_list, os);
1635 } else {
1636 Serialize(*record.metadata, {}, os);
1637 }
1638
1639 if (record.metadata->IsForeign()) {
1640 os << "\n\n";
1641 return;
1642 }
1643
1644 os << " {";
1645
1646 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1647 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1648 SerializeFields(record, os, true);
1649 } else {
1650 os << "\n";
1651 SerializeFields(record, os, false);
1652 }
1653
1654 os << "}\n\n";
1655 }
1656
DumpLiteralArray(const pandasm::LiteralArray & literal_array,std::stringstream & ss) const1657 void Disassembler::DumpLiteralArray(const pandasm::LiteralArray &literal_array, std::stringstream &ss) const
1658 {
1659 ss << "[";
1660 bool firstItem = true;
1661 for (const auto &item : literal_array.literals_) {
1662 if (!firstItem) {
1663 ss << ", ";
1664 } else {
1665 firstItem = false;
1666 }
1667
1668 switch (item.tag_) {
1669 case panda_file::LiteralTag::DOUBLE: {
1670 ss << std::get<double>(item.value_);
1671 break;
1672 }
1673 case panda_file::LiteralTag::BOOL: {
1674 ss << std::get<bool>(item.value_);
1675 break;
1676 }
1677 case panda_file::LiteralTag::STRING: {
1678 ss << "\"" << std::get<std::string>(item.value_) << "\"";
1679 break;
1680 }
1681 case panda_file::LiteralTag::LITERALARRAY: {
1682 std::string offset_str = std::get<std::string>(item.value_);
1683 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1684 pandasm::LiteralArray lit_array;
1685 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1686 DumpLiteralArray(lit_array, ss);
1687 break;
1688 }
1689 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1690 // By convention, BUILTINTYPEINDEX is used to store type of empty arrays,
1691 // therefore it has no value
1692 break;
1693 }
1694 default: {
1695 UNREACHABLE();
1696 break;
1697 }
1698 }
1699 }
1700 ss << "]";
1701 }
1702
SerializeFieldValue(const pandasm::Field & f,std::stringstream & ss) const1703 void Disassembler::SerializeFieldValue(const pandasm::Field &f, std::stringstream &ss) const
1704 {
1705 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1706 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1707 } else if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1708 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1709 } else if (f.type.GetId() == panda_file::Type::TypeId::F64) {
1710 ss << " = " << static_cast<double>(f.metadata->GetValue().value().GetValue<double>());
1711 } else if (f.type.GetId() == panda_file::Type::TypeId::U1) {
1712 ss << " = " << static_cast<bool>(f.metadata->GetValue().value().GetValue<bool>());
1713 } else if (f.type.GetId() == panda_file::Type::TypeId::REFERENCE && f.type.GetName() == "panda.String") {
1714 ss << " = \"" << static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()) << "\"";
1715 } else if (f.type.GetRank() > 0) {
1716 uint32_t lit_array_fffset =
1717 std::stoi(static_cast<std::string>(f.metadata->GetValue().value().GetValue<std::string>()));
1718 pandasm::LiteralArray lit_array;
1719 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1720 ss << " = ";
1721 DumpLiteralArray(lit_array, ss);
1722 }
1723 }
1724
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1725 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1726 {
1727 constexpr size_t INFO_OFFSET = 80;
1728
1729 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1730 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1731
1732 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1733
1734 size_t field_idx = 0;
1735
1736 std::stringstream ss;
1737 for (const auto &f : record.field_list) {
1738 std::string file = GetFileNameByPath(f.name);
1739 ss << "\t" << f.type.GetPandasmName() << " " << file;
1740 if (f.metadata->GetValue().has_value()) {
1741 SerializeFieldValue(f, ss);
1742 }
1743 if (record_in_table) {
1744 const auto field_iter = record_iter->second.field_annotations.find(f.name);
1745 if (field_iter != record_iter->second.field_annotations.end()) {
1746 Serialize(*f.metadata, field_iter->second, ss);
1747 } else {
1748 Serialize(*f.metadata, {}, ss);
1749 }
1750 } else {
1751 Serialize(*f.metadata, {}, ss);
1752 }
1753
1754 if (print_information) {
1755 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1756 } else {
1757 os << ss.str() << "\n";
1758 }
1759
1760 ss.str(std::string());
1761 ss.clear();
1762
1763 field_idx++;
1764 }
1765 }
1766
getLiteralArrayTypeFromValue(const pandasm::LiteralArray & literal_array) const1767 std::string Disassembler::getLiteralArrayTypeFromValue(const pandasm::LiteralArray &literal_array) const
1768 {
1769 [[maybe_unused]] auto size = literal_array.literals_.size();
1770 ASSERT(size > 0);
1771 switch (literal_array.literals_[0].tag_) {
1772 case panda_file::LiteralTag::DOUBLE: {
1773 return "f64[]";
1774 }
1775 case panda_file::LiteralTag::BOOL: {
1776 return "u1[]";
1777 }
1778 case panda_file::LiteralTag::STRING: {
1779 return "panda.String[]";
1780 }
1781 case panda_file::LiteralTag::LITERALARRAY: {
1782 std::string offset_str = std::get<std::string>(literal_array.literals_[0].value_);
1783 uint32_t lit_array_fffset = std::stoi(offset_str, nullptr, 16);
1784 pandasm::LiteralArray lit_array;
1785 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1786 return getLiteralArrayTypeFromValue(lit_array) + "[]";
1787 }
1788 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1789 uint8_t typeIndex = std::get<uint8_t>(literal_array.literals_[0].value_);
1790 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE = 0;
1791 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE = 1;
1792 static constexpr uint8_t EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE = 2;
1793 switch (typeIndex) {
1794 case EMPTY_LITERAL_ARRAY_WITH_NUMBER_TYPE:
1795 return "f64[]";
1796 case EMPTY_LITERAL_ARRAY_WITH_BOOLEAN_TYPE:
1797 return "u1[]";
1798 case EMPTY_LITERAL_ARRAY_WITH_STRING_TYPE:
1799 return "panda.String[]";
1800 default:
1801 UNREACHABLE();
1802 break;
1803 }
1804 }
1805 default: {
1806 UNREACHABLE();
1807 break;
1808 }
1809 }
1810 }
1811
SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> & elements,std::stringstream & ss,uint32_t idx) const1812 void Disassembler::SerializeAnnotationElement(const std::vector<pandasm::AnnotationElement> &elements,
1813 std::stringstream &ss, uint32_t idx) const
1814 {
1815 for (const auto &elem : elements) {
1816 auto type = elem.GetValue()->GetType();
1817 if (type == pandasm::Value::Type::U32) {
1818 ss << "\t"
1819 << "u32"
1820 << " " << elem.GetName() << " { ";
1821 ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>() << " }";
1822 } else if (type == pandasm::Value::Type::F64) {
1823 ss << "\t"
1824 << "f64"
1825 << " " << elem.GetName() << " { ";
1826 ss << elem.GetValue()->GetAsScalar()->GetValue<double>() << " }";
1827 } else if (type == pandasm::Value::Type::U1) {
1828 ss << "\t"
1829 << "u1"
1830 << " " << elem.GetName() << " { ";
1831 ss << elem.GetValue()->GetAsScalar()->GetValue<bool>() << " }";
1832 } else if (type == pandasm::Value::Type::STRING) {
1833 ss << "\t"
1834 << "panda.String"
1835 << " " << elem.GetName() << " { \"";
1836 ss << elem.GetValue()->GetAsScalar()->GetValue<std::string>() << "\" }";
1837 } else if (type == pandasm::Value::Type::LITERALARRAY) {
1838 uint32_t lit_array_fffset = std::stoi(elem.GetValue()->GetAsScalar()->GetValue<std::string>());
1839 pandasm::LiteralArray lit_array;
1840 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(lit_array_fffset));
1841 std::string typeName = getLiteralArrayTypeFromValue(lit_array);
1842 ss << "\t" << typeName << " " << elem.GetName() << " { ";
1843 DumpLiteralArray(lit_array, ss);
1844 ss << " }";
1845 } else {
1846 UNREACHABLE();
1847 }
1848 if (idx > 0) {
1849 ss << "\n";
1850 }
1851 --idx;
1852 }
1853 }
1854
SerializeMethodAnnotation(const pandasm::AnnotationData & ann,std::ostream & os) const1855 void Disassembler::SerializeMethodAnnotation(const pandasm::AnnotationData &ann, std::ostream &os) const
1856 {
1857 os << ann.GetName() << ":\n";
1858 std::stringstream ss;
1859 std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1860 if (elements.empty()) {
1861 return;
1862 }
1863 uint32_t idx = elements.size() - 1;
1864 SerializeAnnotationElement(elements, ss, idx);
1865 os << ss.str() << "\n";
1866 }
1867
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1868 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1869 {
1870 const auto annotations = method.metadata->GetAnnotations();
1871 if (annotations.empty()) {
1872 return;
1873 }
1874
1875 for (const auto &ann : annotations) {
1876 SerializeMethodAnnotation(ann, os);
1877 }
1878 }
1879
SerializeInstructions(const pandasm::Function & method,std::ostream & os,const std::map<std::string,MethodInfo>::const_iterator & method_info_it,bool print_method_info) const1880 void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os,
1881 const std::map<std::string, MethodInfo>::const_iterator &method_info_it,
1882 bool print_method_info) const
1883 {
1884 std::string delim = ": ";
1885 size_t width = 0;
1886 if (print_method_info) {
1887 for (const auto &i : method.ins) {
1888 size_t ins_size = i.ToString().size();
1889 if (i.set_label) {
1890 ins_size = ins_size - i.label.size() - delim.length();
1891 }
1892
1893 if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) {
1894 width = i.ToString().size();
1895 }
1896 }
1897 }
1898
1899 for (size_t i = 0; i < method.ins.size(); i++) {
1900 std::string ins = method.ins[i].ToString("", true, method.regs_num);
1901 if (method.ins[i].set_label) {
1902 size_t pos = ins.find(delim);
1903 std::string label = ins.substr(0, pos);
1904 ins.erase(0, pos + delim.length());
1905 os << label << ":\n";
1906 }
1907
1908 if (ins != "") {
1909 os << "\t" << std::setw(width) << std::left << ins;
1910 if (print_method_info && i < method_info_it->second.instructions_info.size()) {
1911 os << " # " << method_info_it->second.instructions_info.at(i);
1912 }
1913 os << "\n";
1914 }
1915 }
1916 }
1917
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1918 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1919 {
1920 SerializeMethodAnnotations(method, os);
1921 os << ".language " << panda::panda_file::LanguageToString(method.language) << std::endl;
1922 os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1923
1924 if (method.params.size() > 0) {
1925 os << method.params[0].type.GetPandasmName() << " a0";
1926
1927 for (uint8_t i = 1; i < method.params.size(); i++) {
1928 os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1929 }
1930 }
1931 os << ")";
1932
1933 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1934
1935 const auto method_iter = prog_ann_.method_annotations.find(signature);
1936 if (method_iter != prog_ann_.method_annotations.end()) {
1937 Serialize(*method.metadata, method_iter->second, os);
1938 } else {
1939 Serialize(*method.metadata, {}, os);
1940 }
1941
1942 auto method_info_it = prog_info_.methods_info.find(signature);
1943 bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1944 if (print_method_info) {
1945 os << " { # " << method_info_it->second.method_info << "\n# CODE:\n";
1946 } else {
1947 os << " {\n";
1948 }
1949 SerializeInstructions(method, os, method_info_it, print_method_info);
1950
1951 if (method.catch_blocks.size() != 0) {
1952 os << "\n";
1953
1954 for (const auto &catch_block : method.catch_blocks) {
1955 Serialize(catch_block, os);
1956
1957 os << "\n";
1958 }
1959 }
1960
1961 if (print_method_info) {
1962 const MethodInfo &method_info = method_info_it->second;
1963 SerializeLineNumberTable(method_info.line_number_table, os);
1964 SerializeColumnNumberTable(method_info.column_number_table, os);
1965 SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1966 }
1967
1968 os << "}\n\n";
1969 }
1970
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1971 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1972 std::ostream &os) const
1973 {
1974 os << "[offset:0x" << std::hex << offset << ", name_value:" << name_value << "]" << std::endl;
1975 }
1976
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1977 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1978 {
1979 if (catch_block.exception_record == "") {
1980 os << ".catchall ";
1981 } else {
1982 os << ".catch " << catch_block.exception_record << ", ";
1983 }
1984
1985 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1986
1987 if (catch_block.catch_end_label != "") {
1988 os << ", " << catch_block.catch_end_label;
1989 }
1990 }
1991
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1992 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1993 {
1994 auto bool_attributes = meta.GetBoolAttributes();
1995 auto attributes = meta.GetAttributes();
1996 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1997 return;
1998 }
1999
2000 os << " <";
2001
2002 size_t size = bool_attributes.size();
2003 size_t idx = 0;
2004 for (const auto &attr : bool_attributes) {
2005 os << attr;
2006 ++idx;
2007
2008 if (!attributes.empty() || !ann_list.empty() || idx < size) {
2009 os << ", ";
2010 }
2011 }
2012
2013 size = attributes.size();
2014 idx = 0;
2015 for (const auto &[key, values] : attributes) {
2016 for (size_t i = 0; i < values.size(); i++) {
2017 os << key << "=" << values[i];
2018
2019 if (i < values.size() - 1) {
2020 os << ", ";
2021 }
2022 }
2023
2024 ++idx;
2025
2026 if (!ann_list.empty() || idx < size) {
2027 os << ", ";
2028 }
2029 }
2030
2031 size = ann_list.size();
2032 idx = 0;
2033 for (const auto &[key, value] : ann_list) {
2034 os << key << "=" << value;
2035
2036 ++idx;
2037
2038 if (idx < size) {
2039 os << ", ";
2040 }
2041 }
2042
2043 os << ">";
2044 }
2045
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const2046 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
2047 std::ostream &os) const
2048 {
2049 if (line_number_table.empty()) {
2050 return;
2051 }
2052
2053 os << "\n# LINE_NUMBER_TABLE:\n";
2054 for (const auto &line_info : line_number_table) {
2055 os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
2056 }
2057 }
2058
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const2059 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
2060 std::ostream &os) const
2061 {
2062 if (column_number_table.empty()) {
2063 return;
2064 }
2065
2066 os << "\n# COLUMN_NUMBER_TABLE:\n";
2067 for (const auto &column_info : column_number_table) {
2068 os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
2069 }
2070 }
2071
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const2072 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
2073 const pandasm::Function &method, std::ostream &os) const
2074 {
2075 if (local_variable_table.empty()) {
2076 return;
2077 }
2078
2079 os << "\n# LOCAL_VARIABLE_TABLE:\n";
2080 os << "#\t Start End Register Name Signature\n";
2081 const int START_WIDTH = 5;
2082 const int END_WIDTH = 4;
2083 const int REG_WIDTH = 8;
2084 const int NAME_WIDTH = 14;
2085 for (const auto &variable_info : local_variable_table) {
2086 std::ostringstream reg_stream;
2087 reg_stream << variable_info.reg_number << '(';
2088 if (variable_info.reg_number < 0) {
2089 reg_stream << "acc";
2090 } else {
2091 uint32_t vreg = variable_info.reg_number;
2092 uint32_t first_arg_reg = method.GetTotalRegs();
2093 if (vreg < first_arg_reg) {
2094 reg_stream << 'v' << vreg;
2095 } else {
2096 reg_stream << 'a' << vreg - first_arg_reg;
2097 }
2098 }
2099 reg_stream << ')';
2100
2101 os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << " ";
2102 os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << " ";
2103 os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
2104 os << std::setw(NAME_WIDTH) << std::right << variable_info.name << " " << variable_info.type;
2105 if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
2106 os << " (" << variable_info.type_signature << ")";
2107 }
2108 os << "\n";
2109 }
2110 }
2111
BytecodeOpcodeToPandasmOpcode(uint8_t o) const2112 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
2113 {
2114 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
2115 }
2116
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const2117 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id, size_t idx) const
2118 {
2119 std::stringstream name;
2120 const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
2121 std::string str_data = StringDataToString(file_->GetStringData(offset));
2122 if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
2123 name << GetMethodSignature(offset);
2124 } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
2125 name << '\"';
2126 name << str_data;
2127 name << '\"';
2128 string_offset_to_name_.emplace(offset, str_data);
2129 } else {
2130 ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
2131 pandasm::LiteralArray lit_array;
2132 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
2133 name << SerializeLiteralArray(lit_array);
2134 }
2135
2136 return name.str();
2137 }
2138
GetRecordLanguage(panda_file::File::EntityId class_id) const2139 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
2140 {
2141 if (file_->IsExternal(class_id)) {
2142 // Keep the same behavior with abc2program
2143 return panda_file::DEFUALT_SOURCE_LANG;
2144 }
2145
2146 panda_file::ClassDataAccessor cda(*file_, class_id);
2147 return cda.GetSourceLang().value_or(panda_file::DEFUALT_SOURCE_LANG);
2148 }
2149
GetMethodLanguage(panda_file::File::EntityId method_id) const2150 panda::panda_file::SourceLang Disassembler::GetMethodLanguage(panda_file::File::EntityId method_id) const
2151 {
2152 if (file_->IsExternal(method_id)) {
2153 // Keep the same behavior with abc2program
2154 return panda_file::DEFUALT_SOURCE_LANG;
2155 }
2156
2157 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
2158 return method_accessor.GetSourceLang().value_or(panda_file::DEFUALT_SOURCE_LANG);
2159 }
2160
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)2161 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
2162 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
2163 panda_file::File::EntityId code_id)
2164 {
2165 const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
2166 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
2167 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
2168 size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
2169 if (idx != std::numeric_limits<size_t>::max()) {
2170 if (label_table->find(idx) == label_table->end()) {
2171 std::stringstream ss {};
2172 ss << "jump_label_" << label_table->size();
2173 (*label_table)[idx] = ss.str();
2174 }
2175
2176 pa_ins->imms.clear();
2177 pa_ins->ids.push_back(label_table->at(idx));
2178 } else {
2179 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2180 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2181 << ": invalid jump offset 0x" << jmp_offset
2182 << " - jumping in the middle of another instruction!";
2183 }
2184 } else {
2185 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2186 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
2187 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
2188 }
2189 }
2190
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const2191 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
2192 panda_file::File::EntityId code_id) const
2193 {
2194 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
2195
2196 const auto ins_sz = code_accessor.GetCodeSize();
2197 const auto ins_arr = code_accessor.GetInstructions();
2198
2199 method->regs_num = code_accessor.GetNumVregs();
2200
2201 auto bc_ins = BytecodeInstruction(ins_arr);
2202 const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
2203
2204 LabelTable label_table = GetExceptions(method, method_id, code_id);
2205
2206 IdList unknown_external_methods {};
2207
2208 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
2209 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
2210 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
2211 << "). bytecode instructions sequence corrupted for method " << method->name
2212 << "! went out of bounds";
2213
2214 break;
2215 }
2216
2217 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
2218 if (pa_ins.IsJump()) {
2219 translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
2220 }
2221
2222 // check if method id is unknown external method. if so, emplace it in table
2223 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
2224 const auto arg_method_idx = bc_ins.GetId().AsIndex();
2225 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
2226
2227 const auto arg_method_signature = GetMethodSignature(arg_method_id);
2228
2229 const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
2230 const bool is_external = file_->IsExternal(arg_method_id);
2231 if (is_external && !is_present) {
2232 unknown_external_methods.push_back(arg_method_id);
2233 }
2234 }
2235
2236 method->AddInstruction(pa_ins);
2237 bc_ins = bc_ins.GetNext();
2238 }
2239
2240 size_t instruction_count = method->ins.size();
2241 for (const auto &pair : label_table) {
2242 if (pair.first > instruction_count) {
2243 LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count
2244 << ", but the label index is " << pair.first;
2245 continue;
2246 }
2247
2248 // In some case, the end label can be after the last instruction
2249 // Creating an invalid instruction for the label to make sure it can be serialized
2250 if (pair.first == instruction_count) {
2251 pandasm::Ins ins {};
2252 ins.opcode = pandasm::Opcode::INVALID;
2253 method->AddInstruction(ins);
2254 }
2255
2256 method->ins[pair.first].label = pair.second;
2257 method->ins[pair.first].set_label = true;
2258 }
2259
2260 return unknown_external_methods;
2261 }
2262
GetColumnNumber()2263 std::vector<size_t> Disassembler::GetColumnNumber()
2264 {
2265 std::vector<size_t> columnNumber;
2266 for (const auto &method_info : prog_info_.methods_info) {
2267 for (const auto &column_number : method_info.second.column_number_table) {
2268 columnNumber.push_back(column_number.column);
2269 }
2270 }
2271 return columnNumber;
2272 }
2273
GetLineNumber()2274 std::vector<size_t> Disassembler::GetLineNumber()
2275 {
2276 std::vector<size_t> lineNumber;
2277 for (const auto &method_info : prog_info_.methods_info) {
2278 for (const auto &line_number : method_info.second.line_number_table) {
2279 lineNumber.push_back(line_number.line);
2280 }
2281 }
2282 return lineNumber;
2283 }
2284
2285 } // namespace panda::disasm
2286