1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "disassembler.h"
17 #include "mangling.h"
18 #include "utils/logger.h"
19 #include "utils/const_value.h"
20
21 #include <iomanip>
22
23 #include "get_language_specific_metadata.inc"
24
25 namespace panda::disasm {
26
Disassemble(const std::string & filename_in,const bool quiet,const bool skip_strings)27 void Disassembler::Disassemble(const std::string &filename_in, const bool quiet, const bool skip_strings)
28 {
29 auto file_new = panda_file::File::Open(filename_in);
30 file_.swap(file_new);
31
32 if (file_ != nullptr) {
33 prog_ = pandasm::Program {};
34
35 record_name_to_id_.clear();
36 method_name_to_id_.clear();
37 string_offset_to_name_.clear();
38 skip_strings_ = skip_strings;
39 quiet_ = quiet;
40
41 prog_info_ = ProgInfo {};
42
43 prog_ann_ = ProgAnnotations {};
44
45 GetRecords();
46 GetLiteralArrays();
47
48 GetLanguageSpecificMetadata();
49 } else {
50 LOG(ERROR, DISASSEMBLER) << "> unable to open specified pandafile: <" << filename_in << ">";
51 }
52 }
53
CollectInfo()54 void Disassembler::CollectInfo()
55 {
56 LOG(DEBUG, DISASSEMBLER) << "\n[getting program info]\n";
57
58 debug_info_extractor_ = std::make_unique<panda_file::DebugInfoExtractor>(file_.get());
59
60 for (const auto &pair : record_name_to_id_) {
61 GetRecordInfo(pair.second, &prog_info_.records_info[pair.first]);
62 }
63
64 for (const auto &pair : method_name_to_id_) {
65 GetMethodInfo(pair.second, &prog_info_.methods_info[pair.first]);
66 }
67 }
68
Serialize(std::ostream & os,bool add_separators,bool print_information) const69 void Disassembler::Serialize(std::ostream &os, bool add_separators, bool print_information) const
70 {
71 if (os.bad()) {
72 LOG(DEBUG, DISASSEMBLER) << "> serialization failed. os bad\n";
73
74 return;
75 }
76
77 if (file_ != nullptr) {
78 std::string abc_file = GetFileNameByPath(file_->GetFilename());
79 os << "# source binary: " << abc_file << "\n\n";
80 }
81
82 SerializeLanguage(os);
83
84 if (add_separators) {
85 os << "# ====================\n"
86 "# LITERALS\n\n";
87 }
88
89 LOG(DEBUG, DISASSEMBLER) << "[serializing literals]";
90
91 for (const auto &[key, lit_arr] : prog_.literalarray_table) {
92 Serialize(key, lit_arr, os);
93 }
94
95 for (const auto &[module_offset, array_table] : modulearray_table_) {
96 Serialize(module_offset, array_table, os);
97 }
98
99 os << "\n";
100
101 if (add_separators) {
102 os << "# ====================\n"
103 "# RECORDS\n\n";
104 }
105
106 LOG(DEBUG, DISASSEMBLER) << "[serializing records]";
107
108 for (const auto &r : prog_.record_table) {
109 Serialize(r.second, os, print_information);
110 }
111
112 if (add_separators) {
113 os << "# ====================\n"
114 "# METHODS\n\n";
115 }
116
117 LOG(DEBUG, DISASSEMBLER) << "[serializing methods]";
118
119 for (const auto &m : prog_.function_table) {
120 Serialize(m.second, os, print_information);
121 }
122
123 if (add_separators) {
124 os << "# ====================\n"
125 "# STRING\n\n";
126 }
127
128 LOG(DEBUG, DISASSEMBLER) << "[serializing strings]";
129
130 for (const auto &[offset, name_value] : string_offset_to_name_) {
131 SerializeStrings(offset, name_value, os);
132 }
133 }
134
IsSystemType(const std::string & type_name)135 inline bool Disassembler::IsSystemType(const std::string &type_name)
136 {
137 bool is_array_type = type_name.find('[') != std::string::npos;
138 bool is_global = type_name == "_GLOBAL";
139
140 return is_array_type || is_global;
141 }
142
GetRecord(pandasm::Record * record,const panda_file::File::EntityId & record_id)143 void Disassembler::GetRecord(pandasm::Record *record, const panda_file::File::EntityId &record_id)
144 {
145 LOG(DEBUG, DISASSEMBLER) << "\n[getting record]\nid: " << record_id << " (0x" << std::hex << record_id << ")";
146
147 if (record == nullptr) {
148 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
149
150 return;
151 }
152
153 record->name = GetFullRecordName(record_id);
154
155 LOG(DEBUG, DISASSEMBLER) << "name: " << record->name;
156
157 GetMetaData(record, record_id);
158
159 if (!file_->IsExternal(record_id)) {
160 GetMethods(record_id);
161 GetFields(record, record_id);
162 }
163 }
164
AddMethodToTables(const panda_file::File::EntityId & method_id)165 void Disassembler::AddMethodToTables(const panda_file::File::EntityId &method_id)
166 {
167 pandasm::Function new_method("", file_language_);
168 GetMethod(&new_method, method_id);
169
170 const auto signature = pandasm::GetFunctionSignatureFromName(new_method.name, new_method.params);
171 if (prog_.function_table.find(signature) != prog_.function_table.end()) {
172 return;
173 }
174
175 GetMethodAnnotations(new_method, method_id);
176 method_name_to_id_.emplace(signature, method_id);
177 prog_.function_synonyms[new_method.name].push_back(signature);
178 prog_.function_table.emplace(signature, std::move(new_method));
179 }
180
GetMethod(pandasm::Function * method,const panda_file::File::EntityId & method_id)181 void Disassembler::GetMethod(pandasm::Function *method, const panda_file::File::EntityId &method_id)
182 {
183 LOG(DEBUG, DISASSEMBLER) << "\n[getting method]\nid: " << method_id << " (0x" << std::hex << method_id << ")";
184
185 if (method == nullptr) {
186 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
187
188 return;
189 }
190
191 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
192
193 method->name = GetFullMethodName(method_id);
194
195 LOG(DEBUG, DISASSEMBLER) << "name: " << method->name;
196
197 GetMetaData(method, method_id);
198
199 if (method_accessor.GetCodeId().has_value()) {
200 auto code_id = method_accessor.GetCodeId().value();
201 GetParams(method, code_id);
202 const IdList id_list = GetInstructions(method, method_id, code_id);
203
204 for (const auto &id : id_list) {
205 AddMethodToTables(id);
206 }
207 } else {
208 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << method_id << " (0x" << std::hex << method_id
209 << "). implementation of method expected, but no \'CODE\' tag was found!";
210
211 return;
212 }
213 }
214
215 template <typename T>
FillLiteralArrayData(pandasm::LiteralArray * lit_array,const panda_file::LiteralTag & tag,const panda_file::LiteralDataAccessor::LiteralValue & value) const216 void Disassembler::FillLiteralArrayData(pandasm::LiteralArray *lit_array, const panda_file::LiteralTag &tag,
217 const panda_file::LiteralDataAccessor::LiteralValue &value) const
218 {
219 panda_file::File::EntityId id(std::get<uint32_t>(value));
220 auto sp = file_->GetSpanFromId(id);
221 auto len = panda_file::helpers::Read<sizeof(uint32_t)>(&sp);
222 if (tag != panda_file::LiteralTag::ARRAY_STRING) {
223 for (size_t i = 0; i < len; i++) {
224 pandasm::LiteralArray::Literal lit;
225 lit.tag_ = tag;
226 lit.value_ = bit_cast<T>(panda_file::helpers::Read<sizeof(T)>(&sp));
227 lit_array->literals_.push_back(lit);
228 }
229 return;
230 }
231 for (size_t i = 0; i < len; i++) {
232 auto str_id = panda_file::helpers::Read<sizeof(T)>(&sp);
233 pandasm::LiteralArray::Literal lit;
234 lit.tag_ = tag;
235 lit.value_ = StringDataToString(file_->GetStringData(panda_file::File::EntityId(str_id)));
236 lit_array->literals_.push_back(lit);
237 }
238 }
239
FillLiteralData(pandasm::LiteralArray * lit_array,const panda_file::LiteralDataAccessor::LiteralValue & value,const panda_file::LiteralTag & tag) const240 void Disassembler::FillLiteralData(pandasm::LiteralArray *lit_array,
241 const panda_file::LiteralDataAccessor::LiteralValue &value,
242 const panda_file::LiteralTag &tag) const
243 {
244 pandasm::LiteralArray::Literal lit;
245 lit.tag_ = tag;
246 switch (tag) {
247 case panda_file::LiteralTag::BOOL: {
248 lit.value_ = std::get<bool>(value);
249 break;
250 }
251 case panda_file::LiteralTag::ACCESSOR:
252 case panda_file::LiteralTag::NULLVALUE:
253 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
254 lit.value_ = std::get<uint8_t>(value);
255 break;
256 }
257 case panda_file::LiteralTag::METHODAFFILIATE: {
258 lit.value_ = std::get<uint16_t>(value);
259 break;
260 }
261 case panda_file::LiteralTag::LITERALBUFFERINDEX:
262 case panda_file::LiteralTag::INTEGER: {
263 lit.value_ = std::get<uint32_t>(value);
264 break;
265 }
266 case panda_file::LiteralTag::DOUBLE: {
267 lit.value_ = std::get<double>(value);
268 break;
269 }
270 case panda_file::LiteralTag::STRING: {
271 auto str_data = file_->GetStringData(panda_file::File::EntityId(std::get<uint32_t>(value)));
272 lit.value_ = StringDataToString(str_data);
273 break;
274 }
275 case panda_file::LiteralTag::METHOD:
276 case panda_file::LiteralTag::GETTER:
277 case panda_file::LiteralTag::SETTER:
278 case panda_file::LiteralTag::GENERATORMETHOD: {
279 panda_file::MethodDataAccessor mda(*file_, panda_file::File::EntityId(std::get<uint32_t>(value)));
280 lit.value_ = StringDataToString(file_->GetStringData(mda.GetNameId()));
281 break;
282 }
283 case panda_file::LiteralTag::LITERALARRAY: {
284 std::stringstream ss;
285 ss << "0x" << std::hex << std::get<uint32_t>(value);
286 lit.value_ = ss.str();
287 break;
288 }
289 case panda_file::LiteralTag::TAGVALUE: {
290 return;
291 }
292 default: {
293 UNREACHABLE();
294 }
295 }
296 lit_array->literals_.push_back(lit);
297 }
298
GetLiteralArrayByOffset(pandasm::LiteralArray * lit_array,panda_file::File::EntityId offset) const299 void Disassembler::GetLiteralArrayByOffset(pandasm::LiteralArray *lit_array, panda_file::File::EntityId offset) const
300 {
301 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
302 lit_array_accessor.EnumerateLiteralVals(
303 offset, [this, lit_array](const panda_file::LiteralDataAccessor::LiteralValue &value,
304 const panda_file::LiteralTag &tag) {
305 switch (tag) {
306 case panda_file::LiteralTag::ARRAY_U1: {
307 FillLiteralArrayData<bool>(lit_array, tag, value);
308 break;
309 }
310 case panda_file::LiteralTag::ARRAY_I8:
311 case panda_file::LiteralTag::ARRAY_U8: {
312 FillLiteralArrayData<uint8_t>(lit_array, tag, value);
313 break;
314 }
315 case panda_file::LiteralTag::ARRAY_I16:
316 case panda_file::LiteralTag::ARRAY_U16: {
317 FillLiteralArrayData<uint16_t>(lit_array, tag, value);
318 break;
319 }
320 case panda_file::LiteralTag::ARRAY_I32:
321 case panda_file::LiteralTag::ARRAY_U32: {
322 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
323 break;
324 }
325 case panda_file::LiteralTag::ARRAY_I64:
326 case panda_file::LiteralTag::ARRAY_U64: {
327 FillLiteralArrayData<uint64_t>(lit_array, tag, value);
328 break;
329 }
330 case panda_file::LiteralTag::ARRAY_F32: {
331 FillLiteralArrayData<float>(lit_array, tag, value);
332 break;
333 }
334 case panda_file::LiteralTag::ARRAY_F64: {
335 FillLiteralArrayData<double>(lit_array, tag, value);
336 break;
337 }
338 case panda_file::LiteralTag::ARRAY_STRING: {
339 FillLiteralArrayData<uint32_t>(lit_array, tag, value);
340 break;
341 }
342 default: {
343 FillLiteralData(lit_array, value, tag);
344 break;
345 }
346 }
347 });
348 }
349
GetLiteralArray(pandasm::LiteralArray * lit_array,size_t index) const350 void Disassembler::GetLiteralArray(pandasm::LiteralArray *lit_array, size_t index) const
351 {
352 panda_file::LiteralDataAccessor lit_array_accessor(*file_, file_->GetLiteralArraysId());
353 GetLiteralArrayByOffset(lit_array, lit_array_accessor.GetLiteralArrayId(index));
354 }
355
IsModuleLiteralOffset(const panda_file::File::EntityId & id) const356 bool Disassembler::IsModuleLiteralOffset(const panda_file::File::EntityId &id) const
357 {
358 return module_literals_.find(id.GetOffset()) != module_literals_.end();
359 }
360
GetLiteralArrays()361 void Disassembler::GetLiteralArrays()
362 {
363 const auto lit_arrays_id = file_->GetLiteralArraysId();
364
365 LOG(DEBUG, DISASSEMBLER) << "\n[getting literal arrays]\nid: " << lit_arrays_id << " (0x" << std::hex
366 << lit_arrays_id << ")";
367
368 panda_file::LiteralDataAccessor lda(*file_, lit_arrays_id);
369 size_t num_litarrays = lda.GetLiteralNum();
370 for (size_t index = 0; index < num_litarrays; index++) {
371 auto id = lda.GetLiteralArrayId(index);
372 if (module_request_phase_literals_.count(id.GetOffset())) {
373 continue;
374 }
375 if (IsModuleLiteralOffset(id)) {
376 std::stringstream ss;
377 ss << index << " 0x" << std::hex << id.GetOffset();
378 modulearray_table_.emplace(ss.str(), GetModuleLiteralArray(id));
379 continue;
380 }
381 std::stringstream ss;
382 ss << index << " 0x" << std::hex << id.GetOffset();
383 panda::pandasm::LiteralArray lit_arr;
384 GetLiteralArray(&lit_arr, index);
385 prog_.literalarray_table.emplace(ss.str(), lit_arr);
386 }
387 }
388
ModuleTagToString(panda_file::ModuleTag & tag) const389 std::string Disassembler::ModuleTagToString(panda_file::ModuleTag &tag) const
390 {
391 switch (tag) {
392 case panda_file::ModuleTag::REGULAR_IMPORT:
393 return "REGULAR_IMPORT";
394 case panda_file::ModuleTag::NAMESPACE_IMPORT:
395 return "NAMESPACE_IMPORT";
396 case panda_file::ModuleTag::LOCAL_EXPORT:
397 return "LOCAL_EXPORT";
398 case panda_file::ModuleTag::INDIRECT_EXPORT:
399 return "INDIRECT_EXPORT";
400 case panda_file::ModuleTag::STAR_EXPORT:
401 return "STAR_EXPORT";
402 default: {
403 UNREACHABLE();
404 break;
405 }
406 }
407 return "";
408 }
409
GetModuleLiteralArray(panda_file::File::EntityId & module_id) const410 std::vector<std::string> Disassembler::GetModuleLiteralArray(panda_file::File::EntityId &module_id) const
411 {
412 panda_file::ModuleDataAccessor mda(*file_, module_id);
413 const std::vector<uint32_t> &request_modules_offset = mda.getRequestModules();
414 std::vector<std::string> module_literal_array;
415 std::stringstream module_requests_stringstream;
416 module_requests_stringstream << "\tMODULE_REQUEST_ARRAY: {\n";
417 for (size_t index = 0; index < request_modules_offset.size(); ++index) {
418 module_requests_stringstream << "\t\t" << index <<
419 " : " << GetStringByOffset(request_modules_offset[index]) << ",\n";
420 }
421 module_requests_stringstream << "\t}";
422 module_literal_array.push_back(module_requests_stringstream.str());
423 mda.EnumerateModuleRecord([&](panda_file::ModuleTag tag, uint32_t export_name_offset,
424 uint32_t request_module_idx, uint32_t import_name_offset,
425 uint32_t local_name_offset) {
426 std::stringstream ss;
427 ss << "\tModuleTag: " << ModuleTagToString(tag);
428 if (tag == panda_file::ModuleTag::REGULAR_IMPORT ||
429 tag == panda_file::ModuleTag::NAMESPACE_IMPORT || tag == panda_file::ModuleTag::LOCAL_EXPORT) {
430 if (!IsValidOffset(local_name_offset)) {
431 LOG(FATAL, DISASSEMBLER) << "Get invalid local name offset!" << std::endl;
432 }
433 ss << ", local_name: " << GetStringByOffset(local_name_offset);
434 }
435 if (tag == panda_file::ModuleTag::LOCAL_EXPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
436 if (!IsValidOffset(export_name_offset)) {
437 LOG(FATAL, DISASSEMBLER) << "Get invalid export name offset!" << std::endl;
438 }
439 ss << ", export_name: " << GetStringByOffset(export_name_offset);
440 }
441 if (tag == panda_file::ModuleTag::REGULAR_IMPORT || tag == panda_file::ModuleTag::INDIRECT_EXPORT) {
442 if (!IsValidOffset(import_name_offset)) {
443 LOG(FATAL, DISASSEMBLER) << "Get invalid import name offset!" << std::endl;
444 }
445 ss << ", import_name: " << GetStringByOffset(import_name_offset);
446 }
447 auto request_module_offset = request_modules_offset[request_module_idx];
448 if (tag != panda_file::ModuleTag::LOCAL_EXPORT) {
449 if (request_module_idx >= request_modules_offset.size() || !IsValidOffset(request_module_offset)) {
450 LOG(FATAL, DISASSEMBLER) << "Get invalid request module offset!" << std::endl;
451 }
452 ss << ", module_request: " << GetStringByOffset(request_module_offset);
453 }
454 module_literal_array.push_back(ss.str());
455 });
456
457 return module_literal_array;
458 }
459
GetRecords()460 void Disassembler::GetRecords()
461 {
462 LOG(DEBUG, DISASSEMBLER) << "\n[getting records]\n";
463
464 const auto class_idx = file_->GetClasses();
465
466 for (size_t i = 0; i < class_idx.size(); i++) {
467 uint32_t class_id = class_idx[i];
468 auto class_off = file_->GetHeader()->class_idx_off + sizeof(uint32_t) * i;
469
470 if (class_id > file_->GetHeader()->file_size) {
471 LOG(ERROR, DISASSEMBLER) << "> error encountered in record at " << class_off << " (0x" << std::hex
472 << class_off << "). binary file corrupted. record offset (0x" << class_id
473 << ") out of bounds (0x" << file_->GetHeader()->file_size << ")!";
474 break;
475 }
476
477 const panda_file::File::EntityId record_id {class_id};
478 auto language = GetRecordLanguage(record_id);
479 if (language != file_language_) {
480 if (file_language_ == panda_file::SourceLang::PANDA_ASSEMBLY) {
481 file_language_ = language;
482 } else if (language != panda_file::SourceLang::PANDA_ASSEMBLY) {
483 LOG(ERROR, DISASSEMBLER) << "> possible error encountered in record at" << class_off << " (0x"
484 << std::hex << class_off << "). record's language ("
485 << panda_file::LanguageToString(language)
486 << ") differs from file's language ("
487 << panda_file::LanguageToString(file_language_) << ")!";
488 }
489 }
490
491 pandasm::Record record("", file_language_);
492 GetRecord(&record, record_id);
493
494 if (prog_.record_table.find(record.name) == prog_.record_table.end()) {
495 record_name_to_id_.emplace(record.name, record_id);
496 prog_.record_table.emplace(record.name, std::move(record));
497 }
498 }
499 }
500
GetFields(pandasm::Record * record,const panda_file::File::EntityId & record_id)501 void Disassembler::GetFields(pandasm::Record *record, const panda_file::File::EntityId &record_id)
502 {
503 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
504
505 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
506 pandasm::Field field(file_language_);
507
508 panda_file::File::EntityId field_name_id = field_accessor.GetNameId();
509 field.name = StringDataToString(file_->GetStringData(field_name_id));
510
511 uint32_t field_type = field_accessor.GetType();
512 field.type = FieldTypeToPandasmType(field_type);
513
514 GetMetaData(&field, field_accessor.GetFieldId(), record->name == ark::SCOPE_NAME_RECORD);
515
516 record->field_list.push_back(std::move(field));
517 });
518 }
519
GetMethods(const panda_file::File::EntityId & record_id)520 void Disassembler::GetMethods(const panda_file::File::EntityId &record_id)
521 {
522 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
523
524 class_accessor.EnumerateMethods([&](panda_file::MethodDataAccessor &method_accessor) -> void {
525 AddMethodToTables(method_accessor.GetMethodId());
526 });
527 }
528
GetMethodAnnotations(pandasm::Function & method,const panda_file::File::EntityId & method_id)529 void Disassembler::GetMethodAnnotations(pandasm::Function &method, const panda_file::File::EntityId &method_id)
530 {
531 static const std::string MODULE_REQUEST_ANN_NAME = "L_ESConcurrentModuleRequestsAnnotation";
532 static const std::string SLOT_NUMBER_ANN_NAME = "L_ESSlotNumberAnnotation";
533
534 panda_file::MethodDataAccessor mda(*file_, method_id);
535 mda.EnumerateAnnotations([&](panda_file::File::EntityId annotation_id) {
536 panda_file::AnnotationDataAccessor ada(*file_, annotation_id);
537 auto *annotation_name = reinterpret_cast<const char *>(file_->GetStringData(ada.GetClassId()).data);
538 if (std::strcmp("L_ESConcurrentModuleRequestsAnnotation;", annotation_name) == 0) {
539 CreateAnnotationElement(ada, method, MODULE_REQUEST_ANN_NAME,
540 "ConcurrentModuleRequest", "concurrentModuleRequestIdx");
541 } else if (std::strcmp("L_ESSlotNumberAnnotation;", annotation_name) == 0) {
542 CreateAnnotationElement(ada, method, SLOT_NUMBER_ANN_NAME, "SlotNumber", "slotNumberIdx");
543 }
544 });
545 }
546
CreateAnnotationElement(panda_file::AnnotationDataAccessor & ada,pandasm::Function & method,const std::string & ann_name,const std::string & ann_elem_name,const std::string & ann_elem_index)547 void Disassembler::CreateAnnotationElement(panda_file::AnnotationDataAccessor &ada, pandasm::Function &method,
548 const std::string &ann_name, const std::string &ann_elem_name,
549 const std::string &ann_elem_index)
550 {
551 if (ann_elem_name.empty() || ann_elem_index.empty()) {
552 return;
553 }
554
555 uint32_t elem_count = ada.GetCount();
556 for (uint32_t i = 0; i < elem_count; i++) {
557 panda_file::AnnotationDataAccessor::Elem adae = ada.GetElement(i);
558 auto *elem_name = reinterpret_cast<const char *>(file_->GetStringData(adae.GetNameId()).data);
559 if (ann_elem_name == elem_name) {
560 uint32_t ann_elem_value = adae.GetScalarValue().GetValue();
561 AddAnnotationElement(method, ann_name, ann_elem_index, ann_elem_value);
562 }
563 }
564 }
565
AddAnnotationElement(pandasm::Function & method,const std::string & annotation_name,const std::string & key,const uint32_t & value)566 void Disassembler::AddAnnotationElement(pandasm::Function &method, const std::string &annotation_name,
567 const std::string &key, const uint32_t &value)
568 {
569 if (annotation_name.empty() || key.empty()) {
570 return;
571 }
572
573 std::vector<pandasm::AnnotationData> method_annotation = method.metadata->GetAnnotations();
574 const auto ann_iter = std::find_if(method_annotation.begin(), method_annotation.end(),
575 [&](pandasm::AnnotationData &ann) -> bool {
576 return ann.GetName() == annotation_name;
577 });
578
579 pandasm::AnnotationElement annotation_element(key,
580 std::make_unique<pandasm::ScalarValue>(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(value)));
581 const bool is_annotation = ann_iter != method_annotation.end();
582 if (is_annotation) {
583 ann_iter->AddElement(std::move(annotation_element));
584 method.metadata->SetAnnotations(std::move(method_annotation));
585 } else {
586 std::vector<pandasm::AnnotationElement> elements;
587 pandasm::AnnotationData ann_data(annotation_name, elements);
588 ann_data.AddElement(std::move(annotation_element));
589 std::vector<pandasm::AnnotationData> annotations;
590 annotations.push_back(std::move(ann_data));
591 method.metadata->AddAnnotations(annotations);
592 }
593 }
594
GetAnnotationByMethodName(const std::string & method_name) const595 std::optional<std::vector<std::string>> Disassembler::GetAnnotationByMethodName(const std::string &method_name) const
596 {
597 const auto method_synonyms_iter = prog_.function_synonyms.find(method_name);
598 bool is_signature = method_synonyms_iter != prog_.function_synonyms.end();
599 if (!is_signature) {
600 return std::nullopt;
601 }
602
603 const auto method_iter = prog_.function_table.find(method_synonyms_iter->second.back());
604 bool is_method = method_iter != prog_.function_table.end();
605 const auto annotations = method_iter->second.metadata->GetAnnotations();
606 if (!is_method || annotations.empty()) {
607 return std::nullopt;
608 }
609
610 std::vector<std::string> ann;
611 for (const auto &ann_data : annotations) {
612 ann.emplace_back(ann_data.GetName());
613 }
614 return ann;
615 }
616
GetStrings() const617 std::vector<std::string> Disassembler::GetStrings() const
618 {
619 std::vector<std::string> strings;
620 for (auto &str_info : string_offset_to_name_) {
621 strings.emplace_back(str_info.second);
622 }
623
624 return strings;
625 }
626
GetModuleLiterals() const627 std::vector<std::string> Disassembler::GetModuleLiterals() const
628 {
629 std::vector<std::string> module_literals;
630 for (auto &module_array : modulearray_table_) {
631 for (auto &module : module_array.second) {
632 module_literals.emplace_back(module);
633 }
634 }
635
636 return module_literals;
637 }
638
GetParams(pandasm::Function * method,const panda_file::File::EntityId & code_id) const639 void Disassembler::GetParams(pandasm::Function *method, const panda_file::File::EntityId &code_id) const
640 {
641 /**
642 * frame size - 2^16 - 1
643 */
644 static const uint32_t MAX_ARG_NUM = 0xFFFF;
645
646 LOG(DEBUG, DISASSEMBLER) << "[getting params number]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
647
648 if (method == nullptr) {
649 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
650
651 return;
652 }
653
654 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
655
656 auto params_num = code_accessor.GetNumArgs();
657 if (params_num > MAX_ARG_NUM) {
658 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
659 << "). number of function's arguments (" << std::dec << params_num
660 << ") exceeds MAX_ARG_NUM (" << MAX_ARG_NUM << ") !";
661
662 return;
663 }
664
665 method->return_type = pandasm::Type("any", 0);
666
667 for (uint8_t i = 0; i < params_num; i++) {
668 method->params.push_back(pandasm::Function::Parameter(pandasm::Type("any", 0), file_language_));
669 }
670 }
671
GetExceptions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const672 LabelTable Disassembler::GetExceptions(pandasm::Function *method, panda_file::File::EntityId method_id,
673 panda_file::File::EntityId code_id) const
674 {
675 LOG(DEBUG, DISASSEMBLER) << "[getting exceptions]\ncode id: " << code_id << " (0x" << std::hex << code_id << ")";
676
677 if (method == nullptr) {
678 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!\n";
679 return LabelTable {};
680 }
681
682 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
683
684 const auto bc_ins = BytecodeInstruction(code_accessor.GetInstructions());
685 const auto bc_ins_last = bc_ins.JumpTo(code_accessor.GetCodeSize());
686
687 size_t try_idx = 0;
688 LabelTable label_table {};
689 code_accessor.EnumerateTryBlocks([&](panda_file::CodeDataAccessor::TryBlock &try_block) {
690 pandasm::Function::CatchBlock catch_block_pa {};
691 if (!LocateTryBlock(bc_ins, bc_ins_last, try_block, &catch_block_pa, &label_table, try_idx)) {
692 return false;
693 }
694 size_t catch_idx = 0;
695 try_block.EnumerateCatchBlocks([&](panda_file::CodeDataAccessor::CatchBlock &catch_block) {
696 auto class_idx = catch_block.GetTypeIdx();
697 if (class_idx == panda_file::INVALID_INDEX) {
698 catch_block_pa.exception_record = "";
699 } else {
700 const auto class_id = file_->ResolveClassIndex(method_id, class_idx);
701 catch_block_pa.exception_record = GetFullRecordName(class_id);
702 }
703 if (!LocateCatchBlock(bc_ins, bc_ins_last, catch_block, &catch_block_pa, &label_table, try_idx,
704 catch_idx)) {
705 return false;
706 }
707
708 method->catch_blocks.push_back(catch_block_pa);
709 catch_block_pa.catch_begin_label = "";
710 catch_block_pa.catch_end_label = "";
711 catch_idx++;
712
713 return true;
714 });
715 try_idx++;
716
717 return true;
718 });
719
720 return label_table;
721 }
722
getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first,BytecodeInstruction bc_ins_cur)723 static size_t getBytecodeInstructionNumber(BytecodeInstruction bc_ins_first, BytecodeInstruction bc_ins_cur)
724 {
725 size_t count = 0;
726
727 while (bc_ins_first.GetAddress() != bc_ins_cur.GetAddress()) {
728 count++;
729 bc_ins_first = bc_ins_first.GetNext();
730 if (bc_ins_first.GetAddress() > bc_ins_cur.GetAddress()) {
731 return std::numeric_limits<size_t>::max();
732 }
733 }
734
735 return count;
736 }
737
LocateTryBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::TryBlock & try_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx) const738 bool Disassembler::LocateTryBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
739 const panda_file::CodeDataAccessor::TryBlock &try_block,
740 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
741 size_t try_idx) const
742 {
743 const auto try_begin_bc_ins = bc_ins.JumpTo(try_block.GetStartPc());
744 const auto try_end_bc_ins = bc_ins.JumpTo(try_block.GetStartPc() + try_block.GetLength());
745
746 const size_t try_begin_idx = getBytecodeInstructionNumber(bc_ins, try_begin_bc_ins);
747 const size_t try_end_idx = getBytecodeInstructionNumber(bc_ins, try_end_bc_ins);
748
749 const bool try_begin_offset_in_range = bc_ins_last.GetAddress() > try_begin_bc_ins.GetAddress();
750 const bool try_end_offset_in_range = bc_ins_last.GetAddress() >= try_end_bc_ins.GetAddress();
751 const bool try_begin_offset_valid = try_begin_idx != std::numeric_limits<size_t>::max();
752 const bool try_end_offset_valid = try_end_idx != std::numeric_limits<size_t>::max();
753
754 if (!try_begin_offset_in_range || !try_begin_offset_valid) {
755 LOG(ERROR, DISASSEMBLER) << "> invalid try block begin offset! address is: 0x" << std::hex
756 << try_begin_bc_ins.GetAddress();
757 return false;
758 } else {
759 std::stringstream ss {};
760 ss << "try_begin_label_" << try_idx;
761
762 LabelTable::iterator it = label_table->find(try_begin_idx);
763 if (it == label_table->end()) {
764 catch_block_pa->try_begin_label = ss.str();
765 label_table->insert(std::pair<size_t, std::string>(try_begin_idx, ss.str()));
766 } else {
767 catch_block_pa->try_begin_label = it->second;
768 }
769 }
770
771 if (!try_end_offset_in_range || !try_end_offset_valid) {
772 LOG(ERROR, DISASSEMBLER) << "> invalid try block end offset! address is: 0x" << std::hex
773 << try_end_bc_ins.GetAddress();
774 return false;
775 } else {
776 std::stringstream ss {};
777 ss << "try_end_label_" << try_idx;
778
779 LabelTable::iterator it = label_table->find(try_end_idx);
780 if (it == label_table->end()) {
781 catch_block_pa->try_end_label = ss.str();
782 label_table->insert(std::pair<size_t, std::string>(try_end_idx, ss.str()));
783 } else {
784 catch_block_pa->try_end_label = it->second;
785 }
786 }
787
788 return true;
789 }
790
LocateCatchBlock(const BytecodeInstruction & bc_ins,const BytecodeInstruction & bc_ins_last,const panda_file::CodeDataAccessor::CatchBlock & catch_block,pandasm::Function::CatchBlock * catch_block_pa,LabelTable * label_table,size_t try_idx,size_t catch_idx) const791 bool Disassembler::LocateCatchBlock(const BytecodeInstruction &bc_ins, const BytecodeInstruction &bc_ins_last,
792 const panda_file::CodeDataAccessor::CatchBlock &catch_block,
793 pandasm::Function::CatchBlock *catch_block_pa, LabelTable *label_table,
794 size_t try_idx, size_t catch_idx) const
795 {
796 const auto handler_begin_offset = catch_block.GetHandlerPc();
797 const auto handler_end_offset = handler_begin_offset + catch_block.GetCodeSize();
798
799 const auto handler_begin_bc_ins = bc_ins.JumpTo(handler_begin_offset);
800 const auto handler_end_bc_ins = bc_ins.JumpTo(handler_end_offset);
801
802 const size_t handler_begin_idx = getBytecodeInstructionNumber(bc_ins, handler_begin_bc_ins);
803 const size_t handler_end_idx = getBytecodeInstructionNumber(bc_ins, handler_end_bc_ins);
804
805 const bool handler_begin_offset_in_range = bc_ins_last.GetAddress() > handler_begin_bc_ins.GetAddress();
806 const bool handler_end_offset_in_range = bc_ins_last.GetAddress() >= handler_end_bc_ins.GetAddress();
807 const bool handler_end_present = catch_block.GetCodeSize() != 0;
808 const bool handler_begin_offset_valid = handler_begin_idx != std::numeric_limits<size_t>::max();
809 const bool handler_end_offset_valid = handler_end_idx != std::numeric_limits<size_t>::max();
810
811 if (!handler_begin_offset_in_range || !handler_begin_offset_valid) {
812 LOG(ERROR, DISASSEMBLER) << "> invalid catch block begin offset! address is: 0x" << std::hex
813 << handler_begin_bc_ins.GetAddress();
814 return false;
815 } else {
816 std::stringstream ss {};
817 ss << "handler_begin_label_" << try_idx << "_" << catch_idx;
818
819 LabelTable::iterator it = label_table->find(handler_begin_idx);
820 if (it == label_table->end()) {
821 catch_block_pa->catch_begin_label = ss.str();
822 label_table->insert(std::pair<size_t, std::string>(handler_begin_idx, ss.str()));
823 } else {
824 catch_block_pa->catch_begin_label = it->second;
825 }
826 }
827
828 if (!handler_end_offset_in_range || !handler_end_offset_valid) {
829 LOG(ERROR, DISASSEMBLER) << "> invalid catch block end offset! address is: 0x" << std::hex
830 << handler_end_bc_ins.GetAddress();
831 return false;
832 } else if (handler_end_present) {
833 std::stringstream ss {};
834 ss << "handler_end_label_" << try_idx << "_" << catch_idx;
835
836 LabelTable::iterator it = label_table->find(handler_end_idx);
837 if (it == label_table->end()) {
838 catch_block_pa->catch_end_label = ss.str();
839 label_table->insert(std::pair<size_t, std::string>(handler_end_idx, ss.str()));
840 } else {
841 catch_block_pa->catch_end_label = it->second;
842 }
843 }
844
845 return true;
846 }
847
GetMetaData(pandasm::Function * method,const panda_file::File::EntityId & method_id) const848 void Disassembler::GetMetaData(pandasm::Function *method, const panda_file::File::EntityId &method_id) const
849 {
850 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nmethod id: " << method_id << " (0x" << std::hex << method_id
851 << ")";
852
853 if (method == nullptr) {
854 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
855
856 return;
857 }
858
859 panda_file::MethodDataAccessor method_accessor(*file_, method_id);
860
861 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
862
863 if (!method_accessor.IsStatic()) {
864 const auto class_name = StringDataToString(file_->GetStringData(method_accessor.GetClassId()));
865 auto this_type = pandasm::Type::FromDescriptor(class_name);
866
867 LOG(DEBUG, DISASSEMBLER) << "method (raw: \'" << method_name_raw
868 << "\') is not static. emplacing self-argument of type " << this_type.GetName();
869
870 method->params.insert(method->params.begin(), pandasm::Function::Parameter(this_type, file_language_));
871 } else {
872 method->metadata->SetAttribute("static");
873 }
874
875 if (file_->IsExternal(method_accessor.GetMethodId())) {
876 method->metadata->SetAttribute("external");
877 }
878
879 std::string ctor_name = panda::panda_file::GetCtorName(file_language_);
880 std::string cctor_name = panda::panda_file::GetCctorName(file_language_);
881
882 const bool is_ctor = (method_name_raw == ctor_name);
883 const bool is_cctor = (method_name_raw == cctor_name);
884
885 if (is_ctor) {
886 method->metadata->SetAttribute("ctor");
887 method->name.replace(method->name.find(ctor_name), ctor_name.length(), "_ctor_");
888 } else if (is_cctor) {
889 method->metadata->SetAttribute("cctor");
890 method->name.replace(method->name.find(cctor_name), cctor_name.length(), "_cctor_");
891 }
892 }
893
GetMetaData(pandasm::Record * record,const panda_file::File::EntityId & record_id) const894 void Disassembler::GetMetaData(pandasm::Record *record, const panda_file::File::EntityId &record_id) const
895 {
896 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nrecord id: " << record_id << " (0x" << std::hex << record_id
897 << ")";
898
899 if (record == nullptr) {
900 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but record ptr expected!";
901
902 return;
903 }
904
905 if (file_->IsExternal(record_id)) {
906 record->metadata->SetAttribute("external");
907 }
908 }
909
GetMetaData(pandasm::Field * field,const panda_file::File::EntityId & field_id,bool is_scope_names_record)910 void Disassembler::GetMetaData(pandasm::Field *field,
911 const panda_file::File::EntityId &field_id,
912 bool is_scope_names_record)
913 {
914 LOG(DEBUG, DISASSEMBLER) << "[getting metadata]\nfield id: " << field_id << " (0x" << std::hex << field_id << ")";
915
916 if (field == nullptr) {
917 LOG(ERROR, DISASSEMBLER) << "> nullptr recieved, but method ptr expected!";
918
919 return;
920 }
921
922 panda_file::FieldDataAccessor field_accessor(*file_, field_id);
923
924 if (field_accessor.IsExternal()) {
925 field->metadata->SetAttribute("external");
926 }
927
928 if (field_accessor.IsStatic()) {
929 field->metadata->SetAttribute("static");
930 }
931
932 if (field->type.GetId() == panda_file::Type::TypeId::U32) {
933 const auto offset = field_accessor.GetValue<uint32_t>().value();
934 bool is_scope_name_field = is_scope_names_record || field->name == ark::SCOPE_NAMES;
935 if (field->name == ark::MODULE_REQUEST_PAHSE_IDX) {
936 module_request_phase_literals_.insert(offset);
937 } else if (field->name != ark::TYPE_SUMMARY_FIELD_NAME && !is_scope_name_field) {
938 LOG(DEBUG, DISASSEMBLER) << "Module literalarray " << field->name << " at offset 0x" << std::hex << offset
939 << " is excluded";
940 module_literals_.insert(offset);
941 }
942 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U32>(offset));
943 }
944 if (field->type.GetId() == panda_file::Type::TypeId::U8) {
945 const auto val = field_accessor.GetValue<uint8_t>().value();
946 field->metadata->SetValue(pandasm::ScalarValue::Create<pandasm::Value::Type::U8>(val));
947 }
948 }
949
AnnotationTagToString(const char tag) const950 std::string Disassembler::AnnotationTagToString(const char tag) const
951 {
952 switch (tag) {
953 case '1':
954 return "u1";
955 case '2':
956 return "i8";
957 case '3':
958 return "u8";
959 case '4':
960 return "i16";
961 case '5':
962 return "u16";
963 case '6':
964 return "i32";
965 case '7':
966 return "u32";
967 case '8':
968 return "i64";
969 case '9':
970 return "u64";
971 case 'A':
972 return "f32";
973 case 'B':
974 return "f64";
975 case 'C':
976 return "string";
977 case 'D':
978 return "record";
979 case 'E':
980 return "method";
981 case 'F':
982 return "enum";
983 case 'G':
984 return "annotation";
985 case 'I':
986 return "void";
987 case 'J':
988 return "method_handle";
989 case 'K':
990 return "u1[]";
991 case 'L':
992 return "i8[]";
993 case 'M':
994 return "u8[]";
995 case 'N':
996 return "i16[]";
997 case 'O':
998 return "u16[]";
999 case 'P':
1000 return "i32[]";
1001 case 'Q':
1002 return "u32[]";
1003 case 'R':
1004 return "i64[]";
1005 case 'S':
1006 return "u64[]";
1007 case 'T':
1008 return "f32[]";
1009 case 'U':
1010 return "f64[]";
1011 case 'V':
1012 return "string[]";
1013 case 'W':
1014 return "record[]";
1015 case 'X':
1016 return "method[]";
1017 case 'Y':
1018 return "enum[]";
1019 case 'Z':
1020 return "annotation[]";
1021 case '@':
1022 return "method_handle[]";
1023 case '*':
1024 return "nullptr string";
1025 default:
1026 return std::string();
1027 }
1028 }
1029
ScalarValueToString(const panda_file::ScalarValue & value,const std::string & type)1030 std::string Disassembler::ScalarValueToString(const panda_file::ScalarValue &value, const std::string &type)
1031 {
1032 std::stringstream ss;
1033
1034 if (type == "i8") {
1035 int8_t res = value.Get<int8_t>();
1036 ss << static_cast<int>(res);
1037 } else if (type == "u1" || type == "u8") {
1038 uint8_t res = value.Get<uint8_t>();
1039 ss << static_cast<unsigned int>(res);
1040 } else if (type == "i16") {
1041 ss << value.Get<int16_t>();
1042 } else if (type == "u16") {
1043 ss << value.Get<uint16_t>();
1044 } else if (type == "i32") {
1045 ss << value.Get<int32_t>();
1046 } else if (type == "u32") {
1047 ss << value.Get<uint32_t>();
1048 } else if (type == "i64") {
1049 ss << value.Get<int64_t>();
1050 } else if (type == "u64") {
1051 ss << value.Get<uint64_t>();
1052 } else if (type == "f32") {
1053 ss << value.Get<float>();
1054 } else if (type == "f64") {
1055 ss << value.Get<double>();
1056 } else if (type == "string") {
1057 const auto id = value.Get<panda_file::File::EntityId>();
1058 ss << "\"" << StringDataToString(file_->GetStringData(id)) << "\"";
1059 } else if (type == "record") {
1060 const auto id = value.Get<panda_file::File::EntityId>();
1061 ss << GetFullRecordName(id);
1062 } else if (type == "method") {
1063 const auto id = value.Get<panda_file::File::EntityId>();
1064 AddMethodToTables(id);
1065 ss << GetMethodSignature(id);
1066 } else if (type == "enum") {
1067 const auto id = value.Get<panda_file::File::EntityId>();
1068 panda_file::FieldDataAccessor field_accessor(*file_, id);
1069 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1070 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1071 } else if (type == "annotation") {
1072 const auto id = value.Get<panda_file::File::EntityId>();
1073 ss << "id_" << id;
1074 } else if (type == "void") {
1075 return std::string();
1076 } else if (type == "method_handle") {
1077 }
1078
1079 return ss.str();
1080 }
1081
ArrayValueToString(const panda_file::ArrayValue & value,const std::string & type,const size_t idx)1082 std::string Disassembler::ArrayValueToString(const panda_file::ArrayValue &value, const std::string &type,
1083 const size_t idx)
1084 {
1085 std::stringstream ss;
1086
1087 if (type == "i8") {
1088 int8_t res = value.Get<int8_t>(idx);
1089 ss << static_cast<int>(res);
1090 } else if (type == "u1" || type == "u8") {
1091 uint8_t res = value.Get<uint8_t>(idx);
1092 ss << static_cast<unsigned int>(res);
1093 } else if (type == "i16") {
1094 ss << value.Get<int16_t>(idx);
1095 } else if (type == "u16") {
1096 ss << value.Get<uint16_t>(idx);
1097 } else if (type == "i32") {
1098 ss << value.Get<int32_t>(idx);
1099 } else if (type == "u32") {
1100 ss << value.Get<uint32_t>(idx);
1101 } else if (type == "i64") {
1102 ss << value.Get<int64_t>(idx);
1103 } else if (type == "u64") {
1104 ss << value.Get<uint64_t>(idx);
1105 } else if (type == "f32") {
1106 ss << value.Get<float>(idx);
1107 } else if (type == "f64") {
1108 ss << value.Get<double>(idx);
1109 } else if (type == "string") {
1110 const auto id = value.Get<panda_file::File::EntityId>(idx);
1111 ss << '\"' << StringDataToString(file_->GetStringData(id)) << '\"';
1112 } else if (type == "record") {
1113 const auto id = value.Get<panda_file::File::EntityId>(idx);
1114 ss << GetFullRecordName(id);
1115 } else if (type == "method") {
1116 const auto id = value.Get<panda_file::File::EntityId>(idx);
1117 AddMethodToTables(id);
1118 ss << GetMethodSignature(id);
1119 } else if (type == "enum") {
1120 const auto id = value.Get<panda_file::File::EntityId>(idx);
1121 panda_file::FieldDataAccessor field_accessor(*file_, id);
1122 ss << GetFullRecordName(field_accessor.GetClassId()) << "."
1123 << StringDataToString(file_->GetStringData(field_accessor.GetNameId()));
1124 } else if (type == "annotation") {
1125 const auto id = value.Get<panda_file::File::EntityId>(idx);
1126 ss << "id_" << id;
1127 } else if (type == "method_handle") {
1128 } else if (type == "nullptr string") {
1129 }
1130
1131 return ss.str();
1132 }
1133
GetFullMethodName(const panda_file::File::EntityId & method_id) const1134 std::string Disassembler::GetFullMethodName(const panda_file::File::EntityId &method_id) const
1135 {
1136 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1137
1138 const auto method_name_raw = StringDataToString(file_->GetStringData(method_accessor.GetNameId()));
1139
1140 std::string class_name = GetFullRecordName(method_accessor.GetClassId());
1141 if (IsSystemType(class_name)) {
1142 class_name = "";
1143 } else {
1144 class_name += ".";
1145 }
1146
1147 return class_name + method_name_raw;
1148 }
1149
GetMethodSignature(const panda_file::File::EntityId & method_id) const1150 std::string Disassembler::GetMethodSignature(const panda_file::File::EntityId &method_id) const
1151 {
1152 panda::panda_file::MethodDataAccessor method_accessor(*file_, method_id);
1153
1154 pandasm::Function method(GetFullMethodName(method_id), file_language_);
1155 if (method_accessor.GetCodeId().has_value()) {
1156 GetParams(&method, method_accessor.GetCodeId().value());
1157 }
1158 GetMetaData(&method, method_id);
1159
1160 return pandasm::GetFunctionSignatureFromName(method.name, method.params);
1161 }
1162
GetFullRecordName(const panda_file::File::EntityId & class_id) const1163 std::string Disassembler::GetFullRecordName(const panda_file::File::EntityId &class_id) const
1164 {
1165 std::string name = StringDataToString(file_->GetStringData(class_id));
1166
1167 auto type = pandasm::Type::FromDescriptor(name);
1168 type = pandasm::Type(type.GetComponentName(), type.GetRank());
1169
1170 return type.GetPandasmName();
1171 }
1172
GetRecordInfo(const panda_file::File::EntityId & record_id,RecordInfo * record_info) const1173 void Disassembler::GetRecordInfo(const panda_file::File::EntityId &record_id, RecordInfo *record_info) const
1174 {
1175 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1176
1177 if (file_->IsExternal(record_id)) {
1178 return;
1179 }
1180
1181 panda_file::ClassDataAccessor class_accessor {*file_, record_id};
1182 std::stringstream ss;
1183
1184 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1185 << class_accessor.GetClassId() << ", size: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH)
1186 << class_accessor.GetSize() << " (" << std::dec << class_accessor.GetSize() << ")";
1187
1188 record_info->record_info = ss.str();
1189 ss.str(std::string());
1190
1191 class_accessor.EnumerateFields([&](panda_file::FieldDataAccessor &field_accessor) -> void {
1192 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1193 << field_accessor.GetFieldId();
1194
1195 record_info->fields_info.push_back(ss.str());
1196
1197 ss.str(std::string());
1198 });
1199 }
1200
GetMethodInfo(const panda_file::File::EntityId & method_id,MethodInfo * method_info) const1201 void Disassembler::GetMethodInfo(const panda_file::File::EntityId &method_id, MethodInfo *method_info) const
1202 {
1203 constexpr size_t DEFAULT_OFFSET_WIDTH = 4;
1204
1205 panda_file::MethodDataAccessor method_accessor {*file_, method_id};
1206 std::stringstream ss;
1207
1208 ss << "offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1209 << method_accessor.GetMethodId();
1210
1211 if (method_accessor.GetCodeId().has_value()) {
1212 ss << ", code offset: 0x" << std::setfill('0') << std::setw(DEFAULT_OFFSET_WIDTH) << std::hex
1213 << method_accessor.GetCodeId().value();
1214
1215 GetInsInfo(method_accessor.GetCodeId().value(), method_info);
1216 } else {
1217 ss << ", <no code>";
1218 }
1219
1220 method_info->method_info = ss.str();
1221
1222 if (method_accessor.GetCodeId()) {
1223 ASSERT(debug_info_extractor_ != nullptr);
1224 method_info->line_number_table = debug_info_extractor_->GetLineNumberTable(method_id);
1225 method_info->column_number_table = debug_info_extractor_->GetColumnNumberTable(method_id);
1226 method_info->local_variable_table = debug_info_extractor_->GetLocalVariableTable(method_id);
1227
1228 // Add information about parameters into the table
1229 panda_file::CodeDataAccessor codeda(*file_, method_accessor.GetCodeId().value());
1230 auto arg_idx = static_cast<int32_t>(codeda.GetNumVregs());
1231 uint32_t code_size = codeda.GetCodeSize();
1232 for (auto info : debug_info_extractor_->GetParameterInfo(method_id)) {
1233 panda_file::LocalVariableInfo arg_info {info.name, info.signature, "", arg_idx++, 0, code_size};
1234 method_info->local_variable_table.emplace_back(arg_info);
1235 }
1236 }
1237 }
1238
IsArray(const panda_file::LiteralTag & tag)1239 static bool IsArray(const panda_file::LiteralTag &tag)
1240 {
1241 switch (tag) {
1242 case panda_file::LiteralTag::ARRAY_U1:
1243 case panda_file::LiteralTag::ARRAY_U8:
1244 case panda_file::LiteralTag::ARRAY_I8:
1245 case panda_file::LiteralTag::ARRAY_U16:
1246 case panda_file::LiteralTag::ARRAY_I16:
1247 case panda_file::LiteralTag::ARRAY_U32:
1248 case panda_file::LiteralTag::ARRAY_I32:
1249 case panda_file::LiteralTag::ARRAY_U64:
1250 case panda_file::LiteralTag::ARRAY_I64:
1251 case panda_file::LiteralTag::ARRAY_F32:
1252 case panda_file::LiteralTag::ARRAY_F64:
1253 case panda_file::LiteralTag::ARRAY_STRING:
1254 return true;
1255 default:
1256 return false;
1257 }
1258 }
1259
SerializeLiteralArray(const pandasm::LiteralArray & lit_array) const1260 std::string Disassembler::SerializeLiteralArray(const pandasm::LiteralArray &lit_array) const
1261 {
1262 std::stringstream ret;
1263 if (lit_array.literals_.empty()) {
1264 return "";
1265 }
1266
1267 std::stringstream ss;
1268 ss << "{ ";
1269 const auto &tag = lit_array.literals_[0].tag_;
1270 if (IsArray(tag)) {
1271 ss << LiteralTagToString(tag);
1272 }
1273 ss << lit_array.literals_.size();
1274 ss << " [ ";
1275 SerializeValues(lit_array, ss);
1276 ss << "]}";
1277 return ss.str();
1278 }
1279
Serialize(const std::string & key,const pandasm::LiteralArray & lit_array,std::ostream & os) const1280 void Disassembler::Serialize(const std::string &key, const pandasm::LiteralArray &lit_array, std::ostream &os) const
1281 {
1282 os << key << " ";
1283 os << SerializeLiteralArray(lit_array);
1284 os << "\n";
1285 }
1286
Serialize(const std::string & module_offset,const std::vector<std::string> & module_array,std::ostream & os) const1287 void Disassembler::Serialize(const std::string &module_offset, const std::vector<std::string> &module_array,
1288 std::ostream &os) const
1289 {
1290 os << module_offset << " ";
1291 os << SerializeModuleLiteralArray(module_array);
1292 os << "\n";
1293 }
1294
SerializeModuleLiteralArray(const std::vector<std::string> & module_array) const1295 std::string Disassembler::SerializeModuleLiteralArray(const std::vector<std::string> &module_array) const
1296 {
1297 if (module_array.empty()) {
1298 return "";
1299 }
1300
1301 std::stringstream ss;
1302 ss << "{ ";
1303 ss << (module_array.size() - 1); // Only needs to show the count of module tag, exclude module request array
1304 ss << " [\n";
1305 for (size_t index = 0; index < module_array.size(); index++) {
1306 ss << module_array[index] << ";\n";
1307 }
1308 ss << "]}";
1309 return ss.str();
1310 }
1311
LiteralTagToString(const panda_file::LiteralTag & tag) const1312 std::string Disassembler::LiteralTagToString(const panda_file::LiteralTag &tag) const
1313 {
1314 switch (tag) {
1315 case panda_file::LiteralTag::BOOL:
1316 case panda_file::LiteralTag::ARRAY_U1:
1317 return "u1";
1318 case panda_file::LiteralTag::ARRAY_U8:
1319 return "u8";
1320 case panda_file::LiteralTag::ARRAY_I8:
1321 return "i8";
1322 case panda_file::LiteralTag::ARRAY_U16:
1323 return "u16";
1324 case panda_file::LiteralTag::ARRAY_I16:
1325 return "i16";
1326 case panda_file::LiteralTag::ARRAY_U32:
1327 return "u32";
1328 case panda_file::LiteralTag::INTEGER:
1329 case panda_file::LiteralTag::ARRAY_I32:
1330 return "i32";
1331 case panda_file::LiteralTag::ARRAY_U64:
1332 return "u64";
1333 case panda_file::LiteralTag::ARRAY_I64:
1334 return "i64";
1335 case panda_file::LiteralTag::ARRAY_F32:
1336 return "f32";
1337 case panda_file::LiteralTag::DOUBLE:
1338 case panda_file::LiteralTag::ARRAY_F64:
1339 return "f64";
1340 case panda_file::LiteralTag::STRING:
1341 case panda_file::LiteralTag::ARRAY_STRING:
1342 return "string";
1343 case panda_file::LiteralTag::METHOD:
1344 return "method";
1345 case panda_file::LiteralTag::GETTER:
1346 return "getter";
1347 case panda_file::LiteralTag::SETTER:
1348 return "setter";
1349 case panda_file::LiteralTag::GENERATORMETHOD:
1350 return "generator_method";
1351 case panda_file::LiteralTag::ACCESSOR:
1352 return "accessor";
1353 case panda_file::LiteralTag::METHODAFFILIATE:
1354 return "method_affiliate";
1355 case panda_file::LiteralTag::NULLVALUE:
1356 return "null_value";
1357 case panda_file::LiteralTag::TAGVALUE:
1358 return "tagvalue";
1359 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1360 return "lit_index";
1361 case panda_file::LiteralTag::LITERALARRAY:
1362 return "lit_offset";
1363 case panda_file::LiteralTag::BUILTINTYPEINDEX:
1364 return "builtin_type";
1365 default:
1366 UNREACHABLE();
1367 }
1368 }
1369
1370 template <typename T>
SerializeValues(const pandasm::LiteralArray & lit_array,T & os) const1371 void Disassembler::SerializeValues(const pandasm::LiteralArray &lit_array, T &os) const
1372 {
1373 switch (lit_array.literals_[0].tag_) {
1374 case panda_file::LiteralTag::ARRAY_U1: {
1375 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1376 os << std::get<bool>(lit_array.literals_[i].value_) << " ";
1377 }
1378 break;
1379 }
1380 case panda_file::LiteralTag::ARRAY_U8: {
1381 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1382 os << static_cast<uint16_t>(std::get<uint8_t>(lit_array.literals_[i].value_)) << " ";
1383 }
1384 break;
1385 }
1386 case panda_file::LiteralTag::ARRAY_I8: {
1387 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1388 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(lit_array.literals_[i].value_))) << " ";
1389 }
1390 break;
1391 }
1392 case panda_file::LiteralTag::ARRAY_U16: {
1393 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1394 os << std::get<uint16_t>(lit_array.literals_[i].value_) << " ";
1395 }
1396 break;
1397 }
1398 case panda_file::LiteralTag::ARRAY_I16: {
1399 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1400 os << bit_cast<int16_t>(std::get<uint16_t>(lit_array.literals_[i].value_)) << " ";
1401 }
1402 break;
1403 }
1404 case panda_file::LiteralTag::ARRAY_U32: {
1405 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1406 os << std::get<uint32_t>(lit_array.literals_[i].value_) << " ";
1407 }
1408 break;
1409 }
1410 case panda_file::LiteralTag::ARRAY_I32: {
1411 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1412 os << bit_cast<int32_t>(std::get<uint32_t>(lit_array.literals_[i].value_)) << " ";
1413 }
1414 break;
1415 }
1416 case panda_file::LiteralTag::ARRAY_U64: {
1417 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1418 os << std::get<uint64_t>(lit_array.literals_[i].value_) << " ";
1419 }
1420 break;
1421 }
1422 case panda_file::LiteralTag::ARRAY_I64: {
1423 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1424 os << bit_cast<int64_t>(std::get<uint64_t>(lit_array.literals_[i].value_)) << " ";
1425 }
1426 break;
1427 }
1428 case panda_file::LiteralTag::ARRAY_F32: {
1429 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1430 os << std::get<float>(lit_array.literals_[i].value_) << " ";
1431 }
1432 break;
1433 }
1434 case panda_file::LiteralTag::ARRAY_F64: {
1435 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1436 os << std::get<double>(lit_array.literals_[i].value_) << " ";
1437 }
1438 break;
1439 }
1440 case panda_file::LiteralTag::ARRAY_STRING: {
1441 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1442 os << "\"" << std::get<std::string>(lit_array.literals_[i].value_) << "\" ";
1443 }
1444 break;
1445 }
1446 default:
1447 SerializeLiterals(lit_array, os);
1448 }
1449 }
1450
1451 template <typename T>
SerializeLiterals(const pandasm::LiteralArray & lit_array,T & os) const1452 void Disassembler::SerializeLiterals(const pandasm::LiteralArray &lit_array, T &os) const
1453 {
1454 for (size_t i = 0; i < lit_array.literals_.size(); i++) {
1455 const auto &tag = lit_array.literals_[i].tag_;
1456 os << LiteralTagToString(tag) << ":";
1457 const auto &val = lit_array.literals_[i].value_;
1458 switch (lit_array.literals_[i].tag_) {
1459 case panda_file::LiteralTag::BOOL: {
1460 os << std::get<bool>(val);
1461 break;
1462 }
1463 case panda_file::LiteralTag::LITERALBUFFERINDEX:
1464 case panda_file::LiteralTag::INTEGER: {
1465 os << bit_cast<int32_t>(std::get<uint32_t>(val));
1466 break;
1467 }
1468 case panda_file::LiteralTag::DOUBLE: {
1469 os << std::get<double>(val);
1470 break;
1471 }
1472 case panda_file::LiteralTag::STRING: {
1473 os << "\"" << std::get<std::string>(val) << "\"";
1474 break;
1475 }
1476 case panda_file::LiteralTag::METHOD:
1477 case panda_file::LiteralTag::GETTER:
1478 case panda_file::LiteralTag::SETTER:
1479 case panda_file::LiteralTag::GENERATORMETHOD: {
1480 os << std::get<std::string>(val);
1481 break;
1482 }
1483 case panda_file::LiteralTag::NULLVALUE:
1484 case panda_file::LiteralTag::ACCESSOR: {
1485 os << static_cast<int16_t>(bit_cast<int8_t>(std::get<uint8_t>(val)));
1486 break;
1487 }
1488 case panda_file::LiteralTag::METHODAFFILIATE: {
1489 os << std::get<uint16_t>(val);
1490 break;
1491 }
1492 case panda_file::LiteralTag::LITERALARRAY: {
1493 os << std::get<std::string>(val);
1494 break;
1495 }
1496 case panda_file::LiteralTag::BUILTINTYPEINDEX: {
1497 os << static_cast<int16_t>(std::get<uint8_t>(val));
1498 break;
1499 }
1500 default:
1501 UNREACHABLE();
1502 }
1503 os << ", ";
1504 }
1505 }
1506
Serialize(const pandasm::Record & record,std::ostream & os,bool print_information) const1507 void Disassembler::Serialize(const pandasm::Record &record, std::ostream &os, bool print_information) const
1508 {
1509 if (IsSystemType(record.name)) {
1510 return;
1511 }
1512
1513 os << ".record " << record.name;
1514
1515 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1516 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1517 if (record_in_table) {
1518 Serialize(*record.metadata, record_iter->second.ann_list, os);
1519 } else {
1520 Serialize(*record.metadata, {}, os);
1521 }
1522
1523 if (record.metadata->IsForeign()) {
1524 os << "\n\n";
1525 return;
1526 }
1527
1528 os << " {";
1529
1530 if (print_information && prog_info_.records_info.find(record.name) != prog_info_.records_info.end()) {
1531 os << " # " << prog_info_.records_info.at(record.name).record_info << "\n";
1532 SerializeFields(record, os, true);
1533 } else {
1534 os << "\n";
1535 SerializeFields(record, os, false);
1536 }
1537
1538 os << "}\n\n";
1539 }
1540
SerializeFields(const pandasm::Record & record,std::ostream & os,bool print_information) const1541 void Disassembler::SerializeFields(const pandasm::Record &record, std::ostream &os, bool print_information) const
1542 {
1543 constexpr size_t INFO_OFFSET = 80;
1544
1545 const auto record_iter = prog_ann_.record_annotations.find(record.name);
1546 const bool record_in_table = record_iter != prog_ann_.record_annotations.end();
1547
1548 const auto rec_inf = (print_information) ? (prog_info_.records_info.at(record.name)) : (RecordInfo {});
1549
1550 size_t field_idx = 0;
1551
1552 std::stringstream ss;
1553 for (const auto &f : record.field_list) {
1554 std::string file = GetFileNameByPath(f.name);
1555 ss << "\t" << f.type.GetPandasmName() << " " << file;
1556 if (f.metadata->GetValue().has_value()) {
1557 if (f.type.GetId() == panda_file::Type::TypeId::U32) {
1558 ss << " = 0x" << std::hex << f.metadata->GetValue().value().GetValue<uint32_t>();
1559 }
1560 if (f.type.GetId() == panda_file::Type::TypeId::U8) {
1561 ss << " = 0x" << std::hex << static_cast<uint32_t>(f.metadata->GetValue().value().GetValue<uint8_t>());
1562 }
1563 }
1564 if (record_in_table) {
1565 const auto field_iter = record_iter->second.field_annotations.find(f.name);
1566 if (field_iter != record_iter->second.field_annotations.end()) {
1567 Serialize(*f.metadata, field_iter->second, ss);
1568 } else {
1569 Serialize(*f.metadata, {}, ss);
1570 }
1571 } else {
1572 Serialize(*f.metadata, {}, ss);
1573 }
1574
1575 if (print_information) {
1576 os << std::setw(INFO_OFFSET) << std::left << ss.str() << " # " << rec_inf.fields_info.at(field_idx) << "\n";
1577 } else {
1578 os << ss.str() << "\n";
1579 }
1580
1581 ss.str(std::string());
1582 ss.clear();
1583
1584 field_idx++;
1585 }
1586 }
1587
SerializeMethodAnnotations(const pandasm::Function & method,std::ostream & os) const1588 void Disassembler::SerializeMethodAnnotations(const pandasm::Function &method, std::ostream &os) const
1589 {
1590 const auto annotations = method.metadata->GetAnnotations();
1591 if (annotations.empty()) {
1592 return;
1593 }
1594
1595 for (const auto &ann : annotations) {
1596 os << ann.GetName() << ":\n";
1597 std::stringstream ss;
1598 std::vector<pandasm::AnnotationElement> elements = ann.GetElements();
1599 if (elements.empty()) {
1600 continue;
1601 }
1602 uint32_t idx = elements.size() - 1;
1603 ss << "\t" << "u32" << " " << elements.back().GetName() << " { ";
1604 for (const auto &elem : elements) {
1605 ss << "0x" << std::hex << elem.GetValue()->GetAsScalar()->GetValue<uint32_t>();
1606 if (idx > 0) {
1607 ss << ", ";
1608 }
1609 --idx;
1610 }
1611 ss << " }";
1612 os << ss.str() << "\n";
1613 }
1614 }
1615
SerializeInstructions(const pandasm::Function & method,std::ostream & os,const std::map<std::string,MethodInfo>::const_iterator & method_info_it,bool print_method_info) const1616 void Disassembler::SerializeInstructions(const pandasm::Function &method, std::ostream &os,
1617 const std::map<std::string, MethodInfo>::const_iterator &method_info_it,
1618 bool print_method_info) const
1619 {
1620 std::string delim = ": ";
1621 size_t width = 0;
1622 if (print_method_info) {
1623 for (const auto &i : method.ins) {
1624 size_t ins_size = i.ToString().size();
1625 if (i.set_label) {
1626 ins_size = ins_size - i.label.size() - delim.length();
1627 }
1628
1629 if (ins_size > width && ins_size < ark::INSTRUCTION_WIDTH_LIMIT) {
1630 width = i.ToString().size();
1631 }
1632 }
1633 }
1634
1635 for (size_t i = 0; i < method.ins.size(); i++) {
1636 std::string ins = method.ins[i].ToString("", true, method.regs_num);
1637 if (method.ins[i].set_label) {
1638 size_t pos = ins.find(delim);
1639 std::string label = ins.substr(0, pos);
1640 ins.erase(0, pos + delim.length());
1641 os << label << ":\n";
1642 }
1643
1644 if (ins != "") {
1645 os << "\t" << std::setw(width) << std::left << ins;
1646 if (print_method_info && i < method_info_it->second.instructions_info.size()) {
1647 os << " # " << method_info_it->second.instructions_info.at(i);
1648 }
1649 os << "\n";
1650 }
1651 }
1652 }
1653
Serialize(const pandasm::Function & method,std::ostream & os,bool print_information) const1654 void Disassembler::Serialize(const pandasm::Function &method, std::ostream &os, bool print_information) const
1655 {
1656 SerializeMethodAnnotations(method, os);
1657 os << ".function " << method.return_type.GetPandasmName() << " " << method.name << "(";
1658
1659 if (method.params.size() > 0) {
1660 os << method.params[0].type.GetPandasmName() << " a0";
1661
1662 for (uint8_t i = 1; i < method.params.size(); i++) {
1663 os << ", " << method.params[i].type.GetPandasmName() << " a" << (size_t)i;
1664 }
1665 }
1666 os << ")";
1667
1668 const std::string signature = pandasm::GetFunctionSignatureFromName(method.name, method.params);
1669
1670 const auto method_iter = prog_ann_.method_annotations.find(signature);
1671 if (method_iter != prog_ann_.method_annotations.end()) {
1672 Serialize(*method.metadata, method_iter->second, os);
1673 } else {
1674 Serialize(*method.metadata, {}, os);
1675 }
1676
1677 auto method_info_it = prog_info_.methods_info.find(signature);
1678 bool print_method_info = print_information && method_info_it != prog_info_.methods_info.end();
1679 if (print_method_info) {
1680 os << " { # " << method_info_it->second.method_info << "\n# CODE:\n";
1681 } else {
1682 os << " {\n";
1683 }
1684 SerializeInstructions(method, os, method_info_it, print_method_info);
1685
1686 if (method.catch_blocks.size() != 0) {
1687 os << "\n";
1688
1689 for (const auto &catch_block : method.catch_blocks) {
1690 Serialize(catch_block, os);
1691
1692 os << "\n";
1693 }
1694 }
1695
1696 if (print_method_info) {
1697 const MethodInfo &method_info = method_info_it->second;
1698 SerializeLineNumberTable(method_info.line_number_table, os);
1699 SerializeColumnNumberTable(method_info.column_number_table, os);
1700 SerializeLocalVariableTable(method_info.local_variable_table, method, os);
1701 }
1702
1703 os << "}\n\n";
1704 }
1705
SerializeStrings(const panda_file::File::EntityId & offset,const std::string & name_value,std::ostream & os) const1706 void Disassembler::SerializeStrings(const panda_file::File::EntityId &offset, const std::string &name_value,
1707 std::ostream &os) const
1708 {
1709 os << "[offset:0x" << std::hex <<offset<< ", name_value:" << name_value<< "]" <<std::endl;
1710 }
1711
Serialize(const pandasm::Function::CatchBlock & catch_block,std::ostream & os) const1712 void Disassembler::Serialize(const pandasm::Function::CatchBlock &catch_block, std::ostream &os) const
1713 {
1714 if (catch_block.exception_record == "") {
1715 os << ".catchall ";
1716 } else {
1717 os << ".catch " << catch_block.exception_record << ", ";
1718 }
1719
1720 os << catch_block.try_begin_label << ", " << catch_block.try_end_label << ", " << catch_block.catch_begin_label;
1721
1722 if (catch_block.catch_end_label != "") {
1723 os << ", " << catch_block.catch_end_label;
1724 }
1725 }
1726
Serialize(const pandasm::ItemMetadata & meta,const AnnotationList & ann_list,std::ostream & os) const1727 void Disassembler::Serialize(const pandasm::ItemMetadata &meta, const AnnotationList &ann_list, std::ostream &os) const
1728 {
1729 auto bool_attributes = meta.GetBoolAttributes();
1730 auto attributes = meta.GetAttributes();
1731 if (bool_attributes.empty() && attributes.empty() && ann_list.empty()) {
1732 return;
1733 }
1734
1735 os << " <";
1736
1737 size_t size = bool_attributes.size();
1738 size_t idx = 0;
1739 for (const auto &attr : bool_attributes) {
1740 os << attr;
1741 ++idx;
1742
1743 if (!attributes.empty() || !ann_list.empty() || idx < size) {
1744 os << ", ";
1745 }
1746 }
1747
1748 size = attributes.size();
1749 idx = 0;
1750 for (const auto &[key, values] : attributes) {
1751 for (size_t i = 0; i < values.size(); i++) {
1752 os << key << "=" << values[i];
1753
1754 if (i < values.size() - 1) {
1755 os << ", ";
1756 }
1757 }
1758
1759 ++idx;
1760
1761 if (!ann_list.empty() || idx < size) {
1762 os << ", ";
1763 }
1764 }
1765
1766 size = ann_list.size();
1767 idx = 0;
1768 for (const auto &[key, value] : ann_list) {
1769 os << key << "=" << value;
1770
1771 ++idx;
1772
1773 if (idx < size) {
1774 os << ", ";
1775 }
1776 }
1777
1778 os << ">";
1779 }
1780
SerializeLineNumberTable(const panda_file::LineNumberTable & line_number_table,std::ostream & os) const1781 void Disassembler::SerializeLineNumberTable(const panda_file::LineNumberTable &line_number_table,
1782 std::ostream &os) const
1783 {
1784 if (line_number_table.empty()) {
1785 return;
1786 }
1787
1788 os << "\n# LINE_NUMBER_TABLE:\n";
1789 for (const auto &line_info : line_number_table) {
1790 os << "#\tline " << line_info.line << ": " << line_info.offset << "\n";
1791 }
1792 }
1793
SerializeColumnNumberTable(const panda_file::ColumnNumberTable & column_number_table,std::ostream & os) const1794 void Disassembler::SerializeColumnNumberTable(const panda_file::ColumnNumberTable &column_number_table,
1795 std::ostream &os) const
1796 {
1797 if (column_number_table.empty()) {
1798 return;
1799 }
1800
1801 os << "\n# COLUMN_NUMBER_TABLE:\n";
1802 for (const auto &column_info : column_number_table) {
1803 os << "#\tcolumn " << column_info.column << ": " << column_info.offset << "\n";
1804 }
1805 }
1806
SerializeLocalVariableTable(const panda_file::LocalVariableTable & local_variable_table,const pandasm::Function & method,std::ostream & os) const1807 void Disassembler::SerializeLocalVariableTable(const panda_file::LocalVariableTable &local_variable_table,
1808 const pandasm::Function &method, std::ostream &os) const
1809 {
1810 if (local_variable_table.empty()) {
1811 return;
1812 }
1813
1814 os << "\n# LOCAL_VARIABLE_TABLE:\n";
1815 os << "#\t Start End Register Name Signature\n";
1816 const int START_WIDTH = 5;
1817 const int END_WIDTH = 4;
1818 const int REG_WIDTH = 8;
1819 const int NAME_WIDTH = 14;
1820 for (const auto &variable_info : local_variable_table) {
1821 std::ostringstream reg_stream;
1822 reg_stream << variable_info.reg_number << '(';
1823 if (variable_info.reg_number < 0) {
1824 reg_stream << "acc";
1825 } else {
1826 uint32_t vreg = variable_info.reg_number;
1827 uint32_t first_arg_reg = method.GetTotalRegs();
1828 if (vreg < first_arg_reg) {
1829 reg_stream << 'v' << vreg;
1830 } else {
1831 reg_stream << 'a' << vreg - first_arg_reg;
1832 }
1833 }
1834 reg_stream << ')';
1835
1836 os << "#\t " << std::setw(START_WIDTH) << std::right << variable_info.start_offset << " ";
1837 os << std::setw(END_WIDTH) << std::right << variable_info.end_offset << " ";
1838 os << std::setw(REG_WIDTH) << std::right << reg_stream.str() << " ";
1839 os << std::setw(NAME_WIDTH) << std::right << variable_info.name << " " << variable_info.type;
1840 if (!variable_info.type_signature.empty() && variable_info.type_signature != variable_info.type) {
1841 os << " (" << variable_info.type_signature << ")";
1842 }
1843 os << "\n";
1844 }
1845 }
1846
BytecodeOpcodeToPandasmOpcode(uint8_t o) const1847 pandasm::Opcode Disassembler::BytecodeOpcodeToPandasmOpcode(uint8_t o) const
1848 {
1849 return BytecodeOpcodeToPandasmOpcode(BytecodeInstruction::Opcode(o));
1850 }
1851
IDToString(BytecodeInstruction bc_ins,panda_file::File::EntityId method_id,size_t idx) const1852 std::string Disassembler::IDToString(BytecodeInstruction bc_ins, panda_file::File::EntityId method_id,
1853 size_t idx) const
1854 {
1855 std::stringstream name;
1856 const auto offset = file_->ResolveOffsetByIndex(method_id, bc_ins.GetId(idx).AsIndex());
1857 std::string str_data = StringDataToString(file_->GetStringData(offset));
1858 if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::METHOD_ID)) {
1859 name << GetMethodSignature(offset);
1860 } else if (bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::STRING_ID)) {
1861 name << '\"';
1862 name << str_data;
1863 name << '\"';
1864 string_offset_to_name_.emplace(offset, str_data);
1865 } else {
1866 ASSERT(bc_ins.IsIdMatchFlag(idx, BytecodeInstruction::Flags::LITERALARRAY_ID));
1867 pandasm::LiteralArray lit_array;
1868 GetLiteralArrayByOffset(&lit_array, panda_file::File::EntityId(offset));
1869 name << SerializeLiteralArray(lit_array);
1870 }
1871
1872 return name.str();
1873 }
1874
GetRecordLanguage(panda_file::File::EntityId class_id) const1875 panda::panda_file::SourceLang Disassembler::GetRecordLanguage(panda_file::File::EntityId class_id) const
1876 {
1877 if (file_->IsExternal(class_id)) {
1878 return panda::panda_file::SourceLang::PANDA_ASSEMBLY;
1879 }
1880
1881 panda_file::ClassDataAccessor cda(*file_, class_id);
1882 return cda.GetSourceLang().value_or(panda_file::SourceLang::PANDA_ASSEMBLY);
1883 }
1884
translateImmToLabel(pandasm::Ins * pa_ins,LabelTable * label_table,const uint8_t * ins_arr,BytecodeInstruction bc_ins,BytecodeInstruction bc_ins_last,panda_file::File::EntityId code_id)1885 static void translateImmToLabel(pandasm::Ins *pa_ins, LabelTable *label_table, const uint8_t *ins_arr,
1886 BytecodeInstruction bc_ins, BytecodeInstruction bc_ins_last,
1887 panda_file::File::EntityId code_id)
1888 {
1889 const int32_t jmp_offset = std::get<int64_t>(pa_ins->imms.at(0));
1890 const auto bc_ins_dest = bc_ins.JumpTo(jmp_offset);
1891 if (bc_ins_last.GetAddress() > bc_ins_dest.GetAddress()) {
1892 size_t idx = getBytecodeInstructionNumber(BytecodeInstruction(ins_arr), bc_ins_dest);
1893 if (idx != std::numeric_limits<size_t>::max()) {
1894 if (label_table->find(idx) == label_table->end()) {
1895 std::stringstream ss {};
1896 ss << "jump_label_" << label_table->size();
1897 (*label_table)[idx] = ss.str();
1898 }
1899
1900 pa_ins->imms.clear();
1901 pa_ins->ids.push_back(label_table->at(idx));
1902 } else {
1903 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1904 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1905 << ": invalid jump offset 0x" << jmp_offset
1906 << " - jumping in the middle of another instruction!";
1907 }
1908 } else {
1909 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1910 << "). incorrect instruction at offset: 0x" << (bc_ins.GetAddress() - ins_arr)
1911 << ": invalid jump offset 0x" << jmp_offset << " - jumping out of bounds!";
1912 }
1913 }
1914
GetInstructions(pandasm::Function * method,panda_file::File::EntityId method_id,panda_file::File::EntityId code_id) const1915 IdList Disassembler::GetInstructions(pandasm::Function *method, panda_file::File::EntityId method_id,
1916 panda_file::File::EntityId code_id) const
1917 {
1918 panda_file::CodeDataAccessor code_accessor(*file_, code_id);
1919
1920 const auto ins_sz = code_accessor.GetCodeSize();
1921 const auto ins_arr = code_accessor.GetInstructions();
1922
1923 method->regs_num = code_accessor.GetNumVregs();
1924
1925 auto bc_ins = BytecodeInstruction(ins_arr);
1926 const auto bc_ins_last = bc_ins.JumpTo(ins_sz);
1927
1928 LabelTable label_table = GetExceptions(method, method_id, code_id);
1929
1930 IdList unknown_external_methods {};
1931
1932 while (bc_ins.GetAddress() != bc_ins_last.GetAddress()) {
1933 if (bc_ins.GetAddress() > bc_ins_last.GetAddress()) {
1934 LOG(ERROR, DISASSEMBLER) << "> error encountered at " << code_id << " (0x" << std::hex << code_id
1935 << "). bytecode instructions sequence corrupted for method " << method->name
1936 << "! went out of bounds";
1937
1938 break;
1939 }
1940
1941 auto pa_ins = BytecodeInstructionToPandasmInstruction(bc_ins, method_id);
1942 if (pa_ins.IsJump()) {
1943 translateImmToLabel(&pa_ins, &label_table, ins_arr, bc_ins, bc_ins_last, code_id);
1944 }
1945
1946 // check if method id is unknown external method. if so, emplace it in table
1947 if (bc_ins.HasFlag(BytecodeInstruction::Flags::METHOD_ID)) {
1948 const auto arg_method_idx = bc_ins.GetId().AsIndex();
1949 const auto arg_method_id = file_->ResolveMethodIndex(method_id, arg_method_idx);
1950
1951 const auto arg_method_signature = GetMethodSignature(arg_method_id);
1952
1953 const bool is_present = prog_.function_table.find(arg_method_signature) != prog_.function_table.cend();
1954 const bool is_external = file_->IsExternal(arg_method_id);
1955 if (is_external && !is_present) {
1956 unknown_external_methods.push_back(arg_method_id);
1957 }
1958 }
1959
1960 method->AddInstruction(pa_ins);
1961 bc_ins = bc_ins.GetNext();
1962 }
1963
1964 size_t instruction_count = method->ins.size();
1965 for (const auto &pair : label_table) {
1966 if (pair.first > instruction_count) {
1967 LOG(ERROR, DISASSEMBLER) << "> Wrong label index got, count of instructions is " << instruction_count
1968 << ", but the label index is " << pair.first;
1969 continue;
1970 }
1971
1972 // In some case, the end label can be after the last instruction
1973 // Creating an invalid instruction for the label to make sure it can be serialized
1974 if (pair.first == instruction_count) {
1975 pandasm::Ins ins{};
1976 ins.opcode = pandasm::Opcode::INVALID;
1977 method->AddInstruction(ins);
1978 }
1979
1980 method->ins[pair.first].label = pair.second;
1981 method->ins[pair.first].set_label = true;
1982 }
1983
1984 return unknown_external_methods;
1985 }
1986
GetColumnNumber()1987 std::vector<size_t> Disassembler::GetColumnNumber()
1988 {
1989 std::vector<size_t> columnNumber;
1990 for (const auto &method_info : prog_info_.methods_info) {
1991 for (const auto &column_number : method_info.second.column_number_table) {
1992 columnNumber.push_back(column_number.column);
1993 }
1994 }
1995 return columnNumber;
1996 }
1997
GetLineNumber()1998 std::vector<size_t> Disassembler::GetLineNumber()
1999 {
2000 std::vector<size_t> lineNumber;
2001 for (const auto &method_info : prog_info_.methods_info) {
2002 for (const auto &line_number : method_info.second.line_number_table) {
2003 lineNumber.push_back(line_number.line);
2004 }
2005 }
2006 return lineNumber;
2007 }
2008
2009 } // namespace panda::disasm
2010